Вы можете написать грамматику для Regexp::Grammars, которая, в свою очередь, проанализирует файл для вас. Модуль очень хорошо документирован.
Я надеюсь, что следующий скрипт поможет вам понять идею синтаксического анализа:
Parser
#!/usr/bin/env perl
use strict;
use warnings;
use Regexp::Grammars;
# Grammar (compiled through Regexp::Grammars) that splits a text into
# comments and source lines.
#
#   File             - one or more of: a bare newline, a Comment, or a Source
#                      line.  Comment and Source are invoked with the
#                      list-collecting <[...]> form; the code that consumes the
#                      parse result dereferences both entries as arrays.
#   Comment          - either '#' followed by an InlineComment, or a block
#                      that starts with a line-anchored "=begin", contains a
#                      MultilineComment, and is closed by a line-anchored
#                      "=end".
#   InlineComment    - the remainder of the line (one or more non-newline
#                      characters).
#   MultilineComment - ".*"; because of the /s flag this also matches newlines.
#   Source           - one or more non-newline characters.
#
# Flags: /x allows the free-form layout, /m lets ^ anchor at line starts,
# /s lets . match newlines.
#
# NOTE(review): <[Comment]> appears twice in the File alternation (once inside
# the inner group, once outside); the second occurrence looks redundant -
# confirm before removing.
# NOTE(review): the ".*" in MultilineComment is greedy, so with several
# =begin/=end blocks in one input it presumably extends to the last "=end" -
# TODO confirm, and consider ".*?" if that is not intended.
my $parser = qr{
<File>
<rule: File>
(
(?:
\n
| <[Comment]>
)
| <[Comment]>
| <[Source]>
)+
<rule: Comment>
\# <InlineComment>
| ^=begin
<MultilineComment> \n
^=end
<rule: InlineComment>
[^\n]+
<rule: MultilineComment>
.*
<rule: Source>
[^\n]+
}xms;
# Slurp everything after __DATA__ into a single string; localising $/ to
# undef makes the readline return the whole handle at once.
my $text = do { local $/; <DATA> };

# The parse result is read from the %/ hash under the 'File' key.
if ( $text =~ $parser ) {
    my $file = $/{'File'};

    # Either list may be absent when the input contained no such element,
    # so fall back to an empty list instead of dereferencing undef.
    my @source   = exists $file->{'Source'}  ? @{ $file->{'Source'} }  : ();
    my @comments = exists $file->{'Comment'} ? @{ $file->{'Comment'} } : ();

    # Report the source lines, numbered from 1.
    print '__SOURCE__ [', scalar @source, "]\n";
    my $line = 1;
    for my $source_line (@source) {
        print "$line: $source_line\n";
        $line++;
    }
    print "\n\n";

    # Report the comments, numbered from 1.  Each Comment node holds either
    # an InlineComment or a MultilineComment entry; print whichever exists.
    print '__COMMENTS__ [', scalar @comments, "]\n";
    $line = 1;
    for my $comment (@comments) {
        print "$line: ";
        if ( exists $comment->{'InlineComment'} ) {
            print $comment->{'InlineComment'};
        }
        elsif ( exists $comment->{'MultilineComment'} ) {
            print $comment->{'MultilineComment'};
        }
        print "\n";
        $line++;
    }
}
else {
    # Previously this branch was empty, so a parse failure produced no output
    # and a zero exit status.  Fail loudly instead.
    die "Failed to parse input: text does not match the File grammar\n";
}
__DATA__
=begin
The following code snippet was copied from:
http://www.ruby-lang.org/en/documentation/quickstart/4/
=end
# Say hi to everybody
def say_hi
if @names.nil?
puts "..."
elsif @names.respond_to?("each")
# @names is a list of some kind, iterate!
@names.each do |name|
puts "Hello #{name}!"
end
else
puts "Hello #{@names}!"
end
end
Вывод
__SOURCE__ [11]
1: def say_hi
2: if @names.nil?
3: puts "..."
4: elsif @names.respond_to?("each")
5: @names.each do |name|
6: puts "Hello #{name}!"
7: end
8: else
9: puts "Hello #{@names}!"
10: end
11: end
__COMMENTS__ [3]
1: The following code snippet was copied from:
http://www.ruby-lang.org/en/documentation/quickstart/4/
2: Say hi to everybody
3: @names is a list of some kind, iterate!