Вот пример того, как можно извлечь данные из файла в формате Swiss-Prot (.swiss):
use feature qw(say);
use strict;
use warnings;
{
    # Main script body: load the '//'-separated records, pull three fields
    # out of each record's ID and AC lines, and write one "->a|b|c" line per
    # record to text.txt.
    my $data = read_swiss_file();

    my @ids;
    for my $chunk ( @$data ) {
        my ( $item1, $item2, $item3 );

        # ID line: "ID" + exactly three whitespace chars, a name token, a
        # second token ending in ';', then free text up to the final '.'.
        # The tail is \s*$ rather than \s+$: with \s+ the match only
        # succeeds when the '.' is followed by extra whitespace (e.g. a
        # trailing space or a "\r" from CRLF line endings), so records in a
        # plain LF-terminated file were silently skipped.
        if ( $chunk =~ /^ID\s{3}(\S+)\s+\S+;\s+(.*)\.\s*$/m ) {
            $item1 = $1;
            $item2 = $2;

            # Drops only the first run of whitespace inside the captured
            # text (no /g), e.g. "104 AA" becomes "104AA".
            $item2 =~ s/\s+//;
        }

        # AC line: keep the first token that is terminated by ';'.
        if ( $chunk =~ /^AC\s{3}(\S+);/m ) {
            $item3 = $1;
        }

        # Records without a recognizable ID line are not reported at all.
        push @ids, [ $item1, $item2, $item3 ] if defined $item1;
    }

    my $fn = 'text.txt';
    open( my $fh, '>', $fn ) or die "Could not open file '$fn': $!";
    for my $items (@ids) {
        # A record may lack an AC line; print that field as empty instead
        # of triggering an "uninitialized value" warning in join.
        say {$fh} "->", join( '|', map { $_ // '' } @$items );
    }

    # Buffered write errors only surface at close, so check it.
    close $fh or die "Could not close file '$fn': $!";
}
# read_swiss_file( [$fn] )
#
# Slurps the whole input file and splits it into chunks at every line that
# begins with "//". The "//" markers themselves are removed; the rest of the
# marker line (normally just the newline) stays at the start of the next
# chunk.
#
# Parameters:
#   $fn - optional path of the file to read; defaults to
#         'transmem_proteins.swiss' in the current directory.
#
# Returns: a reference to an array of chunk strings.
# Dies:    if the file cannot be opened.
sub read_swiss_file {
    my ($fn) = @_;
    $fn //= 'transmem_proteins.swiss';

    open( my $fh, '<', $fn ) or die "Could not open file '$fn': $!";

    # Undefine the record separator locally so a single read returns the
    # entire file content.
    my $str = do { local $/; <$fh> };
    close $fh;

    # /m lets ^ match at the start of every line, not just of the string.
    my @chunks = split m{^//}m, $str;
    return \@chunks;
}