Я не эксперт по Perl, но вы можете использовать специальные массивы @- и @+ (их элементы $-[0] и $+[0]), чтобы узнать позиции начала и конца последнего успешного совпадения с регулярным выражением.
Ниже приведён код, основанный на вашем, который это демонстрирует.
use strict;
use warnings;
# Scan every line of the input file for four motifs that must occur in order
# (motif1, then motif2, motif3, motif4). For each motif found, report its
# start/end offsets; as soon as one is missing, report that motif as not found.
# Everything is echoed both to the default output and to data.txt.
my $file="sample.txt";
# Three-argument open so the file name can never be parsed as an open mode;
# both opens are checked and report the OS error.
open(DAT, '<', $file) or die("Could not open file '$file': $!");
open(OUTPUTFILE, '>', 'data.txt') or die("Could not open data.txt for writing: $!");
my $sequence;              # collects every line on which the motif chain was incomplete
my $someVar = 0;           # number of motifs matched so far on the current line
my $sequenceNums = 1;      # 1-based number of the line being processed
my $motif1 = "(HDWFLSFKD)";
my $motif2 = "(HD)";
my $motif3 = "(K)";
my $motif4 = "(DD)";
# Label/pattern pairs in the order they must appear on a line.
my @motifs = (
    [ "motif1", $motif1 ],
    [ "motif2", $motif2 ],
    [ "motif3", $motif3 ],
    [ "motif4", $motif4 ],
);
while (my $line = <DAT>)
{
    $someVar = 0;
    print "\nSequence $sequenceNums: $line\n";
    print OUTPUTFILE "\nSequence $sequenceNums: $line\n";
    for my $motif (@motifs)
    {
        my ($label, $pattern) = @{$motif};
        # A //g match in scalar context resumes at pos($line), i.e. right
        # after the previous motif's match, which enforces the ordering.
        last unless $line =~ /$pattern/g;
        # $-[0] is the offset where the match starts, $+[0] the offset just
        # past its end.
        printStuff($sequenceNums, $label, $pattern, "$-[0]-$+[0]");
        $someVar++;
    }
    if ($someVar < @motifs)
    {
        # The chain broke: $motifs[$someVar] is the first motif that failed.
        $sequence .= $line;
        my ($label, $pattern) = @{ $motifs[$someVar] };
        printWrongStuff($sequenceNums, $label, $pattern);
    }
    $sequenceNums++;
}
# Report a motif that was found.
# Arguments, in order: sequence number, motif label, motif pattern, and the
# index-location text. The report goes to the default output handle (with a
# trailing space before the newline) and to OUTPUTFILE (without it).
sub printStuff
{
    my ($seq_no, $label, $pattern, $span) = @_;
    my $report = "Sequence: $seq_no $label: $pattern index location: $span";
    print("$report \n");
    print OUTPUTFILE "$report\n";
}
# Report a motif that was not found.
# Arguments, in order: sequence number, motif label, motif pattern. The same
# line is written to the default output handle and to OUTPUTFILE.
sub printWrongStuff
{
    my ($seq_no, $label, $pattern) = @_;
    my $notice = "Sequence: $seq_no $label: $pattern was not found\n";
    print($notice);
    print OUTPUTFILE $notice;
}
# Buffered write errors only surface when the handle is closed, so a failed
# close on the report file is treated as fatal; a failed close on the
# read-only input handle is merely reported.
close(OUTPUTFILE) or die("Could not close data.txt: $!");
close(DAT) or warn("Could not close input file: $!");
Пример ввода:
MLTSHQKKF HDWFLSFKD SNNYN HD S K QNHSIK DD IFNRFNHYIYNDLGIRTIA
MLTSHQKKFSNNYNSKQNHSIKDIFNRFNHYIYNDLGIRTIA
MLTSHQKKFSNNYNSK HDWFLSFKD QNHSIKDIFNRFNHYIYNDL