sub alignment
{ opt_inputType('seq', 'seq');
opt_default(Gap=>-5, Match=>10, Miss=>-7, output=>'stdout', filename=>'alignment.txt');
my @args = opt_get(@_);
my $gb = opt_as_gb(shift @args);
my $seq1 =\$ gb->{SEQ};
$gb = opt_as_gb(shift @args);
my $seq2 =\$ gb->{SEQ};
my %opt = opt_val();
my %align;
my ($H, $V, $D, $NDIR) = (0, 1, 2, 3);
my @score_path_matrix;
$$seq1 =~ tr/ \n\t[0-9]//d;
$$seq2 =~ tr/ \n\t[0-9]//d;
my $i = length($$seq1);
my $j = length($$seq2);
local *find_max_score = sub{
my ($i, $j, $seq1, $seq2, $Gap_Penalty, $Match, $Miss) = @_;
my ($p, $max_score, $max_dir, @score_tmp, @max_score, %tmp_hash);
my ($H, $V, $D, $NDIR) = (0, 1, 2, 3);
if($score_path_matrix[$i][$j]{score}){
return $score_path_matrix[$i][$j]{score};
}else{
if($i == 0){
$max_score = 0;
$max_dir = $H;
}elsif($j == 0){
$max_score = 0;
$max_dir = $V;
}else{
$score_tmp[$V] = find_max_score($i - 1, $j, $seq1, $seq2, $Gap_Penalty, $Match, $Miss) + $Gap_Penalty;
$score_tmp[$H] = find_max_score($i, $j - 1, $seq1, $seq2, $Gap_Penalty, $Match, $Miss) + $Gap_Penalty;
if(substr($$seq1, $i - 1, 1) eq substr($$seq2, $j - 1, 1)){
$score_tmp[$D] = find_max_score($i - 1, $j - 1, $seq1, $seq2, $Gap_Penalty, $Match, $Miss) + $Match;
}else{
$score_tmp[$D] = find_max_score($i - 1, $j - 1, $seq1, $seq2, $Gap_Penalty, $Match, $Miss) + $Miss;
}
%tmp_hash=();
foreach(@score_tmp){
$tmp_hash{$_} = $p;
$p++;
}
@max_score = sort {$b <=> $a} @score_tmp;
$max_dir = $tmp_hash{$max_score[0]};
$max_score = $score_tmp[$max_dir];
}
$score_path_matrix[$i][$j]{score} = $max_score;
$score_path_matrix[$i][$j]{direction} = $max_dir;
return $max_score;
}
};
$align{score} = find_max_score($i, $j, $seq1, $seq2, $opt{'Gap'}, $opt{'Match'}, $opt{'Miss'});
while($i > 0 | $j > 0){
if($score_path_matrix[$i][$j]{direction} == $H){
$align{seq1} = '-' . $align{seq1};
$align{seq2} = substr($$seq2, $j - 1, 1) . $align{seq2};
$align{hit} = '-' . $align{hit};
$j--;
}
if($score_path_matrix[$i][$j]{direction} == $V){
$align{seq1} = substr($$seq1, $i - 1, 1) . $align{seq1};
$align{seq2} = '-' . $align{seq2};
$align{hit} = '-' . $align{hit};
$i--;
}
if($score_path_matrix[$i][$j]{direction} == $D){
my $tmp1 = substr($$seq1, $i - 1, 1);
my $tmp2 = substr($$seq2, $j - 1, 1);
$align{seq1} = $tmp1 . $align{seq1};
$align{seq2} = $tmp2 . $align{seq2};
if($tmp1 eq $tmp2){
$align{hit} = '+' . $align{hit};
}else{
$align{hit} = '-' . $align{hit};
}
$j--;
$i--;
}
}
my $hit_num = $align{hit} =~ tr/+/+/;
my $miss_num = $align{hit} =~ tr/-/-/;
my $gap_num1 = $align{seq1} =~ tr/-/-/;
my $gap_num2 = $align{seq2} =~ tr/-/-/;
my $result;
$result .= sprintf("Seq1: %d bp Seq2: %d bp\n", length($$seq1), length($$seq2));
$result .= sprintf("Score: %d Match: %d Miss: %d\n", $align{score}, $hit_num, $miss_num);
$result .= sprintf("Gap in seq1: %d Gap in seq2: %d\n", $gap_num1, $gap_num2);
$result .= sprintf("Match score: %d Miss score: %d Gap Penalty: %d\n\n", $opt{'Match'}, $opt{'Miss'}, $opt{'Gap'});
for(my $q = 0; $q * 60 < length($align{seq1}) || $q * 60 < length($align{seq2}); $q++){
$result .= substr($align{seq1}, $q * 60, 60) . "\n";
$result .= substr($align{seq2}, $q * 60, 60) . "\n";
$result .= substr($align{hit}, $q * 60, 60) . "\n\n";
}
if($opt{'output'} eq 'stdout'){
msg_send($result);
}elsif($opt{'output'} =~ /f/){
msg_datafile($result, $opt{'filename'});
}
return\% align; } |
sub diffseq
{ opt_inputType('seq', 'seq');
opt_default(mode=>'align', Gap=>-5, Match=>10, Miss=>-7, output=>'stdout', filename=>'diffseq.csv');
my @args = opt_get(@_);
my %opt = opt_val();
my ($align, $seq1, $seq2);
if($opt{'mode'} eq "align"){
my $gb = opt_as_gb(shift @args);
$seq1 =\$ gb->{SEQ};
$gb = opt_as_gb(shift @args);
$seq2 =\$ gb->{SEQ};
$align = alignment($seq1, $seq2, -Gap=>$opt{'Gap'}, -Match=>$opt{'Match'}, -Miss=>$opt{'Miss'}, -output=>'NULL');
}else{
$align = shift @args;
}
my ($i, @diff, @diff_pos);
while(0 <= ($i = index($$align{hit}, '-', $i + 1))){
push(@diff_pos, $i);
}
foreach(@diff_pos){
my $s1 = substr($$align{seq1}, $_, 1);
my $s2 = substr($$align{seq2}, $_, 1);
my $tmp_gap = substr($$align{seq1}, 0, $_ + 1) =~ tr/-/-/;
my $s1_pos = $_ - $tmp_gap + 1;
$tmp_gap = substr($$align{seq2}, 0, $_ + 1) =~ tr/-/-/;
my $s2_pos = $_ - $tmp_gap + 1;
my $pos = $_ + 1;
push(@diff, join(',', 'insertion', $pos, 'seq1', $s1_pos, 'seq2', $s2_pos, $s1, $s2)) if($s1 eq '-');
push(@diff, join(',', 'deletion', $pos, 'seq1', $s1_pos, 'seq2', $s2_pos, $s1, $s2)) if($s2 eq '-');
push(@diff, join(',', 'transversion', $pos, 'seq1', $s1_pos, 'seq2', $s2_pos, $s1, $s2)) if($s1 ne '-' && $s2 ne '-');
}
if($opt{'output'} eq "stdout"){
foreach(@diff){
my @line = split(/,/, $_, 8);
msg_send(sprintf("%s\t%5d: seq1\(%d\)\-\>seq2\(%d\) %s\-\> %s\n", $line[0], $line[1], $line[3], $line[5], $line[6], $line[7]));
}
}elsif($opt{'output'} =~ /f/){
msg_datafile(join("\n", @diff), $opt{'filename'});
}
return\@ diff; } |