sub find_dif
{ my @argv = opt_get(@_);
my $gb = opt_as_gb(shift @argv);
my $start = -1;
my @pos = ();
my $dif = "ggtgcgcataatgtatattatgttaaat";
while(0 <= ($start = index($gb->{SEQ}, $dif, $start + 1))){
push(@pos, $start);
}
while(0 <= ($start = index($gb->{SEQ}, complement($dif), $start + 1))){
push(@pos, $start);
}
return @pos;} |
sub find_dnaAbox
{ my @argv = opt_get(@_);
my $gb = opt_as_gb(shift @argv);
my $i = 0;
my @pos = ();
for ($i = 0; $i < length($gb->{SEQ}) - 8; $i ++){
if (substr($gb->{SEQ}, $i, 9) =~ /(tt[at]t.caca)/){
push (@pos, $i);
msg_send(sprintf "%d %s\n", $i, $1);
}elsif(substr($gb->{SEQ}, $i, 9) =~ /(tgtg.a[at]aa)/){
push (@pos, $i);
msg_send(sprintf "%d %s\n", $i, $1);
}
}
return @pos;} |
sub find_seq
{ my $gb = opt_as_gb(shift);
my $ref_Genome =\$ gb->{SEQ};
my $sSeq = shift;
my $printer=shift;
my $sSeq2 = complement($sSeq);
my $direct = 0;
my $comp = 0;
my $iSeqStart = 0;
while(0 <= ($iSeqStart = index($$ref_Genome, $sSeq, $iSeqStart + 1))){
$direct ++;
}
$iSeqStart = 0;
while(0 <= ($iSeqStart = index($$ref_Genome, $sSeq2, $iSeqStart + 1))){
$comp ++;
}
if($printer eq "f"){
open(FILE,">>oligomer_count.rst");
print FILE '--- find_sequence_result ---',"\n";
print FILE "$sSeq: $direct\n$sSeq2: $comp\nTotal: $direct+$comp\n\n";
close(FILE);
}
return ($direct, $comp, $direct + $comp);} |
sub nucleotide_periodicity
{ opt_default("nucleotide"=>"aa", "window"=>50, "filename"=>"aa_frequency.png", "output"=>"show");
my @argv = opt_get(@_);
my $gb = opt_as_gb(shift @argv);
my $nuc = opt_val("nucleotide");
my $window = opt_val("window");
my $filename = opt_val("filename");
my $output = opt_val("output");
my @data = ();
$data[$_] = 0 for (0..($window - 1));
my $start = -1;
while(0 <= ($start = index($gb->{SEQ}, $nuc, $start + 1))){
my $innerPos = -1;
my $localSeq = substr($gb->{SEQ}, $start + length($nuc), $window);
while(0 <= ($innerPos = index($localSeq, $nuc, $innerPos + 1))){
$data[$innerPos]++;
}
}
_UniMultiGrapher([0..($window - 1)],\@ data, -filename=>$filename);
msg_gimv("graph/$filename") if ($output eq 'show');
return @data;} |
sub oligomer_counter
{ opt_default("window"=>0);
my @args = opt_get(@_);
my $gb = opt_as_gb(shift @args);
my $seq = shift @args;
my $window = opt_val("window");
$window = length($gb->{SEQ}) if($window <= 0);
if (opt_val("window")){
open(OUT, '>oligo_count.csv') || &msg_error($!)
if (opt_val("output") eq "f");
my $i = 0;
my @wincount = ();
my @winnum = ();
for ($i = 0; $i <= int(length($gb->{SEQ}) / $window); $i ++){ my $partial = substr($gb->{SEQ}, $i * $window, $window); last if (length($partial) < $window);
my $start = 0;
my $count = 0;
if (length($seq) == 1 && $seq =~ /a|t|g|c/){
$count = $partial =~ tr/a/a/ if ($seq eq 'a');
$count = $partial =~ tr/t/t/ if ($seq eq 't');
$count = $partial =~ tr/g/g/ if ($seq eq 'g');
$count = $partial =~ tr/c/c/ if ($seq eq 'c');
}else{
while(0 <= ($start = index($partial, $seq, $start + 1))){
$count ++;
}
}
push (@wincount, $count);
push (@winnum, $i * $window);
print OUT "%d,%d\n", $i*$window, $count
if (opt_val("output") eq "f");
}
close(OUT) if (opt_val("output") eq "f");
if (opt_val("output") eq "g"){
_UniMultiGrapher(\@winnum,\@ wincount, -x=>'window(bp)',
-y=>'number of oligomer',
-title=>'oligomer by window',
-outfile=>'oligo_count.png'
);
}
return (@wincount);
}else{
my $start = 0;
my $count = 0;
while(0 <= ($start = index($gb->{SEQ}, $seq, $start + 1))){
$count ++;
}
return $count;} |
sub palindrome
{ &opt_default(gtmatch=>0, loop=>0, shortest=>4, -output=>"stdout", -filename=>"palindrome.csv");
my @args = opt_get(@_);
my $gb = opt_as_gb(shift @args);
my $length = int(opt_val("shortest") / 2); my $output = opt_val("output");
my $filename = opt_val("filename");
my %palindrome;
my $i = $length - 1;
my ($len, $j, $k, $stem);
if (opt_val("output") eq "f"){
open(OUT, '>' . $filename) || &msg_error("G::Seq::PatSearch::palindrome() $! $filename");
print OUT "Length, start, end, sequence\n";
}
while($i <= length($gb->{SEQ}) - 1 - $length - opt_val("loop")){
$stem = opt_val("loop");
while($stem >= 0){
$j = $i;
$k = $stem + 1 + $i;
$len = 0;
last if ($k > length($gb->{SEQ}) - 1);
while(&baseParingTest(substr($gb->{SEQ}, $j, 1),
substr($gb->{SEQ}, $k, 1),
&opt_val("gtmatch"))
)
{
$j --;
$k ++;
last if ($j < 0 || $k > length($gb->{SEQ}) - 1);
$len += 2;
}
if ($len >= opt_val("shortest")){
&msg_send(sprintf("Length: %2d Position: %7d %7d Sequence: %s %s %s\n",
$len, $j + 1, $k - 2,
substr($gb->{SEQ}, $j + 1, $len/2), substr($gb->{SEQ}, $j + 1 + $len/2, $stem),
substr($gb->{SEQ}, $j + 1 + $len/2 + $stem, $len/2))) if ($output eq 'stdout');
if ($output eq "f"){
printf OUT "%d,%d,%d,%s %s %s\n",
$len, $j + 1, $k - 2,
substr($gb->{SEQ}, $j + 1, $len/2), substr($gb->{SEQ}, $j + 1 + $len/2, $stem),
substr($gb->{SEQ}, $j + 1 + $len/2 + $stem, $len/2);
}
$palindrome{$j + 1} = sprintf("%s %s %s",
substr($gb->{SEQ}, $j + 1, $len/2), substr($gb->{SEQ}, $j + 1 + $len/2, $stem),
substr($gb->{SEQ}, $j + 1 + $len/2 + $stem, $len/2)
);
}
$stem --;
}
$i ++;
}
close(OUT) if ($output eq "f");
return\% palindrome;} |