Privates (from "my" definitions)
# %COG_fcode: lookup table from a one-character code (J, K, L, ... plus '-')
# to a human-readable description string. '-' maps to "Non COG".
# Presumably these are COG functional-category letters -- TODO confirm
# against the code that reads this table.
%COG_fcode = ( J=>"Translation, ribosomal structure and biogenesis", K=>"Transcription", L=>"DNA replication, recombination and repair", D=>"Cell division and chromosome partitioning", O=>"Posttranslational modification, protein turnover, chaperones", M=>"Cell envelope biogenesis, outer membrane", N=>"Cell motility and secretion", P=>"Inorganic ion transport and metabolism", T=>"Signal transduction mechanisms", C=>"Energy production and conservation", G=>"Carbohydrate transport and metabolism", E=>"Amino acid transport and metabolism", F=>"Nucleotide transport and metabolism", H=>"Coenzyme metabolism", I=>"Lipid metabolism", Q=>"Secondary metabolites biosynthesis, transport and catabolism", R=>"General function prediction only", S=>"Function unknown", '-'=>"Non COG" )
# %COG_fcolor: lookup table with the same keys as %COG_fcode, each mapped to
# a color name string.
# NOTE(review): Q and E both map to "lightskyblue", so those two codes get
# the same color -- confirm whether that is intended.
%COG_fcolor = ( J=>"plum", K=>"fuchsia", L=>"pink", D=>"lightgreen", O=>"green", M=>"khaki", N=>"greenyellow", P=>"darkkhaki", T=>"cyan", C=>"blue", G=>"mediumturquoise", E=>"lightskyblue", F=>"mediumpurple", H=>"aqua", I=>"blueviolet", Q=>"lightskyblue", R=>"gainsboro", S=>"darkgrey", '-'=>"aliceblue" )
# atcgcon -- count a/t/g/c over every feature whose type is 'CDS' and report
# the totals, per-base percentages and GC content.
#
# Arguments (after option parsing by opt_get):
#   first positional argument : genome data, passed through opt_as_gb()
# Options:
#   output   => "stdout" (default) sends each report line through msg_send();
#               "f" writes the same report to the file named by `filename`;
#               any other value produces no report.
#   filename => report file name for output "f" (default "cds_info.csv").
#
# Returns a reference to a hash with the keys a, t, g, c and total. The keys
# only exist when at least one CDS feature was counted.
#
# Only lower-case letters are counted (tr/a/a/ etc.), and the sequence is
# taken as getseq(start-1, end-1) of each feature.
sub atcgcon {
    opt_default(output => "stdout", filename => "cds_info.csv");
    my @args     = opt_get(@_);
    my $gb       = opt_as_gb(shift @args);
    my $output   = opt_val("output");
    my $filename = opt_val("filename");

    my %hash;

    # The loop runs once per element returned by feature(), but features are
    # looked up by a running counter (FEATURE1, FEATURE2, ...), not by the
    # returned element.
    # NOTE(review): confirm the two always line up for this genome object.
    my $num = 1;
    foreach ($gb->feature()) {
        my $feature = $gb->{"FEATURE$num"};
        if ($feature->{type} eq 'CDS') {
            my $seq = $gb->getseq($feature->{start} - 1, $feature->{end} - 1);
            $hash{a}     += ($seq =~ tr/a/a/);
            $hash{t}     += ($seq =~ tr/t/t/);
            $hash{g}     += ($seq =~ tr/g/g/);
            $hash{c}     += ($seq =~ tr/c/c/);
            $hash{total} += length($seq);
        }
        $num++;
    }

    if ($output eq "stdout" || $output eq "f") {
        # With no CDS counted every figure is reported as zero instead of
        # dying on a division by zero.
        my $total   = $hash{total} || 0;
        my $percent = sub { return $total ? 100.0 * $_[0] / $total : 0 };

        # "%%" is the portable way to emit a literal percent sign.
        my @report = (sprintf("total:\t%10d base\n", $total));
        foreach my $base (qw(a t c g)) {
            my $count = $hash{$base} || 0;
            push @report, sprintf("%s:\t%10d / %2.2f%%\n",
                                  $base, $count, $percent->($count));
        }
        push @report, sprintf("GC content:\t%.2f%%\n",
                              $percent->(($hash{c} || 0) + ($hash{g} || 0)));

        if ($output eq "stdout") {
            msg_send($_) foreach @report;
        }
        elsif (open(my $fh, '>', $filename)) {
            print $fh @report;
            close($fh)
                or warn "atcgcon: error while closing '$filename': $!\n";
        }
        else {
            # The counts are still returned; only the report file is skipped.
            warn "atcgcon: cannot open '$filename' for writing: $!\n";
        }
    }

    return \%hash;
}
# oligomer_translation -- translate a short nucleotide string codon by codon,
# joining the pieces with '-'.
#
# Arguments (after option parsing by opt_get):
#   1. the sequence (the codon table below is keyed in lower case)
#   2. the reading frame; a value above 3 means the sequence is first passed
#      through G::Seq::Util::complement() and the frame is reduced by 3.
#
# Output layout: the (frame - 1) bases in front of the first codon are copied
# verbatim and followed by '-'; every complete codon becomes its one-letter
# amino acid ('/' for taa/tag/tga); bases left over after the last complete
# codon are appended untranslated. A codon missing from the table contributes
# nothing to the output.
sub oligomer_translation {
    my ($seq, $frame) = opt_get(@_);
    my $len = length($seq);

    if ($frame > 3) {
        $seq = G::Seq::Util::complement($seq);
        $frame -= 3;
    }

    my %CodonTable = (
        gca => 'A', gcc => 'A', gcg => 'A', gct => 'A',
        tgc => 'C', tgt => 'C',
        gac => 'D', gat => 'D',
        gaa => 'E', gag => 'E',
        ttc => 'F', ttt => 'F',
        gga => 'G', ggc => 'G', ggg => 'G', ggt => 'G',
        cac => 'H', cat => 'H',
        ata => 'I', atc => 'I', att => 'I',
        aaa => 'K', aag => 'K',
        tta => 'L', ttg => 'L', cta => 'L', ctc => 'L', ctg => 'L', ctt => 'L',
        atg => 'M',
        aac => 'N', aat => 'N',
        cca => 'P', ccc => 'P', ccg => 'P', cct => 'P',
        caa => 'Q', cag => 'Q',
        aga => 'R', agg => 'R', cga => 'R', cgc => 'R', cgg => 'R', cgt => 'R',
        agc => 'S', agt => 'S', tca => 'S', tcc => 'S', tcg => 'S', tct => 'S',
        aca => 'T', acc => 'T', acg => 'T', act => 'T',
        gta => 'V', gtc => 'V', gtg => 'V', gtt => 'V',
        tgg => 'W',
        tac => 'Y', tat => 'Y',
        taa => '/', tag => '/', tga => '/',
    );

    my $translated = '';
    my $pos        = 0;
    while ($pos < $len) {
        # Bases in front of the first codon of the requested frame.
        if ($pos < $frame - 1) {
            $translated .= substr($seq, $pos, $frame - 1) . '-';
            $pos += $frame - 1;
            next;
        }

        # Fewer than three bases remain: copy them as-is and stop.
        if ($pos + 3 > $len) {
            $translated .= substr($seq, $pos);
            last;
        }

        $translated .= $CodonTable{ substr($seq, $pos, 3) };
        $pos += 3;

        # No separator after the piece that ends the sequence.
        $translated .= '-' unless ($pos >= $len);
    }

    return $translated;
}
# view_cds -- accumulate the percentage of a/t/g/c at each position in a
# window around the start codon and around the stop codon of every CDS, then
# graph the profile or write it as CSV.
#
# Arguments (after option parsing by opt_get):
#   first positional argument : genome data, passed through opt_as_gb()
# Options:
#   length      => bases requested on each side of the codon (default 100)
#   gap         => empty columns between the two windows (default 3)
#   output      => "show" (default) or "g": draw with _UniMultiGrapher();
#                  "show" additionally calls msg_gimv("graph/<filename>");
#                  "f": write CSV to data/<filename>
#   filename    => output name (default "view_cds.png"; replaced by
#                  "view_cds.csv" when output is "f" and the default is kept)
#   application => read from the options but not used in this routine
#
# Layout: columns 0 .. are the start-codon window; the stop-codon window
# begins at column length(window) + gap. The profile is length*4 + 6 + gap
# columns wide in total.
# NOTE(review): the stop-window offset uses the length of each CDS's own
# stop window -- confirm that the window helpers always return the same
# length, otherwise the columns of different CDSs do not line up.
sub view_cds {
    opt_default(length => 100, filename => "view_cds.png",
                gap => 3, output => "show", application => "gimv");
    my @args = opt_get(@_);
    my $gb   = opt_as_gb(shift @args);

    my $length      = opt_val("length");
    my $filename    = opt_val("filename");
    my $output      = opt_val("output");
    my $application = opt_val("application");
    $filename = "view_cds.csv"
        if ($output eq "f" && opt_val("filename") eq "view_cds.png");
    my $gap = opt_val("gap");

    # Count consecutive non-empty CDS<n> entries starting at CDS0.
    # (`defined %hash` is a fatal error on modern perls, so test the hash
    # itself in boolean context.)
    my $numcds = 0;
    while (ref($gb->{"CDS$numcds"}) && %{ $gb->{"CDS$numcds"} }) {
        $numcds++;
    }

    # Each base adds an equal share so that a column sums to a percentage.
    # Guarded so that an empty count cannot cause a division by zero.
    my $share = $numcds ? 100 / $numcds : 0;

    my $width = $length * 4 + 6 + $gap;
    my @a = (0) x $width;
    my @t = (0) x $width;
    my @g = (0) x $width;
    my @c = (0) x $width;
    my %column_for = (a => \@a, t => \@t, g => \@g, c => \@c);

    # Add one window to the profile, shifted right by $offset columns.
    # Characters other than a/t/g/c are ignored.
    my $tally = sub {
        my ($window, $offset) = @_;
        for my $i (0 .. length($window) - 1) {
            my $column = $column_for{ substr($window, $i, 1) } or next;
            $column->[$i + $offset] += $share;
        }
    };

    foreach my $cds ($gb->cds()) {
        my $seq = $gb->before_startcodon($cds, $length);
        $seq .= $gb->startcodon($cds);
        $seq .= $gb->after_startcodon($cds, $length);
        $tally->($seq, 0);

        $seq = $gb->before_stopcodon($cds, $length);
        $seq .= $gb->stopcodon($cds);
        $seq .= $gb->after_stopcodon($cds, $length);
        $tally->($seq, length($seq) + $gap);
    }

    if ($output eq "g" || $output eq "show") {
        my @pos = (1 .. $width);
        _UniMultiGrapher(
            \@pos, -x => "position", -y => "percentage",
            \@a, -x1 => "A", \@t, -x2 => "T",
            \@g, -x3 => "G", \@c, -x4 => "C",
            -filename => $filename,
            -title    => "Base Contents Around Start/Stop Codons"
        );
        msg_gimv("graph/$filename") if ($output eq "show");
    }
    elsif ($output eq "f") {
        my $path = 'data/' . $filename;
        if (open(my $out, '>', $path)) {
            print $out "position,A,T,G,C\n";
            for my $i (0 .. $width - 1) {
                printf $out "%d,%3.2f,%3.2f,%3.2f,%3.2f\n", $i + 1,
                    $a[$i], $t[$i], $g[$i], $c[$i];
            }
            close($out)
                or warn "view_cds: error while closing '$path': $!\n";
        }
        else {
            warn "view_cds: cannot open '$path' for writing: $!\n";
        }
    }
}