G::Seq
GCskew
G::Seq::GCskew - Perl extension for blah blah blah
|
Globals (from use vars definitions) |
@EXPORT |
$VERSION |
@EXPORT_OK |
use G::Seq::GCskew; blah blah blah
|
Stub documentation for G::Seq::GCskew was created by h2xs. It looks like the author of the extension was negligent enough to leave the stub unedited.
Blah blah blah.
|
BEGIN | | Code |
DESTROY | No description | Code |
cum_gcskew | No description | Code |
find_ori_ter | No description | Code |
gcskew | No description | Code |
gcwin | No description | Code |
genomicskew | No description | Code |
leading_strand | No description | Code |
new | No description | Code |
ori_search | No description | Code |
query_strand | No description | Code |
rep_ori_ter | No description | Code |
view_cds | No description | Code |
Methods description
Methods code
BEGIN
{ eval "use Chart::Graph qw(gnuplot);";
if($@){ warn "$@" };
}
sub DESTROY
{ my $self = shift;
}
sub cum_gcskew
{ &opt_default(window=>10000, at=>0, output=>"show", application=>"gimv",
filename=>"cum_gcskew.png");
my @args = opt_get(@_);
&opt_default(filename=>"cum_gcskew.csv") if (opt_val("output") eq 'f');
my $gb = opt_as_gb(shift @args);
my $ref =\$ gb->{SEQ};
my $window = opt_val("window");
my $application = opt_val("application");
my $output = opt_val("output");
my $filename = opt_val("filename");
my $at = opt_val("at");
my @gcskew = ();
my @location = ();
my $j;
my $tmp;
my $i = 0;
while(length($$ref) - ($window * $i) >= $window){
my $g = substr($$ref, $window * $i, $window) =~ tr/g/g/;
$g = substr($$ref, $window * $i, $window) =~ tr/a/a/ if ($at);
my $c = substr($$ref, $window * $i, $window) =~ tr/c/c/;
$c = substr($$ref, $window * $i, $window) =~ tr/t/t/ if ($at);
if ($c+$g <= 0){
$tmp += 0;
}else{
$tmp += sprintf("%.6f",($c-$g)/($c+$g));
}
$gcskew[$i] = $tmp;
$location[$i] = $i * $window;
$i ++;
}
$i --;
if ($output eq 'g' || $output eq 'show'){
my $title = "Cumulative GC skew";
mkdir ("graph", 0777);
$title = "Cumulative AT skew" if ($at);
_UniMultiGrapher(\@
location,\@gcskew,
-x=>"bp", -y=>$title,
-filename=>$filename,
-title=>$title,
-style=>"lines", -type=>"columns",
);
msg_gimv("graph/" . $filename)
if ($output eq 'show');
}elsif ($output eq 'f'){
my $title = "Cumulative GC skew";
my $j = 0;
mkdir ("data", 0777);
$title = "Cumulative AT skew" if ($at);
open(OUT, ">data/" . $filename);
print OUT "location,$title\n";
for ($j = 0; $j <= $i; $j++){
print OUT $location[$j], ",", $gcskew[$j], "\n";
}
close(OUT);
}
return @gcskew;
}
sub find_ori_ter
{ &opt_default(window=>1000, output=>"stdout");
my @args = opt_get(@_);
my $ref_Genome = opt_as_gb(shift @args);
my $printer = shift;
my $iWindow = opt_val("window");
my $output = opt_val("output");
my $i = 0;
my $max = 0;
my $maxi = 0;
my $min = 0;
my $mini = 0;
my $before = 0;
my ($g,$c, $cumulative);
&msg_send("\nfind_ori_ter:\n") if ($output eq 'stdout');
&msg_send(" Window size = $iWindow\n") if ($output eq 'stdout');
while(length(substr($ref_Genome->{SEQ}, $i*$iWindow)) >= $iWindow){
$g = substr($ref_Genome->{SEQ}, $i * $iWindow, $iWindow) =~ tr/g/g/;
$c = substr($ref_Genome->{SEQ}, $i * $iWindow, $iWindow) =~ tr/c/c/;
if ($c + $g == 0){
$i++;
&msg_error("ERROR: Window " , $i * $iWindow, "-" ,
$i * $iWindow + $iWindow, "bp contains no C or G\n");
next;
}
$cumulative += ($c-$g)/($c+$g);
if ($cumulative > $max){
$max = $cumulative;
$maxi = $i;
}elsif($cumulative < $min){
$min = $cumulative;
$mini = $i;
}
$i ++;
}
&msg_send(" Predicted Origin: " , $maxi * $iWindow + $iWindow / 2 , "\n") if ($output eq 'stdout');
&msg_send(" Predicted Terminus: " , $mini * $iWindow + $iWindow / 2 , "\n\n") if ($output eq 'stdout');
return ($maxi * $iWindow + $iWindow / 2, $mini * $iWindow + $iWindow / 2);
}
sub gcskew
{ &opt_default(window=>10000, at=>0, output=>"show", application=>"gimv",
filename=>"gcskew.png");
my @args = opt_get(@_);
my $gb = opt_as_gb(shift @args);
my $ref =\$ gb->{SEQ};
my $window = opt_val("window");
my $application = opt_val("application");
my $output = opt_val("output");
my $filename = opt_val("filename");
$filename =~ s/png/csv/g if (opt_val("output") eq 'f');
my $at = opt_val("at");
my @gcskew = ();
my @location = ();
my $j;
my $i = 0;
while(length($$ref) - ($window * $i) >= $window){
my $g = substr($$ref, $window * $i, $window) =~ tr/g/g/;
$g = substr($$ref, $window * $i, $window) =~ tr/a/a/ if ($at);
my $c = substr($$ref, $window * $i, $window) =~ tr/c/c/;
$c = substr($$ref, $window * $i, $window) =~ tr/t/t/ if ($at);
if ($c+$g <= 0){
$gcskew[$i] = 0;
}else{
$gcskew[$i] = sprintf("%.6f",($c-$g)/($c+$g));
}
$location[$i] = $i * $window;
$i ++;
}
$i --;
if ($output eq 'g' || $output eq 'show'){
my $title = "GC skew";
mkdir ("graph", 0777);
$title = "AT skew" if ($at);
_UniMultiGrapher(\@
location,\@gcskew,
-x=>"bp", -y=>$title,
-filename=>$filename,
-title=>$title,
-style=>"lines", -type=>"columns",
);
msg_gimv("graph/" . $filename)
if ($output eq 'show');
}elsif ($output eq 'f'){
my $title = "GC skew";
my $j = 0;
mkdir ("data", 0777);
$title = "AT skew" if ($at);
open(OUT, ">data/" . $filename);
print OUT "location,$title\n";
for ($j = 0; $j <= $i; $j++){
print OUT $location[$j], ",", $gcskew[$j], "\n";
}
close(OUT);
}
return @gcskew;
}
sub gcwin
{ &opt_default(window=>10000, at=>0, output=>"show", application=>"gimv",
filename=>"gcwin.png");
my @args = opt_get(@_);
&opt_default(filename=>"gcwin.csv") if (opt_val("output") eq 'f');
my $gb = opt_as_gb(shift @args);
my $ref =\$ gb->{SEQ};
my $window = opt_val("window");
my $at = opt_val("at");
my $application = opt_val("application");
my $filename = opt_val("filename");
my $opt = opt_val("output");
my (@gcwin, @location);
my $j;
my $i = 0;
while(length($$ref) - ($window * $i) >= $window){
my $g = substr($$ref, $window * $i, $window) =~ tr/g/g/;
$g = substr($$ref, $window * $i, $window) =~ tr/a/a/ if ($at);
my $c = substr($$ref, $window * $i, $window) =~ tr/c/c/;
$c = substr($$ref, $window * $i, $window) =~ tr/t/t/ if ($at);
$gcwin[$i] = sprintf("%.6f",($g+$c)/$window);
$location[$i] = $i * $window;
$i ++;
}
$i --;
if ($opt eq 'g' || $opt eq 'show'){
my $title = "GC content";
$title = "AT content" if ($at);
mkdir ("graph", 0777);
_UniMultiGrapher(\@
location,\@ gcwin,
-x=>"bp", -y=>$title,
-filename=>$filename,
-title=>$title, -style=>"lines", -type=>"columns"
);
msg_gimv("graph/" . $filename)
if ($opt eq 'show');;
}elsif ($opt eq 'f'){
my $title = "GC content";
my $j = 0;
$title = "AT content" if ($at);
mkdir ("data", 0777);
open(OUT, ">data/" . $filename);
print OUT "location,$title\n";
for ($j = 0; $j <= $i; $j++){
print OUT $location[$j], ",", $gcwin[$j], "\n";
}
close(OUT);
}
return @gcwin;
}
sub genomicskew
{ &opt_default(divide=>250, at=>0, output=>"show", application=>"gimv",
filename=>"genomicskew.png");
my @args = opt_get(@_);
&opt_default(filename=>"genomicskew.csv") if (opt_val("output") eq 'f');
my $gb = opt_as_gb(shift @args);
my $divide = opt_val("divide");
my $opt = opt_val("output");
my $application = opt_val("application");
my $filename = opt_val("filename");
my $at = opt_val("at");
my (@gcskew, @betskew, @geneskew, @thirdskew, @location);
my ($j, $window, $CDS, $BET, $THIRD);
my $before = 0;
my $i = 1;
while(defined %{$gb->{"CDS$i"}}){
my $feature = $gb->{"CDS$i"}->{feature};
if ($gb->{"FEATURE$feature"}->{join}){
$i ++;
next;
}
my $seq = $gb->get_gbkseq(
$gb->{"CDS$i"}->{start},
$gb->{"CDS$i"}->{end}
);
$CDS .= $seq;
for($j = 2; $j <= length($seq); $j += 3){
if ($gb->{"CDS$i"}->{direction} eq 'complement'){
$THIRD .= substr($seq, $j, 1);
}else{
$THIRD .= substr($seq, $j - 2, 1);
}
}
$BET .= substr($gb->{SEQ}, $before, $gb->{"CDS$i"}->{start} - $before)
unless ($gb->{"CDS$i"}->{start} - $before < 1);
$before = $gb->{"CDS$i"}->{end};
$i ++;
}
for ($j = 0; $j <= $divide; $j ++){
$location[$j] = $j;
}
$i = 0;
$window = int(length($gb->{SEQ}) / $divide);
while($i <= $divide){
my $g = substr($gb->{SEQ}, $window * $i, $window) =~ tr/g/g/;
$g = substr($gb->{SEQ}, $window * $i, $window) =~ tr/a/a/ if ($at);
my $c = substr($gb->{SEQ}, $window * $i, $window) =~ tr/c/c/;
$c = substr($gb->{SEQ}, $window * $i, $window) =~ tr/t/t/ if ($at);
$gcskew[$i] = sprintf("%.6f",($c-$g)/($c+$g));
$i ++;
}
$i = 0;
$window = int(length($CDS) / $divide);
while($i <= $divide){
my $g = substr($CDS, $window * $i, $window) =~ tr/g/g/;
$g = substr($CDS, $window * $i, $window) =~ tr/a/a/ if ($at);
my $c = substr($CDS, $window * $i, $window) =~ tr/c/c/;
$g = substr($CDS, $window * $i, $window) =~ tr/t/t/ if ($at);
$geneskew[$i] = 0;
$geneskew[$i] = sprintf("%.6f",($c-$g)/($c+$g)) unless ($c+$g<1);
$i ++;
}
$i = 0;
$window = int(length($BET) / $divide);
while($i <= $divide){
my $g = substr($BET, $window * $i, $window) =~ tr/g/g/;
$g = substr($BET, $window * $i, $window) =~ tr/a/a/ if ($at);
my $c = substr($BET, $window * $i, $window) =~ tr/c/c/;
$g = substr($BET, $window * $i, $window) =~ tr/t/t/ if ($at);
$betskew[$i] = 0;
$betskew[$i] = sprintf("%.6f",($c-$g)/($c+$g)) unless ($c+$g<1);
$i ++;
}
$i = 0;
$window = int(length($THIRD) / $divide);
while($i <= $divide){
my $g = substr($THIRD, $window * $i, $window) =~ tr/g/g/;
$g = substr($THIRD, $window * $i, $window) =~ tr/a/a/ if ($at);
my $c = substr($THIRD, $window * $i, $window) =~ tr/c/c/;
$g = substr($THIRD, $window * $i, $window) =~ tr/t/t/ if ($at);
$thirdskew[$i] = 0;
$thirdskew[$i] = sprintf("%.6f",($c-$g)/($c+$g)) unless ($c+$g<1);
$i ++;
}
if ($opt eq "show" || $opt eq "g"){
my $title = "GC skew";
$title = "AT skew" if ($at);
mkdir ("graph", 0777);
_UniMultiGrapher(\@
location,
-x=>"bp", -y=>$title,\@
gcskew, -x1=>"whole genome",\@
geneskew, -x2=>"coding region",\@
betskew, -x3=>"intergenic region",\@
thirdskew, -x4=>"codon third position",
-style=>"lines", -type=>"columns",
-filename=>$filename,
-title=>$title
);
msg_gimv("graph/" . $filename)
if ($opt eq 'show');
}elsif ($opt eq 'f'){
my $title = "GC skew";
my $j = 0;
$title = "AT skew" if ($at);
mkdir ("data", 0777);
open(OUT, ">data/" . $filename);
print OUT "location,$title,coding,intergenic,third codon\n";
for ($j = 0; $j <= $divide; $j++){
print OUT $location[$j], ",", $gcskew[$j], $geneskew[$j], ",",
$betskew[$j], ",", $thirdskew[$j], ",", "\n";
}
close(OUT);
}
return 1;
}
sub leading_strand
{ my @args = opt_get(@_);
my $gb = shift @args;
my ($ori, $ter) = rep_ori_ter($gb);
my ($seq1, $seq2);
if ($ori > $ter){
$seq1 = substr($gb->{SEQ}, $ori);
$seq1 .= substr($gb->{SEQ}, 0, $ter);
$seq2 = G::Seq::Util::_complement(substr($gb->{SEQ}, $ter, $ori - $ter));
}else{
$seq1 = substr($gb->{SEQ}, $ori, $ter - $ori);
$seq2 = G::Seq::Util::_complement( substr($gb->{SEQ}, $ter) . substr($gb->{SEQ}, 0, $ori) );
}
return ($seq1, $seq2);
}
sub new
{ my $pkg = shift;
my $filename = shift;
my $option = shift;
my $this;
return $this;
}
sub ori_search
{ opt_default(window=>100, mincount=>10);
my @argv = opt_get(@_);
my $gb = opt_as_gb(shift @argv);
my $window = opt_val("window");
my $mincount = opt_val("mincount");
my $i;
my %origin;
for ($i = 0; $i < length($gb->{SEQ}); $i += $window){
my $tmp = substr($gb->{SEQ}, $i) . substr($gb->{SEQ}, 0, $i);
my ($ori, $ter) = find_ori_ter($tmp, -window=>$window, -output=>"NULL");
$ori += $i;
$ori -= length($gb->{SEQ}) if ($ori > length($gb->{SEQ}));
$origin{$ori}++;
}
my @keys = sort {$origin{$b} <=> $origin{$a}} keys %origin;
foreach my $pos (@keys){
next if ($origin{$pos} <= $mincount);
msg_send(sprintf "%d,%d\n", $pos, $origin{$pos});
}
return %origin;
}
sub query_strand
{ opt_default(direction=>'direct');
my @args = opt_get(@_);
my $gb = shift @args;
my $pos = shift @args;
my $direction = opt_val("direction");
my ($ori, $ter) = rep_ori_ter($gb);
if ($ori > $ter){
if ($pos < $ter || $pos > $ori){
if ($direction eq 'complement'){
return ("lagging");
}else{
return ("leading");
}
}else{
if ($direction eq 'complement'){
return ("leading");
}else{
return ("lagging");
}
}
}else{
if ($pos > $ori && $pos < $ter){
if ($direction eq 'complement'){
return ("lagging");
}else{
return ("leading");
}
}else{
if ($direction eq 'complement'){
return ("leading");
}else{
return ("lagging");
}
}
}
}
sub rep_ori_ter
{ my @args = opt_get(@_);
my $gb = opt_as_gb(shift @args);
my ($ori, $ter);
my $id = $gb->{LOCUS}->{id};
if ($id eq 'U00096' || $id eq 'NC_000913'){
##Escherichia coli K12
##Freeman et al 1998
$ori = 3923500 - 1;
$ter = 1588800 - 1;
}elsif ($id eq 'AL009126' || $id eq 'NC_000964'){
##Bacillus subtilis
##Freeman et al 1998
$ori = 1 - 1;
$ter = 2017000 - 1;
}elsif ($id eq 'L42023' || $id eq 'NC_000907'){
##Haemophilus influenzae
##Freeman et al 1998
$ori = 603000 - 1;
$ter = 1518000 - 1;
}elsif ($id eq 'AL513382' || $id eq 'NC_003198'){
##Salmonella typhi
##Parkhill et al 2001
$ori = 3765000 - 1;
$ter = 1437000 - 1;
}else{
($ori, $ter) = &G::Seq::GCskew::find_ori_ter($gb, -window=>10000);
}
return ($ori, $ter);
}
sub view_cds
{ &opt_default(length=>100, filename=>"view_cds.png",
gap=>3, output=>"show", application=>"gimv");
my @args = opt_get(@_);
my $gb = opt_as_gb(shift @args);
my (@a, @t, @g, @c, @pos);
my $numcds = 0;
my $i = 0;
my $length = opt_val("length");
my $filename = opt_val("filename");
my $output = opt_val("output");
my $application = opt_val("application");
$filename = "view_cds.csv" if ($output eq "f" &&
opt_val("filename") eq "view_cds.png");
my $gap = opt_val("gap");
while(defined %{$gb->{"CDS$numcds"}}){ $numcds ++ }
for ($i = 0; $i < $length * 4 + 6 + $gap; $i++){
$a[$i] = 0;
$t[$i] = 0;
$g[$i] = 0;
$c[$i] = 0;
}
foreach my $cds ($gb->cds()){
my $seq;
$seq = $gb->before_startcodon($cds, $length);
$seq .= $gb->startcodon($cds);
$seq .= $gb->after_startcodon($cds, $length);
for ($i = 0; $i < length($seq); $i ++){
if (substr($seq, $i, 1) eq 'a'){
$a[$i] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 't'){
$t[$i] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 'g'){
$g[$i] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 'c'){
$c[$i] += 100/$numcds;
}
}
$seq = $gb->before_stopcodon($cds, $length);
$seq .= $gb->stopcodon($cds);
$seq .= $gb->after_stopcodon($cds, $length);
for ($i = 0; $i < length($seq); $i ++){
if (substr($seq, $i, 1) eq 'a'){
$a[$i + length($seq) + $gap] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 't'){
$t[$i + length($seq) + $gap] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 'g'){
$g[$i + length($seq) + $gap] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 'c'){
$c[$i + length($seq) + $gap] += 100/$numcds;
}
}
}
for ($i = 1; $i <= $length * 4 + 6 + $gap; $i ++){
push(@pos, $i);
}
if ($output eq "g" || $output eq "show"){
_UniMultiGrapher(\@
pos, -x => "position", -y => "percentage",\@
a, -x1=>"A",\@ t, -x2=>"T",\@
g, -x3=>"G",\@ c, -x4=>"C",
-filename => $filename,
-title => "Base Contents Around Start/Stop Codons"
);
msg_gimv("graph/$filename") if($output eq "show");
}elsif ($output eq "f"){
open(OUT, '>data/' . $filename);
print OUT "position,A,T,G,C\n";
for ($i = 0; $i < $length * 4 + 6 + $gap; $i ++){
printf OUT "%d,%3.2f,%3.2f,%3.2f,%3.2f\n", $i + 1,
$a[$i], $t[$i], $g[$i], $c[$i];
}
close(OUT);
}
}
General documentation