G::Seq

GCskew

Summary Included libraries Package variables Synopsis Description General documentation Methods

Summary
G::Seq::GCskew - Perl extension for blah blah blah
Package variables top
Globals (from use vars definitions)
@EXPORT
$VERSION
@EXPORT_OK
Included modulestop
G::Messenger
G::Tools::Graph
SubOpt
strict
Inherit top
AutoLoader Exporter
Synopsistop
  use G::Seq::GCskew;
blah blah blah
Descriptiontop
Stub documentation for G::Seq::GCskew was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.

Blah blah blah.
Methodstop
BEGIN Code
DESTROYNo descriptionCode
cum_gcskewNo descriptionCode
find_ori_terNo descriptionCode
gcskewNo descriptionCode
gcwinNo descriptionCode
genomicskewNo descriptionCode
leading_strandNo descriptionCode
newNo descriptionCode
ori_searchNo descriptionCode
query_strandNo descriptionCode
rep_ori_terNo descriptionCode
view_cdsNo descriptionCode

Methods description


Methods code

BEGINtop
BEGIN {
    eval "use Chart::Graph qw(gnuplot);";
    if($@){ warn "$@" };
}
DESTROYdescriptiontopprevnext
sub DESTROY {
    my $self = shift;
}
cum_gcskewdescriptiontopprevnext
sub cum_gcskew {
    &opt_default(window=>10000, at=>0, output=>"show", application=>"gimv",
		  filename=>"cum_gcskew.png");
    my @args = opt_get(@_);
    &opt_default(filename=>"cum_gcskew.csv") if (opt_val("output") eq 'f');
    my $gb = opt_as_gb(shift @args);
    my $ref =\$ gb->{SEQ};
    my $window = opt_val("window");
    my $application = opt_val("application");
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    my $at = opt_val("at");
    my @gcskew = ();
    my @location = ();
    my $j;
    my $tmp;
    
    my $i = 0;
    while(length($$ref) - ($window * $i) >= $window){
	my $g = substr($$ref, $window * $i, $window) =~ tr/g/g/;
	$g = substr($$ref, $window * $i, $window) =~ tr/a/a/ if ($at);
	my $c = substr($$ref, $window * $i, $window) =~ tr/c/c/;
	$c = substr($$ref, $window * $i, $window) =~ tr/t/t/ if ($at);
	if ($c+$g <= 0){
	    $tmp += 0;
	}else{
	    $tmp += sprintf("%.6f",($c-$g)/($c+$g));
} $gcskew[$i] = $tmp; $location[$i] = $i * $window; $i ++; } $i --; if ($output eq 'g' || $output eq 'show'){ my $title = "Cumulative GC skew"; mkdir ("graph", 0777); $title = "Cumulative AT skew" if ($at); _UniMultiGrapher(\@ location,\@gcskew, -x=>"bp", -y=>$title, -filename=>$filename, -title=>$title, -style=>"lines", -type=>"columns", ); msg_gimv("graph/" . $filename) if ($output eq 'show'); }elsif ($output eq 'f'){ my $title = "Cumulative GC skew"; my $j = 0; mkdir ("data", 0777); $title = "Cumulative AT skew" if ($at); open(OUT, ">data/" . $filename); print OUT "location,$title\n"; for ($j = 0; $j <= $i; $j++){ print OUT $location[$j], ",", $gcskew[$j], "\n"; } close(OUT); } return @gcskew;
}
find_ori_terdescriptiontopprevnext
sub find_ori_ter {
    &opt_default(window=>1000, output=>"stdout");
    my @args = opt_get(@_);
    my $ref_Genome = opt_as_gb(shift @args);
    my $printer = shift;
    my $iWindow = opt_val("window");
    my $output = opt_val("output");
    my $i = 0;
    my $max = 0;
    my $maxi = 0;
    my $min = 0;
    my $mini = 0;
    my $before = 0;
    my ($g,$c, $cumulative);

    &msg_send("\nfind_ori_ter:\n") if ($output eq 'stdout');
    &msg_send("   Window size = $iWindow\n") if ($output eq 'stdout');

    while(length(substr($ref_Genome->{SEQ}, $i*$iWindow)) >= $iWindow){
	$g = substr($ref_Genome->{SEQ}, $i * $iWindow, $iWindow) =~ tr/g/g/;
	$c = substr($ref_Genome->{SEQ}, $i * $iWindow, $iWindow) =~ tr/c/c/;
	if ($c + $g == 0){
	    $i++;
	        &msg_error("ERROR: Window " , $i * $iWindow, "-" ,
	    $i * $iWindow + $iWindow, "bp contains no C or G\n");
	    next;
	} 
	$cumulative += ($c-$g)/($c+$g);
if ($cumulative > $max){ $max = $cumulative; $maxi = $i; }elsif($cumulative < $min){ $min = $cumulative; $mini = $i; } $i ++; } &msg_send(" Predicted Origin: " , $maxi * $iWindow + $iWindow / 2 , "\n") if ($output eq 'stdout');
&msg_send(" Predicted Terminus: " , $mini * $iWindow + $iWindow / 2 , "\n\n") if ($output eq 'stdout');
return ($maxi * $iWindow + $iWindow / 2, $mini * $iWindow + $iWindow / 2);
}
gcskewdescriptiontopprevnext
sub gcskew {
    &opt_default(window=>10000, at=>0, output=>"show", application=>"gimv",
		  filename=>"gcskew.png");
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my $ref =\$ gb->{SEQ};
    my $window = opt_val("window");
    my $application = opt_val("application");
    my $output = opt_val("output");
    my $filename = opt_val("filename");
    $filename =~ s/png/csv/g if (opt_val("output") eq 'f');
    my $at = opt_val("at");
    my @gcskew = ();
    my @location = ();
    my $j;
    
    my $i = 0;
    while(length($$ref) - ($window * $i) >= $window){
	my $g = substr($$ref, $window * $i, $window) =~ tr/g/g/;
	$g = substr($$ref, $window * $i, $window) =~ tr/a/a/ if ($at);
	my $c = substr($$ref, $window * $i, $window) =~ tr/c/c/;
	$c = substr($$ref, $window * $i, $window) =~ tr/t/t/ if ($at);
	if ($c+$g <= 0){
	    $gcskew[$i] = 0;
	}else{
	    $gcskew[$i] = sprintf("%.6f",($c-$g)/($c+$g));
} $location[$i] = $i * $window; $i ++; } $i --; if ($output eq 'g' || $output eq 'show'){ my $title = "GC skew"; mkdir ("graph", 0777); $title = "AT skew" if ($at); _UniMultiGrapher(\@ location,\@gcskew, -x=>"bp", -y=>$title, -filename=>$filename, -title=>$title, -style=>"lines", -type=>"columns", ); msg_gimv("graph/" . $filename) if ($output eq 'show'); }elsif ($output eq 'f'){ my $title = "GC skew"; my $j = 0; mkdir ("data", 0777); $title = "AT skew" if ($at); open(OUT, ">data/" . $filename); print OUT "location,$title\n"; for ($j = 0; $j <= $i; $j++){ print OUT $location[$j], ",", $gcskew[$j], "\n"; } close(OUT); } return @gcskew;
}
gcwindescriptiontopprevnext
sub gcwin {
    &opt_default(window=>10000, at=>0, output=>"show", application=>"gimv",
		  filename=>"gcwin.png");
    my @args = opt_get(@_);
    &opt_default(filename=>"gcwin.csv") if (opt_val("output") eq 'f');

    my $gb = opt_as_gb(shift @args);
    my $ref =\$ gb->{SEQ};
    my $window = opt_val("window");
    my $at = opt_val("at");
    my $application = opt_val("application");
    my $filename = opt_val("filename");
    my $opt = opt_val("output");
    my (@gcwin, @location);
    my $j;
    
    my $i = 0;
    while(length($$ref) - ($window * $i) >= $window){
	my $g = substr($$ref, $window * $i, $window) =~ tr/g/g/;
	$g = substr($$ref, $window * $i, $window) =~ tr/a/a/ if ($at);
	my $c = substr($$ref, $window * $i, $window) =~ tr/c/c/;
	$c = substr($$ref, $window * $i, $window) =~ tr/t/t/ if ($at);
	$gcwin[$i] = sprintf("%.6f",($g+$c)/$window);
$location[$i] = $i * $window; $i ++; } $i --; if ($opt eq 'g' || $opt eq 'show'){ my $title = "GC content"; $title = "AT content" if ($at); mkdir ("graph", 0777); _UniMultiGrapher(\@ location,\@ gcwin, -x=>"bp", -y=>$title, -filename=>$filename, -title=>$title, -style=>"lines", -type=>"columns" ); msg_gimv("graph/" . $filename) if ($opt eq 'show');; }elsif ($opt eq 'f'){ my $title = "GC content"; my $j = 0; $title = "AT content" if ($at); mkdir ("data", 0777); open(OUT, ">data/" . $filename); print OUT "location,$title\n"; for ($j = 0; $j <= $i; $j++){ print OUT $location[$j], ",", $gcwin[$j], "\n"; } close(OUT); } return @gcwin;
}
genomicskewdescriptiontopprevnext
sub genomicskew {
    &opt_default(divide=>250, at=>0, output=>"show", application=>"gimv",
		  filename=>"genomicskew.png");
    my @args = opt_get(@_);
    &opt_default(filename=>"genomicskew.csv") if (opt_val("output") eq 'f');
    
    my $gb = opt_as_gb(shift @args);
    my $divide = opt_val("divide");
    my $opt = opt_val("output");
    my $application = opt_val("application");
    my $filename = opt_val("filename");
    my $at = opt_val("at");
    my (@gcskew, @betskew, @geneskew, @thirdskew, @location);
    my ($j, $window, $CDS, $BET, $THIRD);
    my $before = 0;
    my $i = 1;
    
    while(defined %{$gb->{"CDS$i"}}){
	my $feature = $gb->{"CDS$i"}->{feature};
	if ($gb->{"FEATURE$feature"}->{join}){
	    $i ++;
	    next;
	}
	my $seq = $gb->get_gbkseq(
				  $gb->{"CDS$i"}->{start},
                                  $gb->{"CDS$i"}->{end}
				  );
	$CDS .= $seq;
	
	for($j = 2; $j <= length($seq); $j += 3){
	    if ($gb->{"CDS$i"}->{direction} eq 'complement'){
		$THIRD .= substr($seq, $j, 1);
	    }else{
		$THIRD .= substr($seq, $j - 2, 1);
	    }
	}
	$BET .= substr($gb->{SEQ}, $before, $gb->{"CDS$i"}->{start} - $before)
	    unless ($gb->{"CDS$i"}->{start} - $before < 1);
	$before = $gb->{"CDS$i"}->{end};
	$i ++;
    }
    
    for ($j = 0; $j <= $divide; $j ++){
	$location[$j] = $j;
    }
    $i = 0;
    
    $window = int(length($gb->{SEQ}) / $divide);
while($i <= $divide){ my $g = substr($gb->{SEQ}, $window * $i, $window) =~ tr/g/g/; $g = substr($gb->{SEQ}, $window * $i, $window) =~ tr/a/a/ if ($at); my $c = substr($gb->{SEQ}, $window * $i, $window) =~ tr/c/c/; $c = substr($gb->{SEQ}, $window * $i, $window) =~ tr/t/t/ if ($at); $gcskew[$i] = sprintf("%.6f",($c-$g)/($c+$g));
$i ++; } $i = 0; $window = int(length($CDS) / $divide);
while($i <= $divide){ my $g = substr($CDS, $window * $i, $window) =~ tr/g/g/; $g = substr($CDS, $window * $i, $window) =~ tr/a/a/ if ($at); my $c = substr($CDS, $window * $i, $window) =~ tr/c/c/; $g = substr($CDS, $window * $i, $window) =~ tr/t/t/ if ($at); $geneskew[$i] = 0; $geneskew[$i] = sprintf("%.6f",($c-$g)/($c+$g)) unless ($c+$g<1);
$i ++; } $i = 0; $window = int(length($BET) / $divide);
while($i <= $divide){ my $g = substr($BET, $window * $i, $window) =~ tr/g/g/; $g = substr($BET, $window * $i, $window) =~ tr/a/a/ if ($at); my $c = substr($BET, $window * $i, $window) =~ tr/c/c/; $g = substr($BET, $window * $i, $window) =~ tr/t/t/ if ($at); $betskew[$i] = 0; $betskew[$i] = sprintf("%.6f",($c-$g)/($c+$g)) unless ($c+$g<1);
$i ++; } $i = 0; $window = int(length($THIRD) / $divide);
while($i <= $divide){ my $g = substr($THIRD, $window * $i, $window) =~ tr/g/g/; $g = substr($THIRD, $window * $i, $window) =~ tr/a/a/ if ($at); my $c = substr($THIRD, $window * $i, $window) =~ tr/c/c/; $g = substr($THIRD, $window * $i, $window) =~ tr/t/t/ if ($at); $thirdskew[$i] = 0; $thirdskew[$i] = sprintf("%.6f",($c-$g)/($c+$g)) unless ($c+$g<1);
$i ++; } if ($opt eq "show" || $opt eq "g"){ my $title = "GC skew"; $title = "AT skew" if ($at); mkdir ("graph", 0777); _UniMultiGrapher(\@ location, -x=>"bp", -y=>$title,\@ gcskew, -x1=>"whole genome",\@ geneskew, -x2=>"coding region",\@ betskew, -x3=>"intergenic region",\@ thirdskew, -x4=>"codon third position", -style=>"lines", -type=>"columns", -filename=>$filename, -title=>$title ); msg_gimv("graph/" . $filename) if ($opt eq 'show'); }elsif ($opt eq 'f'){ my $title = "GC skew"; my $j = 0; $title = "AT skew" if ($at); mkdir ("data", 0777); open(OUT, ">data/" . $filename); print OUT "location,$title,coding,intergenic,third codon\n"; for ($j = 0; $j <= $divide; $j++){ print OUT $location[$j], ",", $gcskew[$j], $geneskew[$j], ",", $betskew[$j], ",", $thirdskew[$j], ",", "\n"; } close(OUT); } return 1;
}
leading_stranddescriptiontopprevnext
sub leading_strand {
    my @args = opt_get(@_);
    my $gb = shift @args;
    my ($ori, $ter) = rep_ori_ter($gb);
    my ($seq1, $seq2);

    if ($ori > $ter){
	$seq1  = substr($gb->{SEQ}, $ori);
	$seq1 .= substr($gb->{SEQ}, 0, $ter);
	$seq2  = G::Seq::Util::_complement(substr($gb->{SEQ}, $ter, $ori - $ter));
    }else{
	$seq1 = substr($gb->{SEQ}, $ori, $ter - $ori);
	$seq2 = G::Seq::Util::_complement( substr($gb->{SEQ}, $ter) . substr($gb->{SEQ}, 0, $ori) );
    }
    
    return ($seq1, $seq2);
}
newdescriptiontopprevnext
sub new {
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this;

    return $this;
}
ori_searchdescriptiontopprevnext
sub ori_search {
    opt_default(window=>100, mincount=>10);
    my @argv = opt_get(@_);
    my $gb = opt_as_gb(shift @argv);
    my $window = opt_val("window");
    my $mincount = opt_val("mincount");
    my $i;
    my %origin;

    for ($i = 0; $i < length($gb->{SEQ}); $i += $window){
	my $tmp = substr($gb->{SEQ}, $i) . substr($gb->{SEQ}, 0, $i);
	my ($ori, $ter) = find_ori_ter($tmp, -window=>$window, -output=>"NULL");

	$ori += $i;
	$ori -= length($gb->{SEQ}) if ($ori > length($gb->{SEQ}));
	$origin{$ori}++;
    }
        
    my @keys = sort {$origin{$b} <=> $origin{$a}} keys %origin;

    foreach my $pos (@keys){
	next if ($origin{$pos} <= $mincount);
	msg_send(sprintf "%d,%d\n", $pos, $origin{$pos});
    }

    return %origin;
}
query_stranddescriptiontopprevnext
sub query_strand {
    opt_default(direction=>'direct');
    my @args = opt_get(@_);
    my $gb = shift @args;
    my $pos = shift @args;
    my $direction = opt_val("direction");

    my ($ori, $ter) = rep_ori_ter($gb);

    if ($ori > $ter){
	if ($pos < $ter || $pos > $ori){
	    if ($direction eq 'complement'){
		return ("lagging");
	    }else{
		return ("leading");
	    }
	}else{
	    if ($direction eq 'complement'){
		return ("leading");
	    }else{
		return ("lagging");
	    }
	}
    }else{
	if ($pos > $ori && $pos < $ter){
	    if ($direction eq 'complement'){
		return ("lagging");
	    }else{
		return ("leading");
	    }
	}else{
	    if ($direction eq 'complement'){
		return ("leading");
	    }else{
		return ("lagging");
	    }
	}
    }
}
rep_ori_terdescriptiontopprevnext
sub rep_ori_ter {
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my ($ori, $ter);
    my $id = $gb->{LOCUS}->{id};

    if ($id eq 'U00096' || $id eq 'NC_000913'){
	##Escherichia coli K12
##Freeman et al 1998
$ori = 3923500 - 1; $ter = 1588800 - 1; }elsif ($id eq 'AL009126' || $id eq 'NC_000964'){ ##Bacillus subtilis
##Freeman et al 1998
$ori = 1 - 1; $ter = 2017000 - 1; }elsif ($id eq 'L42023' || $id eq 'NC_000907'){ ##Haemophilus influenzae
##Freeman et al 1998
$ori = 603000 - 1; $ter = 1518000 - 1; }elsif ($id eq 'AL513382' || $id eq 'NC_003198'){ ##Salmonella typhi
##Parkhill et al 2001
$ori = 3765000 - 1; $ter = 1437000 - 1; }else{ ($ori, $ter) = &G::Seq::GCskew::find_ori_ter($gb, -window=>10000); } return ($ori, $ter);
}
view_cdsdescriptiontopprevnext
sub view_cds {
    &opt_default(length=>100, filename=>"view_cds.png", 
		  gap=>3, output=>"show", application=>"gimv");
    my @args = opt_get(@_);
    my $gb = opt_as_gb(shift @args);
    my (@a, @t, @g, @c, @pos);
    my $numcds = 0;
    my $i = 0;
    my $length = opt_val("length");
    my $filename = opt_val("filename");
    my $output = opt_val("output");
    my $application = opt_val("application");

    $filename = "view_cds.csv" if ($output eq "f" &&
				   opt_val("filename") eq "view_cds.png");
    my $gap = opt_val("gap");

    while(defined %{$gb->{"CDS$numcds"}}){ $numcds ++ }

    for ($i = 0; $i < $length * 4 + 6 + $gap; $i++){
	$a[$i] = 0;
	$t[$i] = 0;
	$g[$i] = 0;
	$c[$i] = 0;
    }

    foreach my $cds ($gb->cds()){
	my $seq;
	$seq  = $gb->before_startcodon($cds, $length);
	$seq .= $gb->startcodon($cds);
	$seq .= $gb->after_startcodon($cds, $length);
	
	for ($i = 0; $i < length($seq); $i ++){
	    if     (substr($seq, $i, 1) eq 'a'){
		$a[$i] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 't'){ $t[$i] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 'g'){ $g[$i] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 'c'){ $c[$i] += 100/$numcds;
} } $seq = $gb->before_stopcodon($cds, $length); $seq .= $gb->stopcodon($cds); $seq .= $gb->after_stopcodon($cds, $length); for ($i = 0; $i < length($seq); $i ++){ if (substr($seq, $i, 1) eq 'a'){ $a[$i + length($seq) + $gap] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 't'){ $t[$i + length($seq) + $gap] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 'g'){ $g[$i + length($seq) + $gap] += 100/$numcds;
}elsif (substr($seq, $i, 1) eq 'c'){ $c[$i + length($seq) + $gap] += 100/$numcds;
} } } for ($i = 1; $i <= $length * 4 + 6 + $gap; $i ++){ push(@pos, $i); } if ($output eq "g" || $output eq "show"){ _UniMultiGrapher(\@ pos, -x => "position", -y => "percentage",\@ a, -x1=>"A",\@ t, -x2=>"T",\@ g, -x3=>"G",\@ c, -x4=>"C", -filename => $filename, -title => "Base Contents Around Start/Stop Codons" ); msg_gimv("graph/$filename") if($output eq "show"); }elsif ($output eq "f"){ open(OUT, '>data/' . $filename); print OUT "position,A,T,G,C\n"; for ($i = 0; $i < $length * 4 + 6 + $gap; $i ++){ printf OUT "%d,%3.2f,%3.2f,%3.2f,%3.2f\n", $i + 1, $a[$i], $t[$i], $g[$i], $c[$i]; } close(OUT); }
}

General documentation

AUTHOR top
A. U. Thor, a.u.thor@a.galaxy.far.far.away
SEE ALSO top
perl(1).