G::Tools
GPAC
G::Tools::GPAC - Perl extension for blah blah blah
|
Globals (from use vars definitions) |
@EXPORT |
$VERSION |
@EXPORT_OK |
use G::Tools::PBS; blah blah blah
|
Stub documentation for G::GPAC::PBS was created by h2xs. It looks like the author of the extension was negligent enough to leave the stub unedited.
Blah blah blah.
|
BEGIN | | Code |
DESTROY | No description | Code |
_acc2ftp_bacteria | No description | Code |
_random_elimination | No description | Code |
gpac | No description | Code |
new | No description | Code |
set_gpac | No description | Code |
test_gpac | No description | Code |
Methods description
Methods code
BEGIN
{ eval "use GD::Graph::bars;";
if($@){ warn "$@" };
}
sub DESTROY
{ my $self = shift;
}
sub _acc2ftp_bacteria
{ my @args = opt_get(@_);
my $gb = shift @args;
my $cwd = getcwd();
my $specie = '';
my $time = time;
my @hoge = ();
mkdir("/tmp/$time");
chdir("/tmp/$time");
if($gb->{LOCUS}->{id} =~/NC\_/){
@hoge = `wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/accessions`;
}else{
@hoge = `wget ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Bacteria/accessions`;
}
open(FILE, "accessions") || die($!);
while(<FILE>){
chomp;
my ($acc, $nym) = split(/\s/, $_, 2);
if ($acc eq $gb->{LOCUS}->{id}){
$specie = $nym;
$specie =~ s/ /_/g;
substr($specie, 0, 1) = '' if (substr($specie, 0, 1) eq '_');
last;
}
}
chdir($cwd);
if (length $specie){
if($gb->{LOCUS}->{id} =~/NC\_/){
return "ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/$specie/"
. $gb->{LOCUS}->{id};
}else{
return "ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Bacteria/$specie/"
. $gb->{LOCUS}->{id};
}
}else{
return;
}
}
sub _random_elimination
{ my $gb = shift;
my $n = shift;
my @cds = $gb->cds('all');
my $j = scalar(@cds);
while ($j > $n){
splice(@cds, int(rand($j)), 1);
$j --;
}
foreach my $cds ($gb->cds('all')){
$gb->{$cds}->{on} = 0;
}
foreach my $cds (@cds){
$gb->{$cds}->{on} = 1;
}
}
sub gpac
{ opt_default("ptt"=>'', "elim"=>0);
my @args = opt_get(@_);
my $gb = shift @args;
my $gpac = shift @args;
my $ptt = opt_val("ptt");
my $elim = opt_val("elim");
my $n = 0;
my $id = $gb->{CDS1}->{feature};
unless (length($gb->{"FEATURE$id"}->{gpac})){
set_gpac($gb, -ptt=>$ptt);
}
if ($elim){
foreach my $cds ($gb->cds('all')){
if ($gb->{$cds}->{gpac} =~ /$gpac/){
$gb->{$cds}->{on} = 0;
}else{
$gb->{$cds}->{on} = 1;
$n ++;
}
}
}else{
foreach my $cds ($gb->cds('all')){
if ($gb->{$cds}->{gpac} =~ /$gpac/){
$gb->{$cds}->{on} = 1;
$n ++;
}else{
$gb->{$cds}->{on} = 0;
}
}
}
return $n;
}
sub new
{ my $pkg = shift;
my $filename = shift;
my $option = shift;
my $this;
return $this;
}
sub set_gpac
{ opt_default("ptt"=>'');
my @args = opt_get(@_);
my $gb = shift @args;
my $ptt = opt_val("ptt");
my ($cds) = (0,0,0,0,0,0,0);
my $specie;
my $cwd = getcwd();
my $time = time;
my $openfile = '';
if (length $ptt){
$openfile = $ptt;
}else{
my $file = _acc2ftp_bacteria($gb) . '.ptt';
mkdir("/tmp/$time");
chdir("/tmp/$time");
my @hoge = `wget $file`;
my @name = split(/\//, $file);
chdir($cwd);
$openfile = "/tmp/$time/" . pop @name;
}
open(FILE, $openfile) || die($!);
while(<FILE>){
chomp;
my ($hypo, $func, $put, $pcog) = (0,0,0,0);
if (/^\s+[0-9]+\.\.[0-9]+\s+/){
s/^\s+//;
s/\s+$//;
my ($location, $strand, $length, $pid, $gene,
$synonym, $code, $cog, $product) = split (/\s+/, $_, 9);
$cds ++;
$pcog ++ if ($cog =~ /^COG/);
my $id = $gb->{"CDS$cds"}->{feature};
if (lc($product) =~ /hypothetical/ || lc($product) =~ /^unknown/
|| lc($product) =~ /^orf/ || lc($synonym) eq lc($product)){
$hypo ++;
}elsif (lc($product) =~ /putative/ || lc($product) =~ /similar/
|| lc($product) =~ /probable/){
$put ++;
}else{
$func ++;
}
$gb->{"FEATURE$id"}->{code} = $code;
$gb->{"FEATURE$id"}->{cog} = $cog;
$gb->{"FEATURE$id"}->{synonym} = $synonym;
$gb->{"FEATURE$id"}->{gpac} .= '0';
$gb->{"FEATURE$id"}->{gpac} .= '1' if ($hypo < 1);
$gb->{"FEATURE$id"}->{gpac} .= '2' if ($put < 1 && $hypo < 1);
$gb->{"FEATURE$id"}->{gpac} .= '3' if ($pcog);
$gb->{"FEATURE$id"}->{gpac} .= '4' if ($pcog && $put < 1 && $hypo < 1);
$gb->{"FEATURE$id"}->{gpac} .= '5' if ($pcog || $func || $put);
}
}
close(FILE);
return $openfile;
}
sub test_gpac
{ opt_default("stat"=>"t.test", "verbose"=>0, "output"=>"show",
"filename"=>"gpac.html", "ptt"=>'');
my @args = opt_get(@_);
my $gb = shift @args;
my $sub = shift @args;
my $rcmd = new Rcmd;
my $stat = opt_val("stat");
my $verbose = opt_val("verbose");
my $filename = opt_val("filename");
my $output = opt_val("output");
my $ptt = opt_val("ptt");
my $time = time;
my $numcds = scalar($gb->cds());
$rcmd->exec('require(ctest)');
my ($i, $j, @r0, @r1, @r2, @r3, @r4, @r5,
@r1rev, @r2rev, @r3rev, @r4rev, @r5rev,
$type, @nrev, @ans, @control, @controlrev);
my @ansrev = (0);
my $label = 'results';
no strict;
for ($i = 0; $i <= 5; $i ++){
msg_error("Running GPAC Level $i...\n\n");
$n[$i] = gpac($gb, $i, -ptt=>$ptt);
@{"r$i"} = &{$sub};
my @tmp = @{"r$i"};
if (length $tmp[1]){
$type = "ARRAY";
}else{
my $ref = ref $tmp[0];
if ($ref){
$type = $ref;
if ($ref eq "SCALAR"){
@{"r$i"} = ($$tmp[0]);
$ans[$i] = $$tmp[0];
}elsif ($ref eq "ARRAY"){
@{"r$i"} = (@$tmp[0]);
}elsif ($ref eq "HASH"){
my @values;
foreach my $val (sort keys %{$tmp[0]}){
push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
}
@{"r$i"} = @values;
}else{
die("Return value of the subroutine not fitted for GPAC\n\n");
}
}else{
$type = "SCALAR";
$ans[$i] = $tmp[0];
}
}
if ($type eq 'ARRAY' || $type eq 'HASH'){
if ($i == 0){
$rcmd->exec('x_c(', join(",\n", @{"r0"}), ')');
$ans[$i] = 0;
}else{
$rcmd->exec('y_c(', join(",\n", @{"r$i"}), ')');
$ans[$i] = $rcmd->exec("$stat\(x,y, paired=T\)\$p\.value");
}
$label = "p value";
}
}
for ($i = 1; $i <= 5; $i ++){
msg_error("Running GPAC Level ", $i, " Eliminated...\n\n");
$nrev[$i] = gpac($gb, $i, -ptt=>$ptt, -elim=>1);
@{"r$i" . "rev"} = &{$sub};
my @tmp = @{"r$i" . "rev"};
if (defined $tmp[1]){
$type = "ARRAY";
}else{
my $ref = ref $tmp[0];
if ($ref){
$type = $ref;
if ($ref eq "SCALAR"){
@{"r$i" . "rev"} = ($$tmp[0]);
$ansrev[$i] = $$tmp[0];
}elsif ($ref eq "ARRAY"){
@{"r$i" . "rev"} = (@$tmp[0]);
}elsif ($ref eq "HASH"){
my @values;
foreach my $val (sort keys %{$tmp[0]}){
push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
}
@{"r$i" . "rev"} = @values;
}else{
die("Return value of the subroutine not fitted for GPAC\n\n");
}
}else{
$type = "SCALAR";
$ansrev[$i] = $tmp[0];
}
}
if ($type eq 'ARRAY' || $type eq 'HASH'){
$rcmd->exec('y_c(', join(",\n", @{"r$i" . "rev"}), ')');
$ansrev[$i] = $rcmd->exec("$stat\(x,y, paired=T\)\$p\.value");
$label = "p value";
}
}
for ($j = 0; $j < $verbose; $j ++){
my @tmparray = (0);
msg_error("Running GPAC Control ", $j + 1, '/' , $verbose, "...\n\n");
for ($i = 1; $i <= 5; $i ++){
_random_elimination($gb, $n[$i]);
my @tmp = &{$sub};
my @tmp2 = @tmp;
unless (defined $tmp[1]){
my $ref = ref $tmp[0];
if ($ref){
if ($ref eq "SCALAR"){
$tmparray[$i] = $$tmp[0];
}elsif ($ref eq "ARRAY"){
@tmp2 = (@$tmp[0]);
}elsif ($ref eq "HASH"){
my @values;
foreach my $val (sort keys %{$tmp[0]}){
push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
}
@tmp2 = @values;
}else{
die("Return value of the subroutine not fitted for GPAC\n\n");
}
}else{
$tmparray[$i] = $tmp[0];
}
}
if ($type eq 'ARRAY' || $type eq 'HASH'){
$rcmd->exec('y_c(', join(",\n", @tmp2), ')');
$tmparray[$i] = $rcmd->exec("$stat\(x,y,paired=T\)\$p\.value");
}
}
push (@control, [@tmparray]);
@tmparray = (0);
for ($i = 1; $i <= 5; $i ++){
_random_elimination($gb, $nrev[$i]);
my @tmp = &{$sub};
my @tmp2 = @tmp;
unless (defined $tmp[1]){
my $ref = ref $tmp[0];
if ($ref){
if ($ref eq "SCALAR"){
$tmparray[$i] = $$tmp[0];
}elsif ($ref eq "ARRAY"){
@tmp2 = (@$tmp[0]);
}elsif ($ref eq "HASH"){
my @values;
foreach my $val (sort keys %{$tmp[0]}){
push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
}
@tmp2 = @values;
}else{
die("Return value of the subroutine not fitted for GPAC\n\n");
}
}else{
$tmparray[$i] = $tmp[0];
}
}
if ($type eq 'ARRAY' || $type eq 'HASH'){
$rcmd->exec('y_c(', join(",\n", @tmp2), ')');
$tmparray[$i] = $rcmd->exec("$stat\(x,y,paired=T\)\$p\.value");
}
}
push (@controlrev, [@tmparray]);
}
my (@data, @datarev, @legend, @temp, @temprev);
if ($verbose > 0){
@data = ([0,1,2,3,4,5],\@ ans, @control);
@datarev = ([0,1,2,3,4,5],\@ ansrev, @controlrev);
@legend = ("Result");
for ($j = 1; $j <= $verbose; $j ++){
push(@legend, "Control $j");
}
}else{
@data = ([0,1,2,3,4,5],\@ ans);
@datarev = ([0,1,2,3,4,5],\@ ansrev);
}
@temp = @ans;
@temprev = @ansrev;
foreach my $ctrl (@control){
@temp = (@temp, @$ctrl);
}
foreach my $ctrl (@controlrev){
@temprev = (@temprev, @$ctrl);
}
my @temp3 = (@temp, @temprev);
my @temp2 = sort {$b <=> $a} @temp3;
my $max = shift(@temp2) * 1.1;
my @expo = split(/e/, sprintf("%e", $max, 2));
$max = sprintf("%.2f", $expo[0]) * (10 ** $expo[1]);
my @data2 = ([1,2,3,4,5], [$n[1]/$n[0], $n[2]/$n[0],
$n[3]/$n[0], $n[4]/$n[0], $n[5]/$n[0]],
[$nrev[1]/$n[0], $nrev[2]/$n[0], $nrev[3]/$n[0],
$nrev[4]/$n[0], $nrev[5]/$n[0]]);
my $graph = GD::Graph::bars->new(400, 300);
my $graph2 = GD::Graph::bars->new(250, 270);
my $graph3 = GD::Graph::bars->new(400, 300);
$graph->set(
x_label => 'GPAC',
y_label => $label,
title => 'GPAC Test',
y_max_value => $max,
bar_width => 2,
bar_spacing => 1
);
$graph2->set(
x_label => 'GPAC',
y_label => 'percentage of CDS',
y_max_value => 1,
y_min_value => 0,
title => 'GPAC Degree of Freedom',
bar_width => 5,
bar_spacing => 10
);
$graph3->set(
x_label => 'GPAC',
y_label => $label,
title => 'GPAC Eliminated Test',
y_max_value => $max,
bar_width => 2,
bar_spacing => 1
);
$graph->set_legend(@legend) if ($verbose > 0);
$graph2->set_legend("GPAC CDS", "Eliminated CDS");
$graph3->set_legend(@legend) if ($verbose > 0);
my $gd = $graph->plot(\@data);
my $gd2 = $graph2->plot(\@data2);
my $gd3 = $graph3->plot(\@datarev);
mkdir ("graph", 0777);
open(IMG, '>graph/' . $time . '-gpac.png') or die $!;
binmode IMG;
print IMG $gd->png;
close(IMG);
open(IMG, '>graph/' . $time . '-df.png') or die $!;
binmode IMG;
print IMG $gd2->png;
close(IMG);
open(IMG, '>graph/' . $time . '-elim.png') or die $!;
binmode IMG;
print IMG $gd3->png;
close(IMG);
open(HTML, '>graph/' . $filename) or die $!;
print HTML qq(
<HTML><HEAD><TITLE>GPAC Test</TITLE></HEAD>
<BODY bgcolor="white">
<table><tr><td valign="top">
<img src="$time-gpac.png">
</td><td valign="top">
<img src="$time-elim.png">
</td></tr>
<tr><td style="font-size:10pt">
<UL>
<LI>GPAC Level 0: all CDS<BR>
<LI>GPAC Level 1: without hypothetical CDS<BR>
<LI>GPAC Level 2: without hypothetical & putative CDS<BR>
<LI>GPAC Level 3: only conserved CDS (COGs)<BR>
<LI>GPAC Level 4: only conserved CDS, but without hypothetical & putative<BR>
<LI>GPAC Level 5: conserved hypothetical & putative and functional proteins<BR>
</UL>
</td><td align="right">
<img src="$time-df.png"> <BR>
N = $numcds
</td></tr></table>
<BR><BR>
</BODY></HTML>
);
system("galeon graph/$filename &") if ($output eq 'show');
}
General documentation