G::Tools

GPAC

Summary Included libraries Package variables Synopsis Description General documentation Methods

Summary
G::Tools::GPAC - Perl extension for blah blah blah
Package variables top
Globals (from use vars definitions)
@EXPORT
$VERSION
@EXPORT_OK
Included modulestop
Cwd
G::Messenger
GD::Graph::bars
Rcmd
SubOpt
strict
Inherit top
AutoLoader Exporter
Synopsistop
  use G::Tools::PBS;
blah blah blah
Descriptiontop
Stub documentation for G::GPAC::PBS was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.

Blah blah blah.
Methodstop
BEGIN Code
DESTROYNo descriptionCode
_acc2ftp_bacteriaNo descriptionCode
_random_eliminationNo descriptionCode
gpacNo descriptionCode
newNo descriptionCode
set_gpacNo descriptionCode
test_gpacNo descriptionCode

Methods description


Methods code

BEGINtop
BEGIN {
    eval "use GD::Graph::bars;";
    if($@){ warn "$@" };
}
DESTROYdescriptiontopprevnext
sub DESTROY {
    my $self = shift;
}
_acc2ftp_bacteriadescriptiontopprevnext
sub _acc2ftp_bacteria {
    my @args = opt_get(@_);
    my $gb = shift @args;

    my $cwd = getcwd();
    my $specie = '';
    my $time = time;
    my @hoge = ();

    mkdir("/tmp/$time"); 
    chdir("/tmp/$time");

    if($gb->{LOCUS}->{id} =~/NC\_/){
        @hoge = `wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/accessions`;       
}else{ @hoge = `wget ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Bacteria/accessions`; } open(FILE, "accessions") || die($!); while(<FILE>){ chomp; my ($acc, $nym) = split(/\s/, $_, 2); if ($acc eq $gb->{LOCUS}->{id}){ $specie = $nym; $specie =~ s/ /_/g; substr($specie, 0, 1) = '' if (substr($specie, 0, 1) eq '_'); last; } } chdir($cwd); if (length $specie){ if($gb->{LOCUS}->{id} =~/NC\_/){ return "ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/$specie/" . $gb->{LOCUS}->{id}; }else{ return "ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Bacteria/$specie/" . $gb->{LOCUS}->{id}; } }else{ return; }
}
_random_eliminationdescriptiontopprevnext
sub _random_elimination {
    my $gb = shift;
    my $n = shift;
    my @cds = $gb->cds('all');

    my $j = scalar(@cds);
    
    while ($j > $n){
	splice(@cds, int(rand($j)), 1);
	$j --;
    }

    foreach my $cds ($gb->cds('all')){
	$gb->{$cds}->{on} = 0;
    }

    foreach my $cds (@cds){
	$gb->{$cds}->{on} = 1;
    }
}
gpacdescriptiontopprevnext
sub gpac {
    opt_default("ptt"=>'', "elim"=>0);
    my @args = opt_get(@_);
    my $gb = shift @args;
    my $gpac = shift @args;
    my $ptt = opt_val("ptt");
    my $elim = opt_val("elim");
    my $n = 0;

    my $id = $gb->{CDS1}->{feature};
    unless (length($gb->{"FEATURE$id"}->{gpac})){
	set_gpac($gb, -ptt=>$ptt);
    }

    if ($elim){
	foreach my $cds ($gb->cds('all')){
	    if ($gb->{$cds}->{gpac} =~ /$gpac/){
		$gb->{$cds}->{on} = 0;
	    }else{
		$gb->{$cds}->{on} = 1;
		$n ++;
	    }
	}
    }else{
	foreach my $cds ($gb->cds('all')){
	    if ($gb->{$cds}->{gpac} =~ /$gpac/){
		$gb->{$cds}->{on} = 1;
		$n ++;
	    }else{
		$gb->{$cds}->{on} = 0;
	    }
	}
    }

    return $n;
}
newdescriptiontopprevnext
sub new {
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this;

    return $this;
}
set_gpacdescriptiontopprevnext
sub set_gpac {
    opt_default("ptt"=>'');
    my @args = opt_get(@_);
    my $gb = shift @args;
    my $ptt = opt_val("ptt");
    my ($cds) = (0,0,0,0,0,0,0);
    my $specie;
    my $cwd = getcwd();
    my $time = time;
    my $openfile = '';

    if (length $ptt){
	$openfile = $ptt;
    }else{
	my $file = _acc2ftp_bacteria($gb) . '.ptt';
	mkdir("/tmp/$time"); 
	chdir("/tmp/$time");
	my @hoge = `wget $file`;
	my @name = split(/\//, $file);
	chdir($cwd);
	$openfile = "/tmp/$time/" . pop @name;
    }

    open(FILE, $openfile) || die($!);
    while(<FILE>){
	chomp;

	my ($hypo, $func, $put, $pcog) = (0,0,0,0);

	if (/^\s+[0-9]+\.\.[0-9]+\s+/){
	    s/^\s+//;
	    s/\s+$//;
	    my ($location, $strand, $length, $pid, $gene, 
		$synonym, $code, $cog, $product) = split (/\s+/, $_, 9);

	    $cds ++;
	    $pcog ++ if ($cog =~ /^COG/);
	    my $id = $gb->{"CDS$cds"}->{feature};

	    if (lc($product) =~ /hypothetical/ || lc($product) =~ /^unknown/ 
		|| lc($product) =~ /^orf/ || lc($synonym) eq lc($product)){
		$hypo ++;
	    }elsif (lc($product) =~ /putative/ || lc($product) =~ /similar/ 
		    || lc($product) =~ /probable/){
		$put ++;
	    }else{
		$func ++;
	    }

	    $gb->{"FEATURE$id"}->{code} = $code;
	    $gb->{"FEATURE$id"}->{cog} = $cog;
	    $gb->{"FEATURE$id"}->{synonym} = $synonym;
	    $gb->{"FEATURE$id"}->{gpac} .= '0';
	    $gb->{"FEATURE$id"}->{gpac} .= '1' if ($hypo < 1);
	    $gb->{"FEATURE$id"}->{gpac} .= '2' if ($put < 1 && $hypo < 1);
	    $gb->{"FEATURE$id"}->{gpac} .= '3' if ($pcog);
	    $gb->{"FEATURE$id"}->{gpac} .= '4' if ($pcog && $put < 1 && $hypo < 1);
	    $gb->{"FEATURE$id"}->{gpac} .= '5' if ($pcog || $func || $put);
	}
    }
    close(FILE);

    return $openfile;
}
test_gpacdescriptiontopprevnext
sub test_gpac {
    opt_default("stat"=>"t.test", "verbose"=>0, "output"=>"show",
		"filename"=>"gpac.html", "ptt"=>'');
    my @args = opt_get(@_);
    my $gb = shift @args;
    my $sub = shift @args;
    my $rcmd = new Rcmd;
    my $stat = opt_val("stat");
    my $verbose = opt_val("verbose");
    my $filename = opt_val("filename");
    my $output = opt_val("output");
    my $ptt = opt_val("ptt");
    my $time = time;
    my $numcds = scalar($gb->cds());

    $rcmd->exec('require(ctest)');

    my ($i, $j, @r0, @r1, @r2, @r3, @r4, @r5, 
	@r1rev, @r2rev, @r3rev, @r4rev, @r5rev, 
	$type, @nrev, @ans, @control, @controlrev);
    my @ansrev = (0);
    my $label = 'results';

    no strict;

    for ($i = 0; $i <= 5; $i ++){
	msg_error("Running GPAC Level $i...\n\n");
        $n[$i] = gpac($gb, $i, -ptt=>$ptt);

	@{"r$i"} = &{$sub};
	my @tmp = @{"r$i"};
	if (length $tmp[1]){
	    $type = "ARRAY";
	}else{
	    my $ref = ref $tmp[0];
	    if ($ref){
		$type = $ref;

		if ($ref eq "SCALAR"){
		    @{"r$i"} = ($$tmp[0]);
		    $ans[$i] = $$tmp[0];
		}elsif ($ref eq "ARRAY"){
		    @{"r$i"} = (@$tmp[0]);
		}elsif ($ref eq "HASH"){
		    my @values;
		    foreach my $val (sort keys %{$tmp[0]}){
			push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
		    }
		    @{"r$i"} = @values;
		}else{
		    die("Return value of the subroutine not fitted for GPAC\n\n");
		}
	    }else{
		$type = "SCALAR";
		$ans[$i] = $tmp[0];
	    }
	}

	if ($type eq 'ARRAY' || $type eq 'HASH'){
	    if ($i == 0){
		$rcmd->exec('x_c(',  join(",\n", @{"r0"}), ')');
		$ans[$i] = 0;
	    }else{
		$rcmd->exec('y_c(',  join(",\n", @{"r$i"}), ')');
		$ans[$i] = $rcmd->exec("$stat\(x,y, paired=T\)\$p\.value");
	    }
	    $label = "p value";
	}
    }

    for ($i = 1; $i <= 5; $i ++){
	msg_error("Running GPAC Level ",  $i, " Eliminated...\n\n");
        $nrev[$i] = gpac($gb, $i, -ptt=>$ptt, -elim=>1);

	@{"r$i" . "rev"} = &{$sub};
	my @tmp = @{"r$i" . "rev"};

	if (defined $tmp[1]){
	    $type = "ARRAY";
	}else{
	    my $ref = ref $tmp[0];
	    if ($ref){
		$type = $ref;

		if ($ref eq "SCALAR"){
		    @{"r$i" . "rev"} = ($$tmp[0]);
		    $ansrev[$i] = $$tmp[0];
		}elsif ($ref eq "ARRAY"){
		    @{"r$i" . "rev"} = (@$tmp[0]);
		}elsif ($ref eq "HASH"){
		    my @values;
		    foreach my $val (sort keys %{$tmp[0]}){
			push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
		    }
		    @{"r$i" . "rev"} = @values;
		}else{
		    die("Return value of the subroutine not fitted for GPAC\n\n");
		}
	    }else{
		$type = "SCALAR";
		$ansrev[$i] = $tmp[0];
	    }
	}

	if ($type eq 'ARRAY' || $type eq 'HASH'){
	    $rcmd->exec('y_c(',  join(",\n", @{"r$i" . "rev"}), ')');
	    $ansrev[$i] = $rcmd->exec("$stat\(x,y, paired=T\)\$p\.value");
	    $label = "p value";
	}
    }

    
    for ($j = 0; $j < $verbose; $j ++){
	my @tmparray = (0);
	msg_error("Running GPAC Control ", $j + 1, '/' , $verbose, "...\n\n");

        for ($i = 1; $i <= 5; $i ++){
	    _random_elimination($gb, $n[$i]);
	    
	    my @tmp = &{$sub};
	    my @tmp2 = @tmp;

	    unless (defined $tmp[1]){
		my $ref = ref $tmp[0];
		if ($ref){
		    if ($ref eq "SCALAR"){
			$tmparray[$i] = $$tmp[0];
		    }elsif ($ref eq "ARRAY"){
			@tmp2 = (@$tmp[0]);
		    }elsif ($ref eq "HASH"){
			my @values;
			foreach my $val (sort keys %{$tmp[0]}){
			    push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
			}
			@tmp2 = @values;
		    }else{
			die("Return value of the subroutine not fitted for GPAC\n\n");
		    }
		}else{
		    $tmparray[$i] = $tmp[0];
		}
	    }
	    
	    if ($type eq 'ARRAY' || $type eq 'HASH'){
		$rcmd->exec('y_c(',  join(",\n", @tmp2), ')');
		$tmparray[$i] = $rcmd->exec("$stat\(x,y,paired=T\)\$p\.value");
	    }
	}
	push (@control, [@tmparray]);

        @tmparray = (0);
        for ($i = 1; $i <= 5; $i ++){
	    _random_elimination($gb, $nrev[$i]);
	    
	    my @tmp = &{$sub};
	    my @tmp2 = @tmp;

	    unless (defined $tmp[1]){
		my $ref = ref $tmp[0];
		if ($ref){
		    if ($ref eq "SCALAR"){
			$tmparray[$i] = $$tmp[0];
		    }elsif ($ref eq "ARRAY"){
			@tmp2 = (@$tmp[0]);
		    }elsif ($ref eq "HASH"){
			my @values;
			foreach my $val (sort keys %{$tmp[0]}){
			    push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
			}
			@tmp2 = @values;
		    }else{
			die("Return value of the subroutine not fitted for GPAC\n\n");
		    }
		}else{
		    $tmparray[$i] = $tmp[0];
		}
	    }
	    
	    if ($type eq 'ARRAY' || $type eq 'HASH'){
		$rcmd->exec('y_c(',  join(",\n", @tmp2), ')');
		$tmparray[$i] = $rcmd->exec("$stat\(x,y,paired=T\)\$p\.value");
	    }
	}
	push (@controlrev, [@tmparray]);
    }

    my (@data, @datarev, @legend, @temp, @temprev);
    if ($verbose > 0){
	@data = ([0,1,2,3,4,5],\@ ans, @control);
	@datarev = ([0,1,2,3,4,5],\@ ansrev, @controlrev);
	@legend = ("Result");
	for ($j = 1; $j <= $verbose; $j ++){
	    push(@legend, "Control $j");
	}
    }else{
	@data = ([0,1,2,3,4,5],\@ ans);
	@datarev = ([0,1,2,3,4,5],\@ ansrev);
    }

    @temp = @ans;
    @temprev = @ansrev;
    foreach my $ctrl (@control){
	@temp = (@temp, @$ctrl);
    }
    foreach my $ctrl (@controlrev){
	@temprev = (@temprev, @$ctrl);
    }
    my @temp3 = (@temp, @temprev); 
    my @temp2 = sort {$b <=> $a} @temp3;
    my $max = shift(@temp2) * 1.1;
    my @expo = split(/e/, sprintf("%e", $max, 2));
    $max = sprintf("%.2f", $expo[0]) * (10 ** $expo[1]);

    my @data2 = ([1,2,3,4,5], [$n[1]/$n[0], $n[2]/$n[0], 
			       $n[3]/$n[0], $n[4]/$n[0], $n[5]/$n[0]],
[
$nrev[1]/$n[0], $nrev[2]/$n[0], $nrev[3]/$n[0], $nrev[4]/$n[0], $nrev[5]/$n[0]]); my $graph = GD::Graph::bars->new(400, 300); my $graph2 = GD::Graph::bars->new(250, 270); my $graph3 = GD::Graph::bars->new(400, 300); $graph->set( x_label => 'GPAC', y_label => $label, title => 'GPAC Test', y_max_value => $max, bar_width => 2, bar_spacing => 1 ); $graph2->set( x_label => 'GPAC', y_label => 'percentage of CDS', y_max_value => 1, y_min_value => 0, title => 'GPAC Degree of Freedom', bar_width => 5, bar_spacing => 10 ); $graph3->set( x_label => 'GPAC', y_label => $label, title => 'GPAC Eliminated Test', y_max_value => $max, bar_width => 2, bar_spacing => 1 ); $graph->set_legend(@legend) if ($verbose > 0); $graph2->set_legend("GPAC CDS", "Eliminated CDS"); $graph3->set_legend(@legend) if ($verbose > 0); my $gd = $graph->plot(\@data); my $gd2 = $graph2->plot(\@data2); my $gd3 = $graph3->plot(\@datarev); mkdir ("graph", 0777); open(IMG, '>graph/' . $time . '-gpac.png') or die $!; binmode IMG; print IMG $gd->png; close(IMG); open(IMG, '>graph/' . $time . '-df.png') or die $!; binmode IMG; print IMG $gd2->png; close(IMG); open(IMG, '>graph/' . $time . '-elim.png') or die $!; binmode IMG; print IMG $gd3->png; close(IMG); open(HTML, '>graph/' . $filename) or die $!; print HTML qq( <HTML><HEAD><TITLE>GPAC Test</TITLE></HEAD> <BODY bgcolor="white"> <table><tr><td valign="top"> <img src="$time-gpac.png"> </td><td valign="top"> <img src="$time-elim.png"> </td></tr> <tr><td style="font-size:10pt"> <UL> <LI>GPAC Level 0: all CDS<BR> <LI>GPAC Level 1: without hypothetical CDS<BR> <LI>GPAC Level 2: without hypothetical & putative CDS<BR> <LI>GPAC Level 3: only conserved CDS (COGs)<BR> <LI>GPAC Level 4: only conserved CDS, but without hypothetical & putative<BR> <LI>GPAC Level 5: conserved hypothetical & putative and functional proteins<BR> </UL>
</td
><td align="right"> <img src="$time-df.png"> <BR> N = $numcds </td></tr></table>
<BR><BR>
</
BODY></HTML>
);
system("galeon graph/$filename &") if ($output eq 'show');
}

General documentation

AUTHOR top
A. U. Thor, a.u.thor@a.galaxy.far.far.away
SEE ALSO top
perl(1).