G::Tools GPAC
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Summary
G::Tools::GPAC - Perl extension for blah blah blah
Package variables
Globals (from "use vars" definitions)
@EXPORT
@EXPORT_OK
$VERSION
Included modules
Cwd
G::Messenger
GD::Graph::bars
Rcmd
SelfLoader
SubOpt
Inherit
AutoLoader Exporter
Synopsis
  use G::Tools::PBS;
  blah blah blah
Description
Stub documentation for G::GPAC::PBS was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.
Blah blah blah.
Methods
BEGIN Code
DESTROY
No description
Code
_acc2ftp_bacteria
No description
Code
_random_elimination
No description
Code
gpac
No description
Code
set_gpac
No description
Code
test_gpac
No description
Code
Methods description
None available.
Methods code
BEGINTop
BEGIN {
    eval "use GD::Graph::bars;";
    if($@){ warn "$@" };
}
DESTROYdescriptionprevnextTop
sub DESTROY {
    my $self = shift;
}
_acc2ftp_bacteriadescriptionprevnextTop
sub _acc2ftp_bacteria {
    my @args = opt_get(@_);
    my $gb = shift @args;

    my $cwd = getcwd();
    my $specie = '';
    my $time = time;
    my @hoge = ();

    mkdir("/tmp/$time"); 
    chdir("/tmp/$time");

    if($gb->{LOCUS}->{id} =~/NC\_/){
        @hoge = `wget ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/accessions`;       
}else{ @hoge = `wget ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Bacteria/accessions`; } open(FILE, "accessions") || die($!); while(<FILE>){ chomp; my ($acc, $nym) = split(/\s/, $_, 2); if ($acc eq $gb->{LOCUS}->{id}){ $specie = $nym; $specie =~ s/ /_/g; substr($specie, 0, 1) = '' if (substr($specie, 0, 1) eq '_'); last; } } chdir($cwd); if (length $specie){ if($gb->{LOCUS}->{id} =~/NC\_/){ return "ftp://ftp.ncbi.nlm.nih.gov/genomes/Bacteria/$specie/" . $gb->{LOCUS}->{id}; }else{ return "ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Bacteria/$specie/" . $gb->{LOCUS}->{id}; } }else{ return; }
}
_random_eliminationdescriptionprevnextTop
sub _random_elimination {
    my $gb = shift;
    my $n = shift;
    my @cds = $gb->cds('all');

    my $j = scalar(@cds);
    
    while ($j > $n){
	splice(@cds, int(rand($j)), 1);
	$j --;
    }

    foreach my $cds ($gb->cds('all')){
	$gb->{$cds}->{on} = 0;
    }

    foreach my $cds (@cds){
	$gb->{$cds}->{on} = 1;
    }
}
gpacdescriptionprevnextTop
sub gpac {
    opt_default("ptt"=>'', "elim"=>0);
    my @args = opt_get(@_);
    my $gb = shift @args;
    my $gpac = shift @args;
    my $ptt = opt_val("ptt");
    my $elim = opt_val("elim");
    my $n = 0;

    my $id = $gb->{CDS1}->{feature};
    unless (length($gb->{"FEATURE$id"}->{gpac})){
	set_gpac($gb, -ptt=>$ptt);
    }

    if ($elim){
	foreach my $cds ($gb->cds('all')){
	    if ($gb->{$cds}->{gpac} =~ /$gpac/){
		$gb->{$cds}->{on} = 0;
	    }else{
		$gb->{$cds}->{on} = 1;
		$n ++;
	    }
	}
    }else{
	foreach my $cds ($gb->cds('all')){
	    if ($gb->{$cds}->{gpac} =~ /$gpac/){
		$gb->{$cds}->{on} = 1;
		$n ++;
	    }else{
		$gb->{$cds}->{on} = 0;
	    }
	}
    }

    return $n;
}
set_gpacdescriptionprevnextTop
sub set_gpac {
    opt_default("ptt"=>'');
    my @args = opt_get(@_);
    my $gb = shift @args;
    my $ptt = opt_val("ptt");
    my ($cds) = (0,0,0,0,0,0,0);
    my $specie;
    my $cwd = getcwd();
    my $time = time;
    my $openfile = '';

    if (length $ptt){
	$openfile = $ptt;
    }else{
	my $file = _acc2ftp_bacteria($gb) . '.ptt';
	mkdir("/tmp/$time"); 
	chdir("/tmp/$time");
	my @hoge = `wget $file`;
	my @name = split(/\//, $file);
	chdir($cwd);
	$openfile = "/tmp/$time/" . pop @name;
    }

    open(FILE, $openfile) || die($!);
    while(<FILE>){
	chomp;

	my ($hypo, $func, $put, $pcog) = (0,0,0,0);

	if (/^\s*[0-9]+\.\.[0-9]+\s+/){
	    s/^\s+//;
	    s/\s+$//;
	    my ($location, $strand, $length, $pid, $gene, 
		$synonym, $code, $cog, $product) = split (/\s+/, $_, 9);

	    $cds ++;
	    $pcog ++ if ($cog =~ /^COG/);
	    my $id = $gb->{"CDS$cds"}->{feature};

	    if (lc($product) =~ /hypothetical/ || lc($product) =~ /^unknown/ 
		|| lc($product) =~ /^orf/ || lc($synonym) eq lc($product)){
		$hypo ++;
	    }elsif (lc($product) =~ /putative/ || lc($product) =~ /similar/ 
		    || lc($product) =~ /probable/){
		$put ++;
	    }else{
		$func ++;
	    }

	    $gb->{"FEATURE$id"}->{code} = $code;
	    $gb->{"FEATURE$id"}->{cog} = $cog;
	    $gb->{"FEATURE$id"}->{synonym} = $synonym;
	    $gb->{"FEATURE$id"}->{gpac} .= '0';
	    $gb->{"FEATURE$id"}->{gpac} .= '1' if ($hypo < 1);
	    $gb->{"FEATURE$id"}->{gpac} .= '2' if ($put < 1 && $hypo < 1);
	    $gb->{"FEATURE$id"}->{gpac} .= '3' if ($pcog);
	    $gb->{"FEATURE$id"}->{gpac} .= '4' if ($pcog && $put < 1 && $hypo < 1);
	    $gb->{"FEATURE$id"}->{gpac} .= '5' if ($pcog || $func || $put);
	}
    }
    close(FILE);

    return $openfile;
}
test_gpacdescriptionprevnextTop
sub test_gpac {
    opt_default("stat"=>"t.test", "verbose"=>5, "output"=>"show",
		"filename"=>"gpac.html", "ptt"=>'');
    my @args = opt_get(@_);
    my $gb = shift @args;
    my $sub = shift @args;
    my $rcmd = new Rcmd;
    my $stat = opt_val("stat");
    my $verbose = opt_val("verbose");
    my $filename = opt_val("filename");
    my $output = opt_val("output");
    my $ptt = opt_val("ptt");
    my $time = time;
    my $numcds = scalar($gb->cds());

    $rcmd->exec('require(ctest)');

    my ($i, $j, @r0, @r1, @r2, @r3, @r4, @r5, 
	@r1rev, @r2rev, @r3rev, @r4rev, @r5rev, 
	$type, @nrev, @ans, @control, @controlrev);
    my @ansrev = (0);
    my $label = 'results';

    no strict;

    for ($i = 0; $i <= 5; $i ++){
	msg_error("Running GPAC Level $i...\n\n");
        $n[$i] = gpac($gb, $i, -ptt=>$ptt);

	@{"r$i"} = &{$sub};
	my @tmp = @{"r$i"};
	if (length $tmp[1]){
	    $type = "ARRAY";
	}else{
	    my $ref = ref $tmp[0];
	    if ($ref){
		$type = $ref;

		if ($ref eq "SCALAR"){
		    @{"r$i"} = ($$tmp[0]);
		    $ans[$i] = $$tmp[0];
		}elsif ($ref eq "ARRAY"){
		    @{"r$i"} = (@$tmp[0]);
		}elsif ($ref eq "HASH"){
		    my @values;
		    foreach my $val (sort keys %{$tmp[0]}){
			push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
		    }
		    @{"r$i"} = @values;
		}else{
		    die("Return value of the subroutine not fitted for GPAC\n\n");
		}
	    }else{
		$type = "SCALAR";
		$ans[$i] = $tmp[0];
	    }
	}

	if ($type eq 'ARRAY' || $type eq 'HASH'){
	    if ($i == 0){
		$rcmd->exec('x_c(',  join(",\n", @{"r0"}), ')');
		$ans[$i] = 0;
	    }else{
		$rcmd->exec('y_c(',  join(",\n", @{"r$i"}), ')');
		$ans[$i] = $rcmd->exec("$stat\(x,y, paired=T\)\$p\.value");
	    }
	    $label = "p value";
	}
    }

    for ($i = 1; $i <= 5; $i ++){
	msg_error("Running GPAC Level ",  $i, " Eliminated...\n\n");
        $nrev[$i] = gpac($gb, $i, -ptt=>$ptt, -elim=>1);

	@{"r$i" . "rev"} = &{$sub};
	my @tmp = @{"r$i" . "rev"};

	if (defined $tmp[1]){
	    $type = "ARRAY";
	}else{
	    my $ref = ref $tmp[0];
	    if ($ref){
		$type = $ref;

		if ($ref eq "SCALAR"){
		    @{"r$i" . "rev"} = ($$tmp[0]);
		    $ansrev[$i] = $$tmp[0];
		}elsif ($ref eq "ARRAY"){
		    @{"r$i" . "rev"} = (@$tmp[0]);
		}elsif ($ref eq "HASH"){
		    my @values;
		    foreach my $val (sort keys %{$tmp[0]}){
			push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
		    }
		    @{"r$i" . "rev"} = @values;
		}else{
		    die("Return value of the subroutine not fitted for GPAC\n\n");
		}
	    }else{
		$type = "SCALAR";
		$ansrev[$i] = $tmp[0];
	    }
	}

	if ($type eq 'ARRAY' || $type eq 'HASH'){
	    $rcmd->exec('y_c(',  join(",\n", @{"r$i" . "rev"}), ')');
	    $ansrev[$i] = $rcmd->exec("$stat\(x,y, paired=T\)\$p\.value");
	    $label = "p value";
	}
    }

    
    for ($j = 0; $j < $verbose; $j ++){
	my @tmparray = (0);
	msg_error("Running GPAC Control ", $j + 1, '/' , $verbose, "...\n\n");

        for ($i = 1; $i <= 5; $i ++){
	    _random_elimination($gb, $n[$i]);
	    
	    my @tmp = &{$sub};
	    my @tmp2 = @tmp;

	    unless (defined $tmp[1]){
		my $ref = ref $tmp[0];
		if ($ref){
		    if ($ref eq "SCALAR"){
			$tmparray[$i] = $$tmp[0];
		    }elsif ($ref eq "ARRAY"){
			@tmp2 = (@$tmp[0]);
		    }elsif ($ref eq "HASH"){
			my @values;
			foreach my $val (sort keys %{$tmp[0]}){
			    push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
			}
			@tmp2 = @values;
		    }else{
			die("Return value of the subroutine not fitted for GPAC\n\n");
		    }
		}else{
		    $tmparray[$i] = $tmp[0];
		}
	    }
	    
	    if ($type eq 'ARRAY' || $type eq 'HASH'){
		$rcmd->exec('y_c(',  join(",\n", @tmp2), ')');
		$tmparray[$i] = $rcmd->exec("$stat\(x,y,paired=T\)\$p\.value");
	    }
	}
	push (@control, [@tmparray]);

        @tmparray = (0);
        for ($i = 1; $i <= 5; $i ++){
	    _random_elimination($gb, $nrev[$i]);
	    
	    my @tmp = &{$sub};
	    my @tmp2 = @tmp;

	    unless (defined $tmp[1]){
		my $ref = ref $tmp[0];
		if ($ref){
		    if ($ref eq "SCALAR"){
			$tmparray[$i] = $$tmp[0];
		    }elsif ($ref eq "ARRAY"){
			@tmp2 = (@$tmp[0]);
		    }elsif ($ref eq "HASH"){
			my @values;
			foreach my $val (sort keys %{$tmp[0]}){
			    push (@values, sprintf("%.4f", ${$tmp[0]}{$val}));
			}
			@tmp2 = @values;
		    }else{
			die("Return value of the subroutine not fitted for GPAC\n\n");
		    }
		}else{
		    $tmparray[$i] = $tmp[0];
		}
	    }
	    
	    if ($type eq 'ARRAY' || $type eq 'HASH'){
		$rcmd->exec('y_c(',  join(",\n", @tmp2), ')');
		$tmparray[$i] = $rcmd->exec("$stat\(x,y,paired=T\)\$p\.value");
	    }
	}
	push (@controlrev, [@tmparray]);
    }

    my (@data, @datarev, @legend, @temp, @temprev);
    if ($verbose > 0){
	@data = ([0,1,2,3,4,5],\@ ans, @control);
	@datarev = ([0,1,2,3,4,5],\@ ansrev, @controlrev);
	@legend = ("Result");
	for ($j = 1; $j <= $verbose; $j ++){
	    push(@legend, "Control $j");
	}
    }else{
	@data = ([0,1,2,3,4,5],\@ ans);
	@datarev = ([0,1,2,3,4,5],\@ ansrev);
    }

    @temp = @ans;
    @temprev = @ansrev;
    foreach my $ctrl (@control){
	@temp = (@temp, @$ctrl);
    }
    foreach my $ctrl (@controlrev){
	@temprev = (@temprev, @$ctrl);
    }
    my @temp3 = (@temp, @temprev); 
    my @temp2 = sort {$b <=> $a} @temp3;
    my $max = shift(@temp2) * 1.1;
    my @expo = split(/e/, sprintf("%e", $max, 2));
    $max = sprintf("%.2f", $expo[0]) * (10 ** $expo[1]);

    my @data2 = ([1,2,3,4,5], [$n[1]/$n[0], $n[2]/$n[0], 
			       $n[3]/$n[0], $n[4]/$n[0], $n[5]/$n[0]],
[
$nrev[1]/$n[0], $nrev[2]/$n[0], $nrev[3]/$n[0], $nrev[4]/$n[0], $nrev[5]/$n[0]]); my $graph = GD::Graph::bars->new(400, 300); my $graph2 = GD::Graph::bars->new(250, 270); my $graph3 = GD::Graph::bars->new(400, 300); $graph->set( x_label => 'GPAC', y_label => $label, title => 'GPAC Test', y_max_value => $max, bar_width => 2, bar_spacing => 1 ); $graph2->set( x_label => 'GPAC', y_label => 'percentage of CDS', y_max_value => 1, y_min_value => 0, title => 'GPAC Degree of Freedom', bar_width => 5, bar_spacing => 10 ); $graph3->set( x_label => 'GPAC', y_label => $label, title => 'GPAC Eliminated Test', y_max_value => $max, bar_width => 2, bar_spacing => 1 ); $graph->set_legend(@legend) if ($verbose > 0); $graph2->set_legend("GPAC CDS", "Eliminated CDS"); $graph3->set_legend(@legend) if ($verbose > 0); my $gd = $graph->plot(\@data); my $gd2 = $graph2->plot(\@data2); my $gd3 = $graph3->plot(\@datarev); mkdir ("graph", 0777); open(IMG, '>graph/' . $time . '-gpac.png') or die $!; binmode IMG; print IMG $gd->png; close(IMG); open(IMG, '>graph/' . $time . '-df.png') or die $!; binmode IMG; print IMG $gd2->png; close(IMG); open(IMG, '>graph/' . $time . '-elim.png') or die $!; binmode IMG; print IMG $gd3->png; close(IMG); open(HTML, '>graph/' . $filename) or die $!; print HTML qq( <HTML><HEAD><TITLE>GPAC Test</TITLE></HEAD> <BODY bgcolor="white"> <table><tr><td valign="top"> <img src="$time-gpac.png"> </td><td valign="top"> <img src="$time-elim.png"> </td></tr> <tr><td style="font-size:10pt"> <UL> <LI>GPAC Level 0: all CDS<BR> <LI>GPAC Level 1: without hypothetical CDS<BR> <LI>GPAC Level 2: without hypothetical & putative CDS<BR> <LI>GPAC Level 3: only conserved CDS (COGs)<BR> <LI>GPAC Level 4: only conserved CDS, but without hypothetical & putative<BR> <LI>GPAC Level 5: conserved hypothetical & putative and functional proteins<BR> </UL>
</td
><td align="right"> <img src="$time-df.png"> <BR> N = $numcds </td></tr></table>
<BR><BR>
</
BODY></HTML>
);
system("galeon graph/$filename &") if ($output eq 'show');
}
General documentation
AUTHORTop
A. U. Thor, a.u.thor@a.galaxy.far.far.away
SEE ALSOTop
perl(1).