G::Seq
Consensus
G::Seq::Consensus - Perl extension for blah blah blah
|
Globals (from use vars definitions) |
@EXPORT |
$VERSION |
@EXPORT_OK |
use G::Seq::Consensus; blah blah blah
|
Stub documentation for G::Seq::Consensus was created by h2xs. It looks like the author of the extension was negligent enough to leave the stub unedited.
Blah blah blah.
|
BEGIN | | Code |
DESTROY | No description | Code |
_base_printer | No description | Code |
base_counter | No description | Code |
base_entropy | No description | Code |
base_individual_information_matrix | No description | Code |
base_information_content | No description | Code |
base_relative_entropy | No description | Code |
base_z_value | No description | Code |
consensus_z | No description | Code |
logmod | No description | Code |
new | No description | Code |
Methods description
Methods code
BEGIN
{ eval "use Statistics::Descriptive;";
if ($@){ warn "$@" };
}
sub DESTROY
{ my $self = shift;
}
sub _base_printer
{ &opt_default(output=>"stdout",filename=>"consensus.csv");
my @args=opt_get(@_);
my $hash=shift @args;
my $printer=opt_val("output");
my $filename=opt_val("filename");
my $PatLen=$$hash[0]{PatLen};
my $upstream=$$hash[0]{upstream};
my $downstream=$$hash[0]{downstream};
my $gb=$$hash[0]{gbk};
my $position=$$hash[0]{pos};
my $i;
my $j;
if($printer eq "f"){
open(FILE,">>$filename");
print FILE "Pattern";
for(my $i=$upstream;$i>-$downstream;$i--){
print FILE ",$i";
}
print FILE "\n";
foreach(sort keys(%{$$hash[0]{pat}})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){
print FILE $_;
for(my $i=$upstream;$i>-$downstream;$i--){
printf FILE ",%d",$$hash[$i]{$_};
}
print FILE "\n";
}
}
print FILE "\n\n";
print FILE "Pattern";
for(my $i=$upstream;$i>-$downstream;$i--){
print FILE ",$i";
}
print FILE "\n";
foreach(sort keys(%{$$hash[0]{pat}})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){
print FILE $_;
for(my $i=$upstream;$i>-$downstream;$i--){
printf FILE ",%03.3f",$$hash[$i]{$_}*100/$$hash[$i]{kazu};
}
print FILE "\n";
}
}
close(FILE);
}
if($printer eq "stdout"){
&msg_send("CDS number:",$$hash[$upstream-1]{kazu},"\n");
for($i=$upstream;$i>-$downstream;$i=$i-12){
for(my $c=0;$c<102;$c++){
&msg_send("-");
}
&msg_send("\npos\t");
for(my $c=0;$c<12 && $i-$c > -$downstream;$c++){
&msg_send(sprintf("%6d\t",-($i-$c)));
}
&msg_send("\n");
foreach(sort keys(%{$$hash[0]{pat}})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){
&msg_send("\n $_|\t");
for($j=0;$j<12 && $i-$j>-$downstream;$j++){
&msg_send(sprintf("%6d\t",$$hash[$upstream-$i+$j]{$_}));
}
}
}
&msg_send("\n");
foreach(sort keys(%{$$hash[0]{pat}})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){
&msg_send(sprintf("\n $_|\t"));
for($j=0;$j<12 && $i-$j>-$downstream;$j++){
&msg_send(sprintf("%03.3f\t",$$hash[$upstream-$i+$j]{$_}*100/$$hash[$upstream-$i+$j]{kazu}));
}
}
}
&msg_send("\n");
}
}
}
sub base_counter
{ &opt_default(output=>"stdout",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
my @args=opt_get(@_);
my $gb=opt_as_gb(shift @args);
my $position=opt_val("position");
my $PatLen=opt_val("PatLength");
my $upstream=opt_val("upstream");
my $downstream=opt_val("downstream");
my $output=opt_val("output");
my $filename=opt_val("filename");
my @hash;
my $num=1;
my $start;
my $i;
my $nuc;
my $cnuc;
$PatLen--;
$downstream++;
foreach my $cds ($gb->cds()){
if($gb->{$cds}->{direction} eq 'direct'){
if($position eq "start"){
$start = $gb->{$cds}->{start};
}
elsif($position eq "end"){
$start = $gb->{$cds}->{end};
}
else{
&msg_error('Invalid parameter.Please enter "start" or "end".',"\n");
}
for($i=$upstream;$i>-$downstream;$i--){
if($start-1-$i>0){
$hash[$upstream-$i]{kazu}++;
$nuc=$gb->getseq($start-1-$i,$start-1-$i+$PatLen);
$hash[$upstream-$i]{$nuc}++;
$hash[0]{pat}{$nuc}++;
}
}
}
elsif($gb->{$cds}->{direction} eq 'complement'){
if($position eq "start"){
$start = $gb->{$cds}->{end};
}
elsif($position eq "end"){
$start = $gb->{$cds}->{start};
}
else{
&msg_error('Invalid parameter.Please enter "start" or "end".',"\n");
}
for($i=$upstream;$i>-$downstream;$i--){
if($start-1+$i<length($gb->{SEQ})){
$hash[$upstream-$i]{kazu}++;
$nuc=$gb->getseq($start-1+$i-$PatLen,$start-1+$i);
$cnuc=_complement($nuc);
$hash[$upstream-$i]{$cnuc}++;
$hash[0]{pat}{$cnuc}++;
}
}
}
}
$hash[0]{upstream}=$upstream;
$hash[0]{downstream}=$downstream;
$hash[0]{PatLen}=$PatLen;
$hash[0]{gbk}=$gb;
$hash[0]{pos}=$position;
if($output eq "f"){
_base_printer(\@hash,-output=>"f",-filename=>$filename);
}
if($output eq "stdout"){
_base_printer(\@hash,-output=>"stdout");
}
return\@ hash;
}
sub base_entropy
{ &opt_default(output=>"show",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
my @args=opt_get(@_);
my $gb=opt_as_gb(shift @args);
my $position=opt_val("position");
my $PatLen=opt_val("PatLength");
my $upstream=opt_val("upstream");
my $downstream=opt_val("downstream");
my $printer=opt_val("output");
my $filename=opt_val("filename");
my $hash;
my %ratio;
my @loc_ratio;
my @loc_total;
my @entropy;
my $offset;
my %seqnum;
my $tmp;
my @y;
$hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
$downstream++;
if($printer=~/f/){
open(FILE,">>$filename");
}
for(my $i=$upstream;$i>-$downstream;$i--){
foreach(keys(%{$$hash[$upstream-$i]})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
unless(defined($seqnum{$_})){
$offset=$tmp=0;
while($tmp!=-1){
$tmp=index($gb->{SEQ},$_,$offset);
$offset=$tmp+1;
$seqnum{$_}++ if($tmp!=-1);
}
}
$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
if($ratio{$_} != 0){
$loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
$loc_ratio[$upstream-$i]{$_}=$loc_ratio[$upstream-$i]{$_}/$ratio{$_};
$loc_total[$upstream-$i]+=$loc_ratio[$upstream-$i]{$_};
}
else{
$entropy[$upstream-$i]=0;
}
}
}
foreach(keys(%{$$hash[$upstream-$i]})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
$entropy[$upstream-$i]-=$loc_ratio[$upstream-$i]{$_}/$loc_total[$upstream-$i]
*&logmod($loc_ratio[$upstream-$i]{$_}/$loc_total[$upstream-$i])/&logmod(2.0)/length($_);
}
}
if($printer=~/f/){
printf FILE "%d,%.5f\n",-$i ,$entropy[$upstream-$i];
}
if($printer!~/[fn]/){
&msg_send(sprintf("%d\t%.5f\n",-$i ,$entropy[$upstream-$i]));
}
push(@y,-$i);
}
close(FILE);
if($printer=~/g/ || $printer=~/show/){
&G::Tools::Graph::_UniMultiGrapher(\@y,\@entropy,-filename=>"base_entropy.png",-x=>"position",-y=>"entropy",-title=>"entropy");
}
msg_gimv('graph/base_entropy.png') if($printer=~/show/);
$tmp="entropy";
@entropy=($position,$tmp,$upstream,$downstream-1,@entropy);
return\@ entropy;
}
base_individual_information_matrix | description | top | prev | next |
sub base_individual_information_matrix
{ &opt_default(output=>"stdout",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
my @args=opt_get(@_);
my $gb=opt_as_gb(shift @args);
my $position=opt_val("position");
my $PatLen=opt_val("PatLength");
my $upstream=opt_val("upstream");
my $downstream=opt_val("downstream");
my $printer=opt_val("output");
my $filename=opt_val("filename");
my $hash;
my $hgenome;
my $tmp;
my $offset;
my %seqnum;
my %ratio;
my @loc_ratio;
my @IIM;
my %q;
$hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
$downstream++;
for(my $i=$upstream;$i>-$downstream;$i--){
$hgenome=0;
foreach(keys(%{$$hash[$upstream-$i]})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
unless(defined($seqnum{$_})){
$offset=$tmp=0;
while($tmp!=-1){
$tmp=index($gb->{SEQ},$_,$offset);
$offset=$tmp+1;
$seqnum{$_}++ if($tmp!=-1);
}
}
$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
$hgenome-=$ratio{$_} * logmod($ratio{$_})/logmod(2.0);
$loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
$loc_ratio[$upstream-$i]{$_}=1/($$hash[$upstream-$i]{kazu}+2) if($loc_ratio[$upstream-$i]{$_}==0);
}
}
foreach(keys(%{$$hash[$upstream-$i]})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
$IIM[$upstream-$i]{$_}=$hgenome-(4-1)/(2.0*logmod(2.0)*$$hash[$upstream-$i]{kazu})+
logmod($loc_ratio[$upstream-$i]{$_})/logmod(2.0);
}
}
}
if($printer eq "f"){
open(FILE,">>$filename");
print FILE "Pattern";
for(my $i=$upstream;$i>-$downstream;$i--){
print FILE ",$i";
}
print FILE "\n";
foreach(sort keys(%{$$hash[0]{pat}})){
print FILE $_;
for(my $i=$upstream;$i>-$downstream;$i--){
print FILE sprintf(",%.3f",$IIM[$i]{$_});
}
print FILE "\n";
}
close(FILE);
}
if($printer eq "stdout"){
for(my $i=$upstream;$i>-$downstream;$i += 0){
for(my $c=0;$c<102;$c++){
&msg_send("-");
}
&msg_send("\npos");
for(my $c=0;$c<10;$c++){
&msg_send("\t",-$i+$q{$_}*10);
$i--;
last if($i<=-$downstream);
}
&msg_send("\n\n");
foreach(sort keys %{$$hash[0]{pat}}){
&msg_send("$_\|");
for(my $c=0;$c<10;$c++){
&msg_send(sprintf("\t%.3f",$IIM[$c+$q{$_}*10]{$_}));
last if($c+$q{$_}*10>=$downstream+$upstream-1);
}
&msg_send("\n");
$q{$_}++;
}
}
}
return\@ IIM;
}
sub base_information_content
{ &opt_default(output=>"show",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
my @args=opt_get(@_);
my $gb=opt_as_gb(shift @args);
my $position=opt_val("position");
my $PatLen=opt_val("PatLength");
my $upstream=opt_val("upstream");
my $downstream=opt_val("downstream");
my $printer=opt_val("output");
my $filename=opt_val("filename");
my $hash;
my @IC;
my $hgenome;
my %ratio;
my @loc_ratio;
my $offset;
my %seqnum;
my $tmp;
my @y;
$hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
$downstream++;
if($printer=~/f/){
open(FILE,">>$filename");
}
for(my $i=$upstream;$i>-$downstream;$i--){
$hgenome=0;
foreach(keys(%{$$hash[$upstream-$i]})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
unless(defined($seqnum{$_})){
$offset=$tmp=0;
while($tmp!=-1){
$tmp=index($gb->{SEQ},$_,$offset);
$offset=$tmp+1;
$seqnum{$_}++ if($tmp!=-1);
}
}
$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
$hgenome-=$ratio{$_} * logmod($ratio{$_})/logmod(2.0);
$loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
}
}
$IC[$upstream-$i]=$hgenome - (4-1)/(2.0 * logmod(2.0) * $$hash[$upstream-$i]{kazu});
foreach(keys(%{$$hash[$upstream-$i]})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
$IC[$upstream-$i]+=logmod($loc_ratio[$upstream-$i]{$_})/logmod(2.0)*$loc_ratio[$upstream-$i]{$_};
}
}
if($printer=~/f/){
printf FILE "%d,%.5f\n",-$i ,$IC[$upstream-$i];
}
if($printer!~/[fn]/){
&msg_send(sprintf("%d\t%.5f\n",-$i ,$IC[$upstream-$i]));
}
push(@y,-$i);
}
close(FILE);
if($printer=~/g/ || $printer=~/show/){
&G::Tools::Graph::_UniMultiGrapher(\@y,\@IC,-filename=>"base_information_content.png",-x=>"position",-y=>"information_content",-title=>"information_content");
}
msg_gimv('graph/base_information_content.png') if($printer=~/show/);
$tmp="IC";
@IC=($position,$tmp,$upstream,$downstream-1,@IC);
return\@ IC;
}
sub base_relative_entropy
{ &opt_default(output=>"show",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
my @args=opt_get(@_);
my $gb=opt_as_gb(shift @args);
my $position=opt_val("position");
my $PatLen=opt_val("PatLength");
my $upstream=opt_val("upstream");
my $downstream=opt_val("downstream");
my $printer=opt_val("output");
my $filename=opt_val("filename");
my $hash;
my @RE;
my %ratio;
my @loc_ratio;
my $offset;
my %seqnum;
my $tmp;
my @y;
$hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
$downstream++;
if($printer=~/f/){
open(FILE,">>$filename");
}
for(my $i=$upstream;$i>-$downstream;$i--){
foreach(keys(%{$$hash[$upstream-$i]})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
unless(defined($seqnum{$_})){
$offset=$tmp=0;
while($tmp!=-1){
$tmp=index($gb->{SEQ},$_,$offset);
$offset=$tmp+1;
$seqnum{$_}++ if($tmp!=-1);
}
}
$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
$loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
$RE[$upstream-$i]+=logmod($loc_ratio[$upstream-$i]{$_}/$ratio{$_})/logmod(2.0)*$loc_ratio[$upstream-$i]{$_};
}
}
if($printer=~/f/){
printf FILE "%d,%.5f\n",-$i ,$RE[$upstream-$i];
}
if($printer!~/[fn]/){
&msg_send(sprintf("%d\t%.5f\n",-$i ,$RE[$upstream-$i]));
}
push(@y,-$i);
}
close(FILE);
if($printer=~/g/ || $printer=~/show/){
&G::Tools::Graph::_UniMultiGrapher(\@y,\@RE,-filename=>"base_relative_entropy.png",-x=>"position",-y=>"relative_entropy",-title=>"relative_entropy");
}
msg_gimv('graph/base_relative_entropy.png') if($printer=~/show/);
$tmp="RE";
@RE=($position,$tmp,$upstream,$downstream-1,@RE);
return\@ RE;
}
sub base_z_value
{ &opt_default(limit=>5,output=>"stdout",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
my @args=opt_get(@_);
my $gb=opt_as_gb(shift @args);
my $position=opt_val("position");
my $PatLen=opt_val("PatLength");
my $upstream=opt_val("upstream");
my $downstream=opt_val("downstream");
my $printer=opt_val("output");
my $filename=opt_val("filename");
my $limit=opt_val("limit");
my $hash;
my @Z;
my @Z_value;
my @Z_tmp;
my %ratio;
my $offset;
my %seqnum;
my $tmp;
my @tmp;
my @key;
my $c;
$hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
$downstream++;
if($printer eq "f"){
open(FILE,">>$filename");
}
for(my $i=$upstream;$i>-$downstream;$i--){
foreach(keys(%{$$hash[$upstream-$i]})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
unless(defined($seqnum{$_})){
$offset=$tmp=0;
while($tmp!=-1){
$tmp=index($gb->{SEQ},$_,$offset);
$offset=$tmp+1;
$seqnum{$_}++ if($tmp!=-1);
}
}
$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
$Z_tmp[$upstream-$i]{$_}=($$hash[$upstream-$i]{$_}-$$hash[$upstream-$i]{kazu}*$ratio{$_})
/sqrt($$hash[$upstream-$i]{kazu}*$ratio{$_}*(1-$ratio{$_}));
}
}
foreach(keys(%{$Z_tmp[$upstream-$i]})){
if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
push(@Z_value,sprintf("%.5f",$Z_tmp[$upstream-$i]{$_})." ".$_);
}
}
$c=1;
foreach(sort{$b <=> $a}@Z_value){
@tmp=split(/ +/,$_);
$Z[$upstream-$i]{$c}{$tmp[1]}=$tmp[0];
$c++;
last if($c>$limit);
}
if($i==0){
if($printer eq "f"){
print FILE "position:0,";
}
if($printer eq "stdout"){
&msg_send("position:0\n");
}
}
else{
if($printer eq "f"){
print FILE "position:",-1*$i,",";
}
if($printer eq "stdout"){
&msg_send("position:",-1*$i,"\n");
}
}
foreach(sort{$a <=> $b} keys(%{$Z[$upstream-$i]})){
if($printer eq "f"){
print FILE $_,",",(keys(%{$Z[$upstream-$i]{$_}}))[0],",",
$Z[$upstream-$i]{$_}{(keys(%{$Z[$upstream-$i]{$_}}))[0]},"\n";
}
if($printer eq "stdout"){
&msg_send($_,"\t",(keys(%{$Z[$upstream-$i]{$_}}))[0],"\t",
$Z[$upstream-$i]{$_}{(keys(%{$Z[$upstream-$i]{$_}}))[0]},"\n");
}
}
if($printer eq "f"){
print FILE "";
}
if($printer eq "stdout"){
&msg_send("\n");
}
@Z_value=();
}
return\@ Z;
close(FILE);
}
sub consensus_z
{ &opt_default(high=>1, low=>0.2, output=>"show",
filename=>"consensus_z.png");
my @args = opt_get(@_);
my $ref = shift @args;
my $high_z = opt_val("high");
my $low_z = opt_val("low");
my $filename = opt_val("filename");
$filename = 'consensus_z.csv' if (opt_val("output") eq "f");
my $output = opt_val("output");
my @inseq = @$ref;
my ($i,$tmp,$outseq,@seq,@array,%nuc_table, @out, @pos);
my $length = length($inseq[0]);
my $rows = @inseq;
my $nuc_max = 0;
foreach $tmp (@inseq){
for ($i = 0; $i < $length; $i++){
$seq[$i]{substr($tmp, $i,1)} += 1/$rows;
$nuc_table{substr($tmp, $i,1)} ++;
push (@array, $seq[$i]{substr($tmp,$i,1)});
}
}
foreach $tmp (keys %nuc_table){$nuc_max ++;}
for ($i = 0; $i < $length; $i++){
my $max = 0.0;
my $max_index;
my $nuc = '';
foreach $nuc (keys %{$seq[$i]}){
if ($seq[$i]{$nuc} > $max){
$max_index = lc($nuc);
$max = $seq[$i]{$nuc};
}
}
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data(@array);
my $z = ($max - $stat->mean())/$stat->standard_deviation();
$max_index = '-' if ($z <= $low_z);
$max_index = uc($max_index) if ($z >= $high_z);
$outseq = $outseq . $max_index;
push (@out, $z);
push (@pos, $i);
}
if ($output eq 'g' || $output eq 'show'){
&G::Tools::Graph::_UniMultiGrapher(\@
pos,\@ out,-x=>"position",
-y=>"Z value",
-title=>"Consensus by Z value",
-filename=>$filename
);
msg_gimv("graph/$filename") if ($output eq "show");
}elsif ($output eq 'f'){
$, = ',';
mkdir ('data', 0777);
open(OUT, '>data/' . $filename);
print OUT @out, "\n";
close(OUT);
$, = '';
}
return $outseq;
}
sub logmod
{ my $x=shift;
if($x == 0){
return 0;
}
else{
return log($x);
}
}
sub new
{ my $pkg = shift;
my $filename = shift;
my $option = shift;
my $this;
return $this;
}
General documentation