G::Seq Consensus
SummaryIncluded librariesPackage variablesDescriptionGeneral documentationMethods
Summary
G::Seq::Consensus - Analysis methods related to sequence consensus
Package variables
No package variables defined.
Included modules
G::Messenger
G::Seq::Primitive
G::Seq::Util
G::Tools::Graph
SelfLoader
SubOpt
Inherit
Exporter
Synopsis
No synopsis!
Description
    This class is a part of G-language Genome Analysis Environment, 
    collecting sequence analysis methods related to sequence consensus.
Methods
_base_printer
No description
Code
base_counterDescriptionCode
base_entropyDescriptionCode
base_individual_information_matrix
No description
Code
base_information_contentDescriptionCode
base_relative_entropyDescriptionCode
base_z_valueDescriptionCode
consensus_zDescriptionCode
logmod
No description
Code
Methods description
base_countercode    nextTop
  Name: base_counter   -   creates a position weight matrix of oligomers around start codon

  Description:
    This function creates a position weight matrix (PWM) of 
    oligomers of specified length around the start codon of all 
    genes in the given genome.

  Usage:
    $structure = base_counter($genome);

    Here the $structure is a reference to the array of hashes, intended for 
    internal usage.

  Options:
     -position   either "start" (around start codon) or "end" (around stop codon)
                 to create the PWM (default: "start");
     -PatLen     length of oligomer to count (default: 3)
     -upstream   length upstream of specified position to create PWM (default: 30)
     -downstream length downstream of specified position to create PWM (default: 30)
     -filename   outfile file name (default: consensus.csv)
     -output     "stdout" for standard output, "f" for file

  Author: 
    Koya Mori(mory@g-language.org)
History: 20010623-01 initial posting (ported from ATG7 program, "bun")
base_entropycodeprevnextTop
  Name: base_entropy   -   calculates and graphs the sequence conservation using Shanon uncertainty (entropy)

  Description:
    This function calculates and graphs the sequence conservation in regions 
    around the start/stop codons using Shanon uncertainty (entropy).

  Usage:
    ($position, "entropy", $upstream-length, $downstream-length, @entropyvalues) = base_entropy($genome);

  Options:
     -position   either "start" (around start codon) or "end" (around stop codon)
                 to create the PWM (default: "start");
     -PatLen     length of oligomer to count (default: 3)
     -upstream   length upstream of specified position to create PWM (default: 30)
     -downstream length downstream of specified position to create PWM (default: 30)
     -filename   outfile file name (default: consensus.csv)
     -output     "g" for graph, "f" for file, "show" for display

  Author: 
    Koya Mori(mory@g-language.org)
History: 20010623-01 initial posting (ported from ATG7 program, "buns::ent")
base_information_contentcodeprevnextTop
  Name: base_information_content   -   calculates and graphs the sequence conservation using information content

  Description:
    This function calculates and graphs the sequence conservation in regions 
    around the start/stop codons using information content.

  Usage:
    ($position, "IC", $upstream-length, $downstream-length, @entropyvalues) = base_information_content($genome);

  Options:
     -position   either "start" (around start codon) or "end" (around stop codon)
                 to create the PWM (default: "start");
     -PatLen     length of oligomer to count (default: 3)
     -upstream   length upstream of specified position to create PWM (default: 30)
     -downstream length downstream of specified position to create PWM (default: 30)
     -filename   outfile file name (default: consensus.csv)
     -output     "g" for graph, "f" for file, "show" for display

  Author: 
    Koya Mori(mory@g-language.org)
History: 20010707-01 initial posting (ported from ATG7 program, "buns::rseq")
base_relative_entropycodeprevnextTop
  Name: base_relative_entropy   -   calculates and graphs the sequence conservation using Kullback-Leibler divergence (relative entropy)

  Description:
    This function calculates and graphs the sequence conservation in regions 
    around the start/stop codons using Kullback-Leibler divergence (relative entropy).

  Usage:
    ($position, "RE", $upstream-length, $downstream-length, @entropyvalues) = base_relative_entropy($genome);

  Options:
     -position   either "start" (around start codon) or "end" (around stop codon)
                 to create the PWM (default: "start");
     -PatLen     length of oligomer to count (default: 3)
     -upstream   length upstream of specified position to create PWM (default: 30)
     -downstream length downstream of specified position to create PWM (default: 30)
     -filename   outfile file name (default: consensus.csv)
     -output     "g" for graph, "f" for file, "show" for display

  Author: 
    Koya Mori(mory@g-language.org)
History: 20010714-01 initial posting (ported from ATG7 program, "buns::ic")
base_z_valuecodeprevnextTop
  Name: base_z_value   -   extracts conserved oligomers per position using Z-score

  Description:
    This function calculates and extracts conserved oligomers per position using Z-score,
    in regions around the start/stop codons using Z-score

  Usage:
    $structure = base_z_value($genome);

    Here the $structure is a reference to the array of hashes, intended for 
    internal usage.

  Options:
     -limit      rank threshold for showing the conserved oligomer (default: 5)
     -position   either "start" (around start codon) or "end" (around stop codon)
                 to create the PWM (default: "start");
     -PatLen     length of oligomer to count (default: 3)
     -upstream   length upstream of specified position to create PWM (default: 30)
     -downstream length downstream of specified position to create PWM (default: 30)
     -output     "stdout" for standard output, "f" for file

  Author: 
    Koya Mori(mory@g-language.org)
History: 20010714-01 initial posting (ported from ATG7 program, "buns::ic")
consensus_zcodeprevnextTop
  Name: consensus_z   -   calculate consensus in given array of sequences

  Description:
    Calculates the consensus of given list of sequences, using Z-score.
    Consensus with Z value greater than int high_z (default is 1) is 
    capitalized, and consensus with Z value less than int low_z 
    (default is 0.2) is shown as '-'.

  Usage:
     $consensus_sequence = &consensus_z(\@array_of_sequences);

      @array_of_sequences is a reference to an array of sequences.
      The sequences must be of same lengths.
       eg. consensus_z(["atgc","ctaa","tttt","cttg"]);
      returns 'cTt-'

  Options:
     -high      z value greater than which is significant (default: 1)
     -low       z value less than which is insignificant (default: 0.2)
     -filename  outfile name (default: consensus_z.png for -output=>"g",
                                       consensus_z.csv for -output=>"f")
     -output    "g" for graph, "f" for file, "show" for display

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20010906-01 update with options 20010713-01 initial posting
Methods code
_base_printerdescriptionprevnextTop
sub _base_printer {
    &opt_default(output=>"stdout",filename=>"consensus.csv");
    my @args=opt_get(@_);

    my $hash=shift @args;
    my $printer=opt_val("output");
    my $filename=opt_val("filename");
    my $PatLen=$$hash[0]{PatLen};
    my $upstream=$$hash[0]{upstream};
    my $downstream=$$hash[0]{downstream};
#    my $gb=$$hash[0]{gbk};
my $position=$$hash[0]{pos}; my $i; my $j; if($printer eq "f"){ open(FILE,">>$filename"); print FILE "Pattern"; for(my $i=$upstream;$i>-$downstream;$i--){ print FILE ",$i"; } print FILE "\n"; foreach(sort keys(%{$$hash[0]{pat}})){ if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){ print FILE $_; for(my $i=$upstream;$i>-$downstream;$i--){ printf FILE ",%d",$$hash[$i]{$_}; } print FILE "\n"; } } print FILE "\n\n"; print FILE "Pattern"; for(my $i=$upstream;$i>-$downstream;$i--){ print FILE ",$i"; } print FILE "\n"; foreach(sort keys(%{$$hash[0]{pat}})){ if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){ print FILE $_; for(my $i=$upstream;$i>-$downstream;$i--){ printf FILE ",%03.3f",$$hash[$i]{$_}*100/$$hash[$i]{kazu};
} print FILE "\n"; } } close(FILE); } if($printer eq "stdout"){ &msg_send("CDS number:",$$hash[$upstream-1]{kazu},"\n"); for($i=$upstream;$i>-$downstream;$i=$i-12){ for(my $c=0;$c<102;$c++){ &msg_send("-"); } &msg_send("\npos\t"); for(my $c=0;$c<12 && $i-$c > -$downstream;$c++){ &msg_send(sprintf("%6d\t",-($i-$c))); } &msg_send("\n"); foreach(sort keys(%{$$hash[0]{pat}})){ if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){ &msg_send("\n $_|\t"); for($j=0;$j<12 && $i-$j>-$downstream;$j++){ &msg_send(sprintf("%6d\t",$$hash[$upstream-$i+$j]{$_})); } } } &msg_send("\n"); foreach(sort keys(%{$$hash[0]{pat}})){ if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "gbk" && $_ ne "kazu" && $_ ne "pos"){ &msg_send(sprintf("\n $_|\t")); for($j=0;$j<12 && $i-$j>-$downstream;$j++){ &msg_send(sprintf("%03.3f\t",$$hash[$upstream-$i+$j]{$_}*100/$$hash[$upstream-$i+$j]{kazu}));
} } } &msg_send("\n"); } }
}
base_counterdescriptionprevnextTop
sub base_counter {
    &opt_default(output=>"stdout",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"start");
    my @args=opt_get(@_);

    my $gb=opt_as_gb(shift @args);
    my $position=opt_val("position");
    my $PatLen=opt_val("PatLength");
    my $upstream=opt_val("upstream");
    my $downstream=opt_val("downstream");
    my $output=opt_val("output");
    my $filename=opt_val("filename");
    my @hash;
    my $num=1;
    my $start;
    my $i;
    my $nuc;
    my $cnuc;

    $PatLen--;
    $downstream++;

    foreach my $cds ($gb->cds()){
	if($gb->{$cds}->{direction} eq 'direct'){
	    if($position eq "start"){
		$start = $gb->{$cds}->{start};
	    }
	    elsif($position eq "end"){
		$start = $gb->{$cds}->{end};
	    }
	    else{
		&msg_error('Invalid parameter.Please enter "start" or "end".',"\n");
	    }

	    for($i=$upstream;$i>-$downstream;$i--){
		if($start-1-$i>0){
		    $hash[$upstream-$i]{kazu}++;
		    $nuc=$gb->getseq($start-1-$i,$start-1-$i+$PatLen);
		    next if(length($nuc != $PatLen));
		    $hash[$upstream-$i]{$nuc}++;
		    $hash[0]{pat}{$nuc}++;
		}
	    }
	}
	elsif($gb->{$cds}->{direction} eq 'complement'){
	    if($position eq "start"){
		$start = $gb->{$cds}->{end};
	    }
	    elsif($position eq "end"){
		$start = $gb->{$cds}->{start};
	    }
	    else{
		&msg_error('Invalid parameter.Please enter "start" or "end".',"\n");
	    }

	    for($i=$upstream;$i>-$downstream;$i--){
		if($start-1+$i<length($gb->{SEQ})){
		    $hash[$upstream-$i]{kazu}++;
		    $nuc=$gb->getseq($start-1+$i-$PatLen,$start-1+$i);
		    $cnuc=complement($nuc);
		    $hash[$upstream-$i]{$cnuc}++;
		    $hash[0]{pat}{$cnuc}++;
		}
	    }

	}
    }


    $hash[0]{upstream}=$upstream;
    $hash[0]{downstream}=$downstream;
    $hash[0]{PatLen}=$PatLen;
#    $hash[0]{gbk}=$gb;
$hash[0]{pos}=$position; if($output eq "f"){ _base_printer(\@hash,-output=>"f",-filename=>$filename); } if($output eq "stdout"){ _base_printer(\@hash,-output=>"stdout"); } return\@ hash;
}
base_entropydescriptionprevnextTop
sub base_entropy {
    &opt_default(output=>"show",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"start");
    my @args=opt_get(@_);

    my $gb=opt_as_gb(shift @args);
    my $position=opt_val("position");
    my $PatLen=opt_val("PatLength");
    my $upstream=opt_val("upstream");
    my $downstream=opt_val("downstream");
    my $printer=opt_val("output");
    my $filename=opt_val("filename");
    my $hash;
    my %ratio;
    my @loc_ratio;
    my @loc_total;
    my @entropy;
    my $offset;
    my %seqnum;
    my $tmp;
    my @y;

    $hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
    $downstream++;
    
    if($printer=~/f/){
	open(FILE,">>$filename");
    }

    for(my $i=$upstream;$i>-$downstream;$i--){
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		unless(defined($seqnum{$_})){
		    $offset=$tmp=0;
		    while($tmp!=-1){
			$tmp=index($gb->{SEQ},$_,$offset);
			$offset=$tmp+1;
			$seqnum{$_}++ if($tmp!=-1);
		    }
		}
		$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
if($ratio{$_} != 0){ $loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
$loc_ratio[$upstream-$i]{$_}=$loc_ratio[$upstream-$i]{$_}/$ratio{$_};
$loc_total[$upstream-$i]+=$loc_ratio[$upstream-$i]{$_}; } else{ $entropy[$upstream-$i]=0; } } } foreach(keys(%{$$hash[$upstream-$i]})){ if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){ $entropy[$upstream-$i]-=$loc_ratio[$upstream-$i]{$_}/$loc_total[$upstream-$i]
*&logmod(
$loc_ratio[$upstream-$i]{$_}/$loc_total[$upstream-$i])/&logmod(2.0)/length($_); } } if($printer=~/f/){ printf FILE "%d,%.5f\n",-$i ,$entropy[$upstream-$i]; } if($printer!~/[fn]/){ &msg_send(sprintf("%d\t%.5f\n",-$i ,$entropy[$upstream-$i])); } push(@y,-$i); } close(FILE); if($printer=~/g/ || $printer=~/show/){ &G::Tools::Graph::_UniMultiGrapher(\@y,\@entropy,-filename=>"base_entropy.png",-x=>"position",-y=>"entropy",-title=>"entropy"); } msg_gimv('graph/base_entropy.png') if($printer=~/show/); $tmp="entropy"; @entropy=($position,$tmp,$upstream,$downstream-1,@entropy); return @entropy;
}
base_individual_information_matrixdescriptionprevnextTop
sub base_individual_information_matrix {
    &opt_default(output=>"stdout",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
    my @args=opt_get(@_);

    my $gb=opt_as_gb(shift @args);
    my $position=opt_val("position");
    my $PatLen=opt_val("PatLength");
    my $upstream=opt_val("upstream");
    my $downstream=opt_val("downstream");
    my $printer=opt_val("output");
    my $filename=opt_val("filename");
    my $hash;
    my $hgenome;
    my $tmp;
    my $offset;
    my %seqnum;
    my %ratio;
    my @loc_ratio;
    my @IIM;
    my %q;

    $hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
    $downstream++;    

    for(my $i=$upstream;$i>-$downstream;$i--){
	$hgenome=0;
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		unless(defined($seqnum{$_})){
		    $offset=$tmp=0;
		    while($tmp!=-1){
			$tmp=index($gb->{SEQ},$_,$offset);
			$offset=$tmp+1;
			$seqnum{$_}++ if($tmp!=-1);
		    }
		}
		$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
$hgenome-=$ratio{$_} * logmod($ratio{$_})/logmod(2.0);
$loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
$loc_ratio[$upstream-$i]{$_}=1/($$hash[$upstream-$i]{kazu}+2) if($loc_ratio[$upstream-$i]{$_}==0);
} } foreach(keys(%{$$hash[$upstream-$i]})){ if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){ $IIM[$upstream-$i]{$_}=$hgenome-(4-1)/(2.0*logmod(2.0)*$$hash[$upstream-$i]{kazu})+
logmod($loc_ratio[$upstream-$i]{$_})/logmod
(2.0); } } } if($printer eq "f"){ open(FILE,">>$filename"); print FILE "Pattern"; for(my $i=$upstream;$i>-$downstream;$i--){ print FILE ",$i"; } print FILE "\n"; foreach(sort keys(%{$$hash[0]{pat}})){ print FILE $_; for(my $i=$upstream;$i>-$downstream;$i--){ print FILE sprintf(",%.3f",$IIM[$i]{$_}); } print FILE "\n"; } close(FILE); } if($printer eq "stdout"){ for(my $i=$upstream;$i>-$downstream;$i += 0){ for(my $c=0;$c<102;$c++){ &msg_send("-"); } &msg_send("\npos"); for(my $c=0;$c<10;$c++){ &msg_send("\t",-$i+$q{$_}*10); $i--; last if($i<=-$downstream); } &msg_send("\n\n"); foreach(sort keys %{$$hash[0]{pat}}){ &msg_send("$_\|"); for(my $c=0;$c<10;$c++){ &msg_send(sprintf("\t%.3f",$IIM[$c+$q{$_}*10]{$_})); last if($c+$q{$_}*10>=$downstream+$upstream-1); } &msg_send("\n"); $q{$_}++; } } } return\@ IIM;
}
base_information_contentdescriptionprevnextTop
sub base_information_content {
    &opt_default(output=>"show",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"start");
    my @args=opt_get(@_);

    my $gb=opt_as_gb(shift @args);
    my $position=opt_val("position");
    my $PatLen=opt_val("PatLength");
    my $upstream=opt_val("upstream");
    my $downstream=opt_val("downstream");
    my $printer=opt_val("output");
    my $filename=opt_val("filename");
    my $hash;
    my @IC;
    my $hgenome;
    my %ratio;
    my @loc_ratio;
    my $offset;
    my %seqnum;
    my $tmp;
    my @y;
    
    $hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
    $downstream++;

    if($printer=~/f/){
	open(FILE,">>$filename");
    }

    for(my $i=$upstream;$i>-$downstream;$i--){
	$hgenome=0;
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		unless(defined($seqnum{$_})){
		    $offset=$tmp=0;
		    while($tmp!=-1){
			$tmp=index($gb->{SEQ},$_,$offset);
			$offset=$tmp+1;
			$seqnum{$_}++ if($tmp!=-1);
		    }
		}
		$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
$hgenome-=$ratio{$_} * logmod($ratio{$_})/logmod(2.0);
$loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
} } $IC[$upstream-$i]=$hgenome - (4-1)/(2.0 * logmod(2.0) * $$hash[$upstream-$i]{kazu});
foreach(keys(%{$$hash[$upstream-$i]})){ if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){ $IC[$upstream-$i]+=logmod($loc_ratio[$upstream-$i]{$_})/logmod(2.0)*$loc_ratio[$upstream-$i]{$_};
} } if($printer=~/f/){ printf FILE "%d,%.5f\n",-$i ,$IC[$upstream-$i]; } if($printer!~/[fn]/){ &msg_send(sprintf("%d\t%.5f\n",-$i ,$IC[$upstream-$i])); } push(@y,-$i); } close(FILE); if($printer=~/g/ || $printer=~/show/){ &G::Tools::Graph::_UniMultiGrapher(\@y,\@IC,-filename=>"base_information_content.png",-x=>"position",-y=>"information_content",-title=>"information_content"); } msg_gimv('graph/base_information_content.png') if($printer=~/show/); $tmp="IC"; @IC=($position,$tmp,$upstream,$downstream-1,@IC); return\@ IC;
}
base_relative_entropydescriptionprevnextTop
sub base_relative_entropy {
    &opt_default(output=>"show",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"end");
    my @args=opt_get(@_);

    my $gb=opt_as_gb(shift @args);
    my $position=opt_val("position");
    my $PatLen=opt_val("PatLength");
    my $upstream=opt_val("upstream");
    my $downstream=opt_val("downstream");
    my $printer=opt_val("output");
    my $filename=opt_val("filename");
    my $hash;
    my @RE;
    my %ratio;
    my @loc_ratio;
    my $offset;
    my %seqnum;
    my $tmp;
    my @y;
    
    $hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
    $downstream++;

    if($printer=~/f/){
	open(FILE,">>$filename");
    }

    for(my $i=$upstream;$i>-$downstream;$i--){
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		unless(defined($seqnum{$_})){
		    $offset=$tmp=0;
		    while($tmp!=-1){
			$tmp=index($gb->{SEQ},$_,$offset);
			$offset=$tmp+1;
			$seqnum{$_}++ if($tmp!=-1);
		    }
		}
		$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
$loc_ratio[$upstream-$i]{$_}=$$hash[$upstream-$i]{$_}/$$hash[$upstream-$i]{kazu};
$RE[$upstream-$i]+=logmod($loc_ratio[$upstream-$i]{$_}/$ratio{$_})/logmod(2.0)*$loc_ratio[$upstream-$i]{$_}; } } if($printer=~/f/){ printf FILE "%d,%.5f\n",-$i ,$RE[$upstream-$i]; } if($printer!~/[fn]/){ &msg_send(sprintf("%d\t%.5f\n",-$i ,$RE[$upstream-$i])); } push(@y,-$i); } close(FILE); if($printer=~/g/ || $printer=~/show/){ &G::Tools::Graph::_UniMultiGrapher(\@y,\@RE,-filename=>"base_relative_entropy.png",-x=>"position",-y=>"relative_entropy",-title=>"relative_entropy"); } msg_gimv('graph/base_relative_entropy.png') if($printer=~/show/); $tmp="RE"; @RE=($position,$tmp,$upstream,$downstream-1,@RE); return\@ RE;
}
base_z_valuedescriptionprevnextTop
sub base_z_value {
    &opt_default(limit=>5,output=>"stdout",filename=>"consensus.csv",PatLength=>3,upstream=>30,downstream=>30,position=>"start");
    my @args=opt_get(@_);

    my $gb=opt_as_gb(shift @args);
    my $position=opt_val("position");
    my $PatLen=opt_val("PatLength");
    my $upstream=opt_val("upstream");
    my $downstream=opt_val("downstream");
    my $printer=opt_val("output");
    my $filename=opt_val("filename");
    my $limit=opt_val("limit");
    my $hash;
    my @Z;
    my @Z_value;
    my @Z_tmp;
    my %ratio;
    my $offset;
    my %seqnum;
    my $tmp;
    my @tmp;
    my @key;
    my $c;
    
    $hash=base_counter($gb,-output=>"n",-filename=>$filename,-position=>$position,-PatLength=>$PatLen,-upstream=>$upstream,-downstream=>$downstream);
    $downstream++;    

    if($printer eq "f"){
	open(FILE,">>$filename");
    }

    for(my $i=$upstream;$i>-$downstream;$i--){
	foreach(keys(%{$$hash[$upstream-$i]})){
	    if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){
		unless(defined($seqnum{$_})){
		    $offset=$tmp=0;
		    while($tmp!=-1){
			$tmp=index($gb->{SEQ},$_,$offset);
			$offset=$tmp+1;
			$seqnum{$_}++ if($tmp!=-1);
		    }
		}
		$ratio{$_}=$seqnum{$_}/(length($gb->{SEQ})-length($_)+1);
$Z_tmp[$upstream-$i]{$_}=($$hash[$upstream-$i]{$_}-$$hash[$upstream-$i]{kazu}*$ratio{$_}) /sqrt($$hash[$upstream-$i]{kazu}*$ratio{$_}*(1-$ratio{$_}));
} } foreach(keys(%{$Z_tmp[$upstream-$i]})){ if($_ ne "PatLen" && $_ ne "upstream" && $_ ne "downstream" && $_ ne "kazu" && $_ ne "pat" && $_ ne "gbk" && $_ ne "pos"){ push(@Z_value,sprintf("%.5f",$Z_tmp[$upstream-$i]{$_})." ".$_); } } $c=1; foreach(sort{$b <=> $a}@Z_value){ @tmp=split(/ +/,$_); $Z[$upstream-$i]{$c}{$tmp[1]}=$tmp[0]; $c++; last if($c>$limit); } if($i==0){ if($printer eq "f"){ print FILE "position:0,"; } if($printer eq "stdout"){ &msg_send("position:0\n"); } } else{ if($printer eq "f"){ print FILE "position:",-1*$i,","; } if($printer eq "stdout"){ &msg_send("position:",-1*$i,"\n"); } } foreach(sort{$a <=> $b} keys(%{$Z[$upstream-$i]})){ if($printer eq "f"){ print FILE $_,",",(keys(%{$Z[$upstream-$i]{$_}}))[0],",", $Z[$upstream-$i]{$_}{(keys(%{$Z[$upstream-$i]{$_}}))[0]},"\n"; } if($printer eq "stdout"){ &msg_send($_,"\t",(keys(%{$Z[$upstream-$i]{$_}}))[0],"\t", $Z[$upstream-$i]{$_}{(keys(%{$Z[$upstream-$i]{$_}}))[0]},"\n"); } } if($printer eq "f"){ print FILE ""; } if($printer eq "stdout"){ &msg_send("\n"); } @Z_value=(); } return\@ Z; close(FILE);
}
consensus_zdescriptionprevnextTop
sub consensus_z {
    require Statistics::Descriptive;

    &opt_default(high=>1, low=>0.2, output=>"show", 
		  filename=>"consensus_z.png");
    my @args = opt_get(@_);
    my $ref = shift @args;
    my $high_z = opt_val("high");
    my $low_z = opt_val("low");
    my $filename = opt_val("filename");
    $filename = 'consensus_z.csv' if (opt_val("output") eq "f");
    my $output = opt_val("output");
    my @inseq = @$ref;
    my ($i,$tmp,$outseq,@seq,@array,%nuc_table, @out, @pos);
    my $length = length($inseq[0]);
    my $rows = @inseq;
    my $nuc_max = 0;

    foreach $tmp (@inseq){
	for ($i = 0; $i < $length; $i++){
	    $seq[$i]{substr($tmp, $i,1)} += 1/$rows;
$nuc_table{substr($tmp, $i,1)} ++; push (@array, $seq[$i]{substr($tmp,$i,1)}); } } foreach $tmp (keys %nuc_table){$nuc_max ++;} for ($i = 0; $i < $length; $i++){ my $max = 0.0; my $max_index; my $nuc = ''; foreach $nuc (keys %{$seq[$i]}){ if ($seq[$i]{$nuc} > $max){ $max_index = lc($nuc); $max = $seq[$i]{$nuc}; } } my $stat = Statistics::Descriptive::Full->new(); $stat->add_data(@array); my $z = ($max - $stat->mean())/$stat->standard_deviation();
$max_index = '-' if ($z <= $low_z); $max_index = uc($max_index) if ($z >= $high_z); $outseq = $outseq . $max_index; push (@out, $z); push (@pos, $i); } if ($output eq 'g' || $output eq 'show'){ &G::Tools::Graph::_UniMultiGrapher(\@ pos,\@ out,-x=>"position", -y=>"Z value", -title=>"Consensus by Z value", -filename=>$filename ); msg_gimv("graph/$filename") if ($output eq "show"); }elsif ($output eq 'f'){ $, = ','; mkdir ('data', 0777); open(OUT, '>data/' . $filename); print OUT @out, "\n"; close(OUT); $, = ''; } return $outseq;
}
logmoddescriptionprevnextTop
sub logmod {
    return $_[0] == 0 ? 0 : log($_[0]);
}
General documentation
No general documentation available.