G::Seq Primitive
SummaryPackage variablesDescriptionGeneral documentationMethods
Summary
  G::Seq::Primitive - Basic sequence analysis methods
Package variables
Privates (from "my" definitions)
%CodonTable = ( 'gac', 'D', 'caa', 'Q', 'gca', 'A', 'ctg', 'L', 'gat', 'D', 'cag', 'Q', 'gcc', 'A', 'ctt', 'L', 'gaa', 'E', 'agc', 'S', 'gcg', 'A', 'ata', 'I', 'gag', 'E', 'agt', 'S', 'gct', 'A', 'atc', 'I', 'aga', 'R', 'tca', 'S', 'gga', 'G', 'att', 'I', 'agg', 'R', 'tcc', 'S', 'ggc', 'G', 'cca', 'P', 'cga', 'R', 'tcg', 'S', 'ggg', 'G', 'ccc', 'P', 'cgc', 'R', 'tct', 'S', 'ggt', 'G', 'ccg', 'P', 'cgg', 'R', 'aca', 'T', 'gta', 'V', 'cct', 'P', 'cgt', 'R', 'acc', 'T', 'gtc', 'V', 'atg', 'M', 'aaa', 'K', 'acg', 'T', 'gtg', 'V', 'tgg', 'W', 'aag', 'K', 'act', 'T', 'gtt', 'V', 'tgc', 'C', 'cac', 'H', 'tac', 'Y', 'tta', 'L', 'tgt', 'C', 'cat', 'H', 'tat', 'Y', 'ttg', 'L', 'taa', '/', 'aac', 'N', 'ttc', 'F', 'cta', 'L', 'tag', '/', 'aat', 'N', 'ttt', 'F', 'ctc', 'L', 'tga', '/' )
Included modules
Algorithm::Numerical::Shuffle qw /shuffle/
G::Messenger
SelfLoader
SubOpt
Synopsis
No synopsis!
Description
           This class is part of the G-language Genome Analysis Environment,
           collecting basic sequence analysis methods.
Methods
DoubleHelixDescriptionCode
complement
No description
Code
shuffleseqDescriptionCode
splitprintseqDescriptionCode
to_fastaDescriptionCode
translate
No description
Code
Methods description
DoubleHelixcode    nextTop
  Name: DoubleHelix   -   we all love the DNA molecule!

  Description:
    This method prints the given sequence as an ASCII text-based graphic
    depicting a DNA molecule. Enjoy :-)
    
  Usage: 
    DoubleHelix($seq);

  Options:
    -speed      controls the time to display one base-pair.
                default is 0.05 (second)

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20060511-01 initial posting
shuffleseqcodeprevnextTop
  Name: shuffleseq   -   create randomized sequence with conserved composition

  Description:
    Shuffle and randomize the given sequence, conserving the nucleotide/peptide
    content of the original sequence. Fisher-Yates Algorithm is used for shuffling.
    
  Usage:
    $shuffled_seq = shuffleseq($gb);

  Options:
    None.

  References:
    1. Fisher R.A. and Yates F. (1938) "Example 12", Statistical Tables, London
    2. Durstenfeld R. (1964) "Algorithm 235: Random permutation", CACM 7(7):420
  
  Author:
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20070612-01 initial posting
splitprintseqcodeprevnextTop
  Name: splitprintseq   -   format sequence data for printing

  Description:
    This method splits the given sequence string in segments of 
    certain length and inserts newline code ("\n") to print the
    sequence in a formatted way. By default, this function splits
    the string in segments of 60 letters. This length can be
    changed by supplying the second argument.

    
  Usage : 
    print splitprintseq($seq); # print sequence in a formatted way.
      or
    $formatted_seq = splitprintseq($seq, 100);  
    # split into segments of 100 characters and add "\n" to each line.

 Options:
    Length of segments can be specified as the second argument.
    Default is 60 characters to match GenBank.

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20050116-01 initial posting
to_fastacodeprevnextTop
  Name: to_fasta   -   output given sequence to a FASTA file

  Description:
    This method outputs the given sequence as a FASTA file.
    
  Usage : 
    to_fasta($seq, -name=>"My sequence"); # output the sequence to out.fasta
      or
    $fasta_string = to_fasta($gb, -output=>"return");

 Options:
    -name        string for FASTA header (default: sequence)
                 if the first argument is an instance of new G(), 
                 $gb->{LOCUS}->{id} is used as default
    -length      number of characters per line (default: 60)
    -filename    output filename (default: "out.fasta")
    -output      "f" to output to file, and return the filename (default).
                 "return" to return the fasta as a string

  Author: 
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20070612-01 added -output option; 20050116-01 initial posting
Methods code
DoubleHelixdescriptionprevnextTop
# DoubleHelix - print a sequence as an ASCII-art double helix.
#
# Arguments go through SubOpt: the first positional argument is converted
# with SubOpt::opt_as_gb and its {SEQ} string is drawn one base pair per
# line.  The -speed option (default 0.05) is the pause, in seconds, made
# after each line.
sub DoubleHelix {
    SubOpt::opt_default(speed=>0.05);
    my @args = SubOpt::opt_get(@_);
    my $gb = SubOpt::opt_as_gb(shift @args);
    my $speed = SubOpt::opt_val("speed");

    # Flush after every print so each base pair shows up as it is drawn.
    $| = 1;

    # Per-line left padding and strand separation; both tables are cycled
    # through repeatedly.
    my @offset = qw/1 0 0 0 1 2 3 4 5 5 4 3 2 1 0 0 0 1/;
    my @dist   = qw/0 2 3 4 4 4 3 2 0 0 2 3 4 4 4 3 2 0/;

    # Start from an explicit 0 instead of relying on undef numifying to 0.
    my $i = 0;

    foreach my $base (split(//, $gb->{SEQ})){
        print "         ", ' ' x $offset[$i % scalar(@offset)];
        print uc($base);

        # The counter is advanced between the two lookups, so the spacer
        # width is taken one step ahead of the padding (original ordering).
        $i++;

        print '-' x $dist[$i % scalar(@dist)];
        print uc(complement($base)), "\n";

        # Four-argument select with undef handles acts as a sub-second sleep.
        select(undef, undef, undef, $speed);
    }
}
complementdescriptionprevnextTop
# complement - return the reverse complement of a nucleotide string.
#
# The input is reversed and every base is swapped for its partner.  IUPAC
# ambiguity codes are mapped as well, "u" is complemented to "a", and case
# is preserved.  Characters outside the table pass through unchanged.
sub complement {
    my ($strand) = @_;

    # Force scalar context so the string itself is reversed.
    my $revcomp = scalar reverse $strand;

    $revcomp =~ tr/acgturymkdhbvwsnACGTURYMKDHBVWSN/tgcaayrkmhdvbwsnTGCAAYRKMHDVBWSN/;

    return $revcomp;
}
shuffleseqdescriptionprevnextTop
# shuffleseq - return the sequence with its characters in shuffled order.
#
# The first positional argument (after SubOpt option parsing) is converted
# with SubOpt::opt_as_gb; its {SEQ} string is split into single characters,
# passed to shuffle(), and joined back into one string.
sub shuffleseq {
    my ($subject) = SubOpt::opt_get(@_);
    my $genome = SubOpt::opt_as_gb($subject);

    my @letters = split(//, $genome->{SEQ});

    return join('', shuffle(@letters));
}
splitprintseqdescriptionprevnextTop
# splitprintseq - break a sequence string into newline-terminated lines.
#
# Arguments:
#   $seq - the string to format
#   $len - characters per line (optional, default 60)
#
# Returns the formatted string; every segment, including the last one, is
# followed by "\n".  An undefined or empty $seq yields ''.
#
# A length that is missing, zero, negative, fractional below 1 or
# non-numeric falls back to 60.  Previously such values made substr remove
# nothing from the string, so the loop never terminated.
sub splitprintseq {
    my ($seq, $len) = @_;

    return '' unless defined $seq && length $seq;

    {
        # A non-numeric length simply means "use the default"; no need to warn.
        no warnings 'numeric';
        $len = int($len || 0);
    }
    $len = 60 if $len < 1;

    # Walk the string by offset rather than repeatedly chopping its front.
    my $ret = '';
    for (my $pos = 0; $pos < length $seq; $pos += $len) {
        $ret .= substr($seq, $pos, $len) . "\n";
    }

    return $ret;
}
to_fastadescriptionprevnextTop
# to_fasta - format a sequence as FASTA and write it to a file or return it.
#
# The first positional argument is converted with SubOpt::opt_as_gb and its
# {SEQ} string is used as the sequence.
#
# Options (via SubOpt):
#   -name      header text (default "sequence"); when left at the default
#              and $gb->{LOCUS}->{id} is non-empty, that id is used instead
#   -length    characters per line (default 60)
#   -filename  output file (default "out.fasta")
#   -output    "f" (default) writes the file and returns its name;
#              any other value returns the FASTA text as a string
#
# Dies if the output file cannot be opened or closed.
sub to_fasta {
    SubOpt::opt_default(length=>60, filename=>"out.fasta", name=>"sequence", output=>"f");
    my @args = SubOpt::opt_get(@_);
    my $gb = SubOpt::opt_as_gb(shift @args);
    my $filename = SubOpt::opt_val("filename");
    my $name = SubOpt::opt_val("name");
    my $length = SubOpt::opt_val("length");
    my $output = SubOpt::opt_val("output");

    # Look the id up without autovivifying $gb->{LOCUS} as a side effect.
    my $locus_id = $gb->{LOCUS} ? $gb->{LOCUS}->{id} : undef;
    if($name eq 'sequence' && defined $locus_id && length $locus_id){
        $name = $locus_id;
    }

    # Both output modes emit exactly the same text, so build it once.
    my $fasta = sprintf ">%s\n%s", $name, splitprintseq($gb->{SEQ}, $length);

    return $fasta unless($output eq 'f');

    # Three-argument open with a lexical handle: the filename can no longer
    # be interpreted as a mode or pipe, and no global OUT handle is clobbered.
    open(my $out, '>', $filename) or die("Cannot write $filename: $!");
    print {$out} $fasta;

    # Buffered write errors only surface at close, so check it.
    close($out) or die("Cannot close $filename: $!");

    return $filename;
}
translatedescriptionprevnextTop
# translate - translate a nucleotide string into one-letter amino acids.
#
# The input is lower-cased and read three characters at a time from the
# start.  A triplet containing any character other than a, t, g or c is
# reported as 'X'; every other triplet is looked up in %CodonTable.
# If the length is not a multiple of three, the leftover characters are
# ignored and msg_error() is called.  Returns the amino acid string.
sub translate {
    my $dna = lc(shift);
    my $total = length($dna);
    my @residues;

    # Step through complete triplets only.
    for (my $pos = 0; $pos + 3 <= $total; $pos += 3) {
        my $triplet = substr($dna, $pos, 3);
        push @residues, ($triplet =~ /[^atgc]/) ? 'X' : $CodonTable{$triplet};
    }

    # One or two trailing characters could not form a codon.
    msg_error("Translation: illegal length.\n") if($total % 3);

    return join('', @residues);
}
General documentation
No general documentation available.