Primitive documentation.

Privates (from "my" definitions)

# Codon lookup table used by translate(): maps each lowercase DNA codon
# (a/c/g/t only, all 64 triplets) to its one-letter amino-acid code.
# The stop codons taa, tag and tga are mapped to '/'.
%CodonTable = ( 'gac', 'D', 'caa', 'Q', 'gca', 'A', 'ctg', 'L', 'gat', 'D', 'cag', 'Q', 'gcc', 'A', 'ctt', 'L', 'gaa', 'E', 'agc', 'S', 'gcg', 'A', 'ata', 'I', 'gag', 'E', 'agt', 'S', 'gct', 'A', 'atc', 'I', 'aga', 'R', 'tca', 'S', 'gga', 'G', 'att', 'I', 'agg', 'R', 'tcc', 'S', 'ggc', 'G', 'cca', 'P', 'cga', 'R', 'tcg', 'S', 'ggg', 'G', 'ccc', 'P', 'cgc', 'R', 'tct', 'S', 'ggt', 'G', 'ccg', 'P', 'cgg', 'R', 'aca', 'T', 'gta', 'V', 'cct', 'P', 'cgt', 'R', 'acc', 'T', 'gtc', 'V', 'atg', 'M', 'aaa', 'K', 'acg', 'T', 'gtg', 'V', 'tgg', 'W', 'aag', 'K', 'act', 'T', 'gtt', 'V', 'tgc', 'C', 'cac', 'H', 'tac', 'Y', 'tta', 'L', 'tgt', 'C', 'cat', 'H', 'tat', 'Y', 'ttg', 'L', 'taa', '/', 'aac', 'N', 'ttc', 'F', 'cta', 'L', 'tag', '/', 'aat', 'N', 'ttt', 'F', 'ctc', 'L', 'tga', '/' )

           This class is a part of G-language Genome Analysis Environment,
           collecting basic sequence analysis methods.

Name: DoubleHelix - we all love the DNA molecule! Description: This method prints the given sequence as an ASCII text-based graphic depicting a DNA molecule. Enjoy :-) Usage: DoubleHelix($seq); Options: -speed sets the time, in seconds, taken to display one base pair (default: 0.05). Author: Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20060511-01 initial posting

Name: shuffleseq - create randomized sequence with conserved composition Description: Shuffle and randomize the given sequence, conserving the nucleotide/peptide content of the original sequence. The Fisher-Yates algorithm is used for shuffling. Usage: $shuffled_seq = shuffleseq($gb); Options: None. References: 1. Fisher R.A. and Yates F. (1938) "Example 12", Statistical Tables, London 2. Durstenfeld R. (1964) "Algorithm 235: Random permutation", CACM 7(7):420 Author: Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20070612-01 initial posting

Name: splitprintseq - format sequence data for printing Description: This method splits the given sequence string into segments of a fixed length and appends a newline ("\n") to each segment, so that the sequence prints in a formatted way. By default, the string is split into segments of 60 letters. This length can be changed by supplying the second argument. Usage: print splitprintseq($seq); # print sequence in a formatted way. or $formatted_seq = splitprintseq($seq, 100); # split into segments of 100 characters and add "\n" to each line. Options: Length of segments can be specified as the second argument. Default is 60 characters to match GenBank. Author: Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20050116-01 initial posting

Name: to_fasta - output given sequence to a FASTA file Description: This method outputs the given sequence as a FASTA file. Usage: to_fasta($seq, -name=>"My sequence"); # output the sequence to out.fasta or $fasta_string = to_fasta($gb, -output=>"return"); Options: -name: string for the FASTA header (default: "sequence"); if the first argument is an instance of new G(), $gb->{LOCUS}->{id} is used as the default. -length: number of characters per line (default: 60). -filename: output filename (default: "out.fasta"). -output: "f" to output to a file and return the filename; "return" to return the FASTA as a string. Author: Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History: 20070612-01 added -output option 20050116-01 initial posting

# DoubleHelix - print a sequence as an animated ASCII double helix.
#
# Arguments are handed to SubOpt::opt_get; the first remaining argument is
# converted with SubOpt::opt_as_gb and its {SEQ} string is drawn one base
# pair per output line.
#
# Options:
#   -speed  pause after each printed base pair, in seconds (default 0.05)
#
# Side effects: writes to the currently selected output handle and turns on
# autoflush ($|) for it; the setting is left enabled after the call.
# Returns nothing useful.
sub DoubleHelix {

    SubOpt::opt_default(speed=>0.05);
    my @args = SubOpt::opt_get(@_);
    my $gb = SubOpt::opt_as_gb(shift @args);
    my $speed = SubOpt::opt_val("speed");

    # Flush every line immediately so the per-base delay is visible.
    $| = 1;

    # Row counter; must start at 0 so the modulo below is well defined.
    my $i = 0;

    # Per-row left indentation and strand separation; both tables have the
    # same length and are cycled through to draw the twist.
    my @offset = qw/1 0 0 0 1 2 3 4 5 5 4 3 2 1 0 0 0 1/;
    my @dist   = qw/0 2 3 4 4 4 3 2 0 0 2 3 4 4 4 3 2 0/;

    foreach my $base (split(//, $gb->{SEQ})){
        # Left margin plus the row-specific indentation.
        print "         ", ' ' x $offset[$i % scalar(@offset)];
        print uc($base);

        # The counter is advanced between the two lookups, so the
        # separation is read one step ahead of the indentation.
        $i++;
        print '-' x $dist[$i % scalar(@dist)];
        print uc(complement($base)), "\n";

        # Four-argument select with undef handles is a sub-second sleep.
        select(undef, undef, undef, $speed);
    }

    return;
}

# complement - return the reverse complement of a nucleotide string.
#
# The input is reversed and every IUPAC nucleotide code is replaced by its
# complement, preserving case ('u'/'U' is complemented to 'a'/'A').
# Characters outside the translation table are left untouched.
sub complement {
    my ($sequence) = @_;

    # reverse() in scalar context reverses the characters of the string.
    my $revcomp = scalar reverse($sequence);

    $revcomp =~ tr/acgturymkdhbvwsnACGTURYMKDHBVWSN/tgcaayrkmhdvbwsnTGCAAYRKMHDVBWSN/;

    return $revcomp;
}

# shuffleseq - return a random permutation of the given sequence.
#
# The first argument left after SubOpt::opt_get is converted with
# SubOpt::opt_as_gb; its {SEQ} string is split into single characters,
# permuted with shuffle() and joined back together, so the composition of
# the sequence is conserved.
# NOTE(review): shuffle() is presumably List::Util::shuffle; the import is
# not visible in this section.
sub shuffleseq {
    my @params = SubOpt::opt_get(@_);
    my $genome = SubOpt::opt_as_gb(shift @params);

    my @residues = split(//, $genome->{SEQ});
    my @permuted = shuffle(@residues);

    return join('', @permuted);
}

# splitprintseq - break a sequence string into fixed-width lines.
#
# Arguments:
#   $seq  the sequence string to format
#   $len  optional line width in characters (default 60); a fractional
#         width is truncated to an integer, and a missing, zero, negative
#         or non-numeric width falls back to 60
#
# Returns the sequence with "\n" appended after every $len characters
# (including after the final, possibly shorter, segment). An empty or
# undefined sequence yields the empty string.
sub splitprintseq {

    my $seq = shift;
    my $len = shift;

    # Normalise the width. A width below 1 would never consume any
    # characters, so it is replaced by the default instead of looping forever.
    {
        no warnings 'numeric';
        $len = int($len || 0);
    }
    $len = 60 if $len < 1;

    return '' unless defined $seq;

    my $ret   = '';
    my $total = length $seq;

    # Walk the string by offset rather than repeatedly trimming its front.
    for (my $pos = 0; $pos < $total; $pos += $len) {
        $ret .= substr($seq, $pos, $len) . "\n";
    }

    return $ret;

}

# to_fasta - render a sequence in FASTA format, to a file or as a string.
#
# Arguments are handed to SubOpt::opt_get; the first remaining argument is
# converted with SubOpt::opt_as_gb and its {SEQ} string is formatted with
# splitprintseq().
#
# Options:
#   -name      FASTA header text (default "sequence"); when left at the
#              default and $gb->{LOCUS}->{id} is non-empty, that id is used
#   -length    characters per sequence line (default 60)
#   -filename  output file name (default "out.fasta")
#   -output    "f" writes the file and returns its name (default);
#              any other value returns the FASTA text as a string
#
# Dies if the output file cannot be opened, written or closed.
sub to_fasta {

    SubOpt::opt_default(length=>60, filename=>"out.fasta", name=>"sequence", output=>"f");
    my @args = SubOpt::opt_get(@_);
    my $gb = SubOpt::opt_as_gb(shift @args);
    my $filename = SubOpt::opt_val("filename");
    my $name = SubOpt::opt_val("name");
    my $length = SubOpt::opt_val("length");
    my $output = SubOpt::opt_val("output");

    # Look the locus id up without autovivifying {LOCUS} in the caller's data.
    my $locus_id = $gb->{LOCUS} ? $gb->{LOCUS}->{id} : undef;
    if ($name eq 'sequence' && defined $locus_id && length $locus_id) {
        $name = $locus_id;
    }

    my $fasta = sprintf ">%s\n%s", $name, splitprintseq($gb->{SEQ}, $length);

    return $fasta unless $output eq 'f';

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode or command, and no global handle is clobbered.
    open(my $out, '>', $filename)
        or die "to_fasta: cannot open '$filename' for writing: $!";
    printf {$out} '%s', $fasta
        or die "to_fasta: cannot write to '$filename': $!";
    # Buffered write errors only surface when the handle is closed.
    close($out)
        or die "to_fasta: cannot close '$filename': $!";

    return $filename;

}

# translate - translate a nucleotide string into one-letter amino-acid codes.
#
# The input is lowercased and read codon by codon from the first base.
# A codon containing anything other than a, t, g or c is translated as 'X';
# every other codon is looked up in %CodonTable. If the sequence length is
# not a multiple of three, the trailing bases are ignored and msg_error()
# is called after translation. Returns the amino-acid string.
sub translate {
    my $dna      = lc(shift);
    my $total    = length($dna);
    my $leftover = $total % 3;
    my $protein  = '';

    for (my $pos = 0; $pos + 3 <= $total; $pos += 3) {
        my $triplet = substr($dna, $pos, 3);
        $protein .= ($triplet =~ /[^atgc]/) ? 'X' : $CodonTable{$triplet};
    }

    msg_error("Translation: illegal length.\n") if $leftover;

    return $protein;
}

DoubleHelix	Description	Code
complement	No description	Code
shuffleseq	Description	Code
splitprintseq	Description	Code
to_fasta	Description	Code
translate	No description	Code