G::IO
Bioperl
Globals (from use vars definitions) |
@EXPORT |
$VERSION |
@EXPORT_OK |
use G::IO::Bioperl; G::IO::Bioperl::convert($bioperl, $glang);
|
This class is intended to be called internally.
|
BEGIN | | Code |
DESTROY | No description | Code |
_set_glang_header | No description | Code |
_write_line_GenBank_regex | No description | Code |
convert | No description | Code |
Methods description
Methods code
BEGIN
{ eval "use Bio::Seq;";
if ($@) { warn "$@" };
eval "use Bio::SeqIO;";
if ($@) { warn "$@" };
}
sub DESTROY
{ my $this = shift;
}
sub _set_glang_header
{ my $gb = shift; ##G instance
my $seq = shift; ##Bioperl instance
my ($mol, $i);
my $len = $seq->length();
my $div = 'UNK';
my $circular = 'linear ';
$div = $seq->division if ( $seq->can('division') );
$mol = $seq->alphabet || 'DNA' if( !$seq->can('molecule') || ! defined ($mol = $seq->molecule()) );
$circular = 1 if $seq->is_circular;
local($^W) = 0; ## supressing warnings about uninitialized fields.
my $date = '';
if( $seq->can('get_dates') ) {
($date) = $seq->get_dates();
}
$gb->{LOCUS}->{circular} = $circular;
$gb->{LOCUS}->{id} = $seq->id();
$gb->{LOCUS}->{length} = $len;
$gb->{LOCUS}->{nucleotide} = $mol;
$gb->{LOCUS}->{type} = $div;
$gb->{LOCUS}->{date} = $date;
$gb->{HEADER} .= _write_line_GenBank_regex("DEFINITION ", " ",
$seq->desc(),"\\s\+\|\$",80);
## if there, write the accession line
my @acc = ();
push(@acc, $seq->accession_number());
if( $seq->isa('Bio::Seq::RichSeqI') ) {
push(@acc, $seq->get_secondary_accessions());
}
$gb->{HEADER} .= sprintf("ACCESSION %s\n", join(" ", @acc));
## if PID defined, print it
if($seq->isa('Bio::Seq::RichSeqI') && $seq->pid()) {
$gb->{HEADER} .= sprintf("PID %s\n", $seq->pid());
}
## if there, write the version line
if($seq->isa('Bio::Seq::RichSeqI') && defined($seq->seq_version)) {
my $id = $seq->primary_id(); ## this may be a GI number
$gb->{HEADER} .= sprintf("VERSION %s\.%s%s\n",
$seq->accession_number(), ".", $seq->seq_version,
($id && ($id =~ /^\d+$/) ? " GI:".$id : ""));
}
## if there, write the keywords line
if( $seq->can('keywords') ) {
$gb->{HEADER} .= sprintf("KEYWORDS %s\n",$seq->keywords);
}
## Organism lines
if (my $spec = $seq->species) {
my ($species, $genus, @class) = $spec->classification();
my $OS;
if( $spec->common_name ) {
$OS = $spec->common_name;
} else {
$OS = "$genus $species";
}
if (my $ssp = $spec->sub_species) {
$OS .= " $ssp";
}
my $organelle = '';
$organelle = $spec->organelle if (length $spec->organelle());
$gb->{HEADER} .= sprintf("SOURCE $OS.\n ORGANISM %s%s %s\n",
$organelle, $genus, $species);
my $OC = join('; ', (reverse(@class), $genus)) .'.';
$gb->{HEADER} .= _write_line_GenBank_regex(' 'x12,' 'x12,
$OC,"\\s\+\|\$",80);
}
## Reference lines
my $count = 1;
foreach my $ref ( $seq->annotation('reference') ) {
$gb->{HEADER} .= sprintf ("REFERENCE $count (%s %d to %d)",
($seq->alphabet() eq "protein" ?
"residues" : "bases"),
$ref->start,$ref->end);
$gb->{HEADER} .= _write_line_GenBank_regex(" AUTHORS ",' 'x12,
$ref->authors,"\\s\+\|\$",80);
$gb->{HEADER} .= _write_line_GenBank_regex(" TITLE "," "x12,
$ref->title,"\\s\+\|\$",80);
$gb->{HEADER} .= _write_line_GenBank_regex(" JOURNAL "," "x12,
$ref->location,"\\s\+\|\$",80);
if ($ref->comment) {
$gb->{HEADER} .= _write_line_GenBank_regex(" REMARK "," "x12,
$ref->comment,"\\s\+\|\$",80);
}
if( $ref->medline) {
$gb->{HEADER} .= _write_line_GenBank_regex(" MEDLINE "," "x12,
$ref->medline, "\\s\+\|\$",80);
if( $ref->pubmed ) {
$gb->{HEADER} .= _write_line_GenBank_regex(" PUBMED "," "x12,
$ref->pubmed, "\\s\+\|\$",
80);
}
}
$count++;
}
## Comment lines
foreach my $comment ( $seq->annotation('comment') ) {
$gb->{COMMENT} = _write_line_GenBank_regex("COMMENT "," "x12,
$comment->text,"\\s\+\|\$",80);
}
unless( $mol eq 'protein' ) {
my $alen = $gb->{SEQ} =~ tr/a/a/;
my $clen = $gb->{SEQ} =~ tr/c/c/;
my $glen = $gb->{SEQ} =~ tr/g/g/;
my $tlen = $gb->{SEQ} =~ tr/t/t/;
my $olen = $len - ($alen + $tlen + $clen + $glen);
my $base_count = sprintf("BASE COUNT %8s a %6s c %6s g %6s t%s\n",
$alen,$clen,$glen,$tlen,
( $olen > 0 ) ? sprintf("%6s others",$olen) : '');
}
return 1;
}
sub _write_line_GenBank_regex
{ my ($pre1,$pre2,$line,$regex,$length) = @_;
my $subl = $length - (length $pre1) - 2;
my @lines;
my $return;
while($line =~ m/(.{1,$subl})($regex)/g) {
## be strict about not padding spaces according to
## genbank format
my $l = $1.$2;
$l =~ s/\s+$//;
push(@lines, $l);
}
my $s = shift @lines;
$return .= "$pre1$s\n";
foreach my $s ( @lines ) {
$return .= "$pre2$s\n";
}
return $return;
}
sub convert
{ my $bpobj = shift;
my $this = shift;
my $num = -1;
my $cds = 0;
my ($feat,$tag);
$this->{"CDS0"}->{dummy} = 1;
foreach $feat ($bpobj->all_SeqFeatures()){
$num ++;
if ($feat->primary_tag eq 'CDS'){
$cds ++;
$this->{"CDS$cds"}->{start} = $feat->start;
$this->{"CDS$cds"}->{end} = $feat->end;
if ($feat->strand == 0 || $feat->strand == 1){
$this->{"CDS$cds"}->{direction} = "direct";
}else{
$this->{"CDS$cds"}->{direction} = "complement";
}
$this->{"CDS$cds"}->{feature} = $num;
$this->{"FEATURE$num"}->{cds} = $cds;
}
$this->{"FEATURE$num"}->{type} = $feat->primary_tag;
$this->{"FEATURE$num"}->{start} = $feat->start;
$this->{"FEATURE$num"}->{end} = $feat->end;
if ($feat->strand == 0 || $feat->strand == 1){
$this->{"FEATURE$num"}->{direction} = "direct";
}else{
$this->{"FEATURE$num"}->{direction} = "complement";
}
foreach $tag ($feat->all_tags()){
$this->{"FEATURE$num"}->{$tag} = join('', $feat->each_tag_value($tag));
}
unless (defined $feat->{_location}->{_start}){
my @new_join;
foreach my $loc (@{$feat->{_location}->{_sublocations}}){
if ($loc->strand == -1){
push(@new_join, sprintf("c%d\.\.%d", $loc->start, $loc->end));
}else{
push(@new_join, sprintf("%d\.\.%d", $loc->start, $loc->end));
}
}
$this->{"FEATURE$num"}->{join} = join(',', @new_join);
$this->{"CDS$cds"}->{join} = join(',', @new_join) if ($feat->primary_tag eq 'CDS');
}
}
$this->{"SEQ"} = lc($bpobj->seq());
_set_glang_header($this, $bpobj);
}
General documentation