Bioperl documentation.

G::IO

Bioperl

Summary

Included libraries

Package variables

Synopsis

Description

General documentation

Methods

Summary

    G::IO::Bioperl

Package variables

top

Globals (from use vars definitions)
@EXPORT
$VERSION
@EXPORT_OK

Included modules

top

strict

Inherit

top

AutoLoader Exporter

Synopsis

top

    use G::IO::Bioperl;
    G::IO::Bioperl::convert($bioperl, $glang);

Description

top

    This class is intended to be called internally.

Methods

top

BEGIN		Code
DESTROY	No description	Code
_set_glang_header	No description	Code
_write_line_GenBank_regex	No description	Code
convert	No description	Code

Methods description

Methods code

BEGIN

top

BEGIN {

    eval "use Bio::Seq;";
    if ($@) { warn "$@" };
    eval "use Bio::SeqIO;";
    if ($@) { warn "$@" };

}

DESTROY

description

top

sub DESTROY {

    my $this = shift;

}

_set_glang_header

description

top

sub _set_glang_header {

    my $gb = shift;     ##G instance
    my $seq = shift;    ##Bioperl instance

    my ($mol, $i);
    my $len = $seq->length();
    my $div = 'UNK';    
    my $circular = 'linear  ';
    $div = $seq->division if ( $seq->can('division') );
    $mol = $seq->alphabet || 'DNA' if( !$seq->can('molecule') || ! defined ($mol = $seq->molecule()) );
    $circular = 1 if $seq->is_circular;

    local($^W) = 0;   ## supressing warnings about uninitialized fields.
    
    my $date = '';
    if( $seq->can('get_dates') ) {      
	($date) = $seq->get_dates();
    }

    $gb->{LOCUS}->{circular} = $circular;
    $gb->{LOCUS}->{id} = $seq->id();
    $gb->{LOCUS}->{length} = $len;
    $gb->{LOCUS}->{nucleotide} = $mol;
    $gb->{LOCUS}->{type} = $div;
    $gb->{LOCUS}->{date} = $date;

    $gb->{HEADER} .= _write_line_GenBank_regex("DEFINITION  ", "            ",
				    $seq->desc(),"\\s\+\|\$",80);
    
    ## if there, write the accession line
    my @acc = ();
    push(@acc, $seq->accession_number());
    if( $seq->isa('Bio::Seq::RichSeqI') ) {
	push(@acc, $seq->get_secondary_accessions());
    }
    $gb->{HEADER} .= sprintf("ACCESSION   %s\n", join(" ", @acc));
    
    ## if PID defined, print it
    if($seq->isa('Bio::Seq::RichSeqI') && $seq->pid()) {
	$gb->{HEADER} .= sprintf("PID         %s\n", $seq->pid());
    }
    
    ## if there, write the version line
    if($seq->isa('Bio::Seq::RichSeqI') && defined($seq->seq_version)) {
	my $id = $seq->primary_id(); ## this may be a GI number
	$gb->{HEADER} .= sprintf("VERSION     %s\.%s%s\n",
				 $seq->accession_number(), ".", $seq->seq_version,
				 ($id && ($id =~ /^\d+$/) ? "  GI:".$id : ""));
    }
    
    ## if there, write the keywords line
    if( $seq->can('keywords') ) {
	$gb->{HEADER} .= sprintf("KEYWORDS    %s\n",$seq->keywords);
    }
    
    ## Organism lines
    if (my $spec = $seq->species) {
	my ($species, $genus, @class) = $spec->classification();
	my $OS;
	if( $spec->common_name ) {
	    $OS = $spec->common_name;
	} else { 
	    $OS = "$genus $species";
	}
	if (my $ssp = $spec->sub_species) {
	    $OS .= " $ssp";
	}

	my $organelle = '';
	$organelle = $spec->organelle if (length $spec->organelle());
	$gb->{HEADER} .= sprintf("SOURCE      $OS.\n  ORGANISM  %s%s %s\n", 
				 $organelle, $genus, $species);
	my $OC = join('; ', (reverse(@class), $genus)) .'.';
	$gb->{HEADER} .= _write_line_GenBank_regex(' 'x12,' 'x12,
					$OC,"\\s\+\|\$",80);
    }
    
    ## Reference lines
    my $count = 1;
    foreach my $ref ( $seq->annotation('reference') ) {
	$gb->{HEADER} .= sprintf ("REFERENCE   $count  (%s %d to %d)",
			      ($seq->alphabet() eq "protein" ?
			       "residues" : "bases"),
			      $ref->start,$ref->end);

	$gb->{HEADER} .= _write_line_GenBank_regex("  AUTHORS   ",' 'x12,
					$ref->authors,"\\s\+\|\$",80);
	$gb->{HEADER} .=  _write_line_GenBank_regex("  TITLE     "," "x12,
					$ref->title,"\\s\+\|\$",80);
	$gb->{HEADER} .= _write_line_GenBank_regex("  JOURNAL   "," "x12,
					$ref->location,"\\s\+\|\$",80);
	if ($ref->comment) {
	    $gb->{HEADER} .= _write_line_GenBank_regex("  REMARK    "," "x12,
					    $ref->comment,"\\s\+\|\$",80);
	}
	if( $ref->medline) {
	    $gb->{HEADER} .= _write_line_GenBank_regex("  MEDLINE   "," "x12,
					    $ref->medline, "\\s\+\|\$",80);
	    if( $ref->pubmed ) {
		$gb->{HEADER} .= _write_line_GenBank_regex("   PUBMED   "," "x12,
						$ref->pubmed, "\\s\+\|\$",
						80);
	    }
	}
	$count++;
    }

    ## Comment lines
    foreach my $comment ( $seq->annotation('comment') ) {
	$gb->{COMMENT} = _write_line_GenBank_regex("COMMENT     "," "x12,
					$comment->text,"\\s\+\|\$",80);
    }
    
    unless(  $mol eq 'protein' ) {
	my $alen = $gb->{SEQ} =~ tr/a/a/;
	my $clen = $gb->{SEQ} =~ tr/c/c/;
	my $glen = $gb->{SEQ} =~ tr/g/g/;
	my $tlen = $gb->{SEQ} =~ tr/t/t/;
        
	my $olen = $len - ($alen + $tlen + $clen + $glen);
	
	my $base_count = sprintf("BASE COUNT %8s a %6s c %6s g %6s t%s\n",
				 $alen,$clen,$glen,$tlen,
				 ( $olen > 0 ) ? sprintf("%6s others",$olen) : '');
    }

    return 1;

}

_write_line_GenBank_regex

description

top

sub _write_line_GenBank_regex {

  my ($pre1,$pre2,$line,$regex,$length) = @_;
  
  my $subl = $length - (length $pre1) - 2;
  my @lines;
  my $return;

  while($line =~ m/(.{1,$subl})($regex)/g) {
    ## be strict about not padding spaces according to 
    ## genbank format
    my $l = $1.$2;
       $l =~ s/\s+$//;
       push(@lines, $l);
     }
  
  my $s = shift @lines;
  $return .= "$pre1$s\n";
  foreach my $s ( @lines ) {
    $return .= "$pre2$s\n";
  }

  return $return;

}

convert

description

top

sub convert {

    my $bpobj = shift;
    my $this = shift;

    my $num = -1;
    my $cds = 0;
    my ($feat,$tag);
    $this->{"CDS0"}->{dummy} = 1;

    foreach $feat ($bpobj->all_SeqFeatures()){
	$num ++;
	if ($feat->primary_tag eq 'CDS'){
	    $cds ++;
	    $this->{"CDS$cds"}->{start} = $feat->start;
	    $this->{"CDS$cds"}->{end} = $feat->end;
	    if ($feat->strand == 0 || $feat->strand == 1){
		$this->{"CDS$cds"}->{direction} = "direct";
	    }else{
		$this->{"CDS$cds"}->{direction} = "complement";
	    }
	    $this->{"CDS$cds"}->{feature} = $num;
	    $this->{"FEATURE$num"}->{cds} = $cds;
	}

	$this->{"FEATURE$num"}->{type} = $feat->primary_tag;
	$this->{"FEATURE$num"}->{start} = $feat->start;
	$this->{"FEATURE$num"}->{end} = $feat->end;
	
	if ($feat->strand == 0 || $feat->strand == 1){
	    $this->{"FEATURE$num"}->{direction} = "direct";
	}else{
	    $this->{"FEATURE$num"}->{direction} = "complement";
	}

	foreach $tag ($feat->all_tags()){
	    $this->{"FEATURE$num"}->{$tag} = join('', $feat->each_tag_value($tag));
	}

	unless (defined $feat->{_location}->{_start}){
	    my @new_join;
	    foreach my $loc (@{$feat->{_location}->{_sublocations}}){
		if ($loc->strand == -1){
		    push(@new_join, sprintf("c%d\.\.%d", $loc->start, $loc->end));
		}else{
		    push(@new_join, sprintf("%d\.\.%d", $loc->start, $loc->end));
		}
	    }
	    $this->{"FEATURE$num"}->{join} = join(',', @new_join);
	    $this->{"CDS$cds"}->{join} = join(',', @new_join) if ($feat->primary_tag eq 'CDS');
	}

    }
    
    $this->{"SEQ"} = lc($bpobj->seq());

    _set_glang_header($this, $bpobj);

}

General documentation

AUTHOR	top
Kazuharu Gaou Arakawa, gaou@g-language.org
SEE ALSO	top
perl(1).