G::IO
GenBankI
Globals (from use vars definitions) |
$AUTOLOAD |
@INC |
$VERSION = '1.0' |
use G::IO::GenBankI; @ISA = (G::IO::GenBankI);
|
This module is what is formerly known as 'Prelude'. GenBank input parser. Intended for internal use only.
|
close_gb | No description | Code |
getnucs | No description | Code |
goto_features | No description | Code |
goto_origin | No description | Code |
next_locus | No description | Code |
next_seq | No description | Code |
open_gb | No description | Code |
read_features | No description | Code |
read_header | No description | Code |
read_locus | No description | Code |
rewind_genome | No description | Code |
Methods description
Methods code
sub close_gb
{ my $this = shift;
close(GENBANK);
}
sub getnucs
{ my $this = shift;
while(<GENBANK>){
last if (/\/\//);
s/[^A-Za-z]//g;
$this->{"SEQ"} .= lc($_);
}
}
sub goto_features
{ my $this = shift;
while(<GENBANK>){
last if (/^FEATURES/);
}
}
sub goto_origin
{ my $this = shift;
while(<GENBANK>){
last if (/^ORIGIN/);
}
}
sub next_locus
{ my $this = shift;
my $msg = shift;
undef %{$this};
$this->read_locus($msg);
$this->read_header();
$this->read_features();
$this->getnucs();
if (length($this->{SEQ}) > 0){
$this->seq_info() unless($msg);
return 1;
}else{
return 0;
}
}
sub next_seq
{ my $this = shift;
my $len = '100';
my $opt = shift;
my $char = '1';
$len = $opt if ($opt);
$this->{SEQ} = '';
while($len > 0 && $char ne ''){
$char = getc(GENBANK);
next unless ($char =~ /[a-zA-Z]/);
$len --;
$this->{SEQ} .= lc($char);
}
$this->{position} = tell GENBANK;
return $char;
}
sub open_gb
{ my $this = shift;
my $filename = shift;
open(GENBANK, $filename) || die("Error at G::IO::GenBankI: $!\n");
return *GENBANK;
}
sub read_features
{ local($_);
my $this = shift;
my $num = -1;
my $cds = 0;
my $transexc = 0;
$this->{"CDS0"}->{dummy} = 1;
while(<GENBANK>){
if (/^BASE COUNT/){
s/BASE COUNT //g;
$this->{"BASE_COUNT"} = $_;
}elsif (/^ORIGIN/){
last;
}elsif (/^ {5}(\S+)\s+(.*)$/ && $_ !~ /\//){
my $key = $1;
my $feature = $2;
$transexc = 0;
$num ++;
$this->{"FEATURE$num"}->{"feature"} = $num;
$this->{"FEATURE$num"}->{"type"} = $1;
$this->{"FEATURE$num"}->{"on"} = 1;
if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
$cds ++;
$this->{"CDS$cds"}->{"on"} = 1;
$this->{"FEATURE$num"}->{"cds"} = $cds;
}
s/\. \./\.\./g; ##for (1. .2) type irregular format
s/\^/\.\./g; ##for (1^2) type irregular format
my $part_left = tr/\<//d; ##for (<1..2) type irregular format
my $part_right = tr/\>//d; ##for (1..2>) type irregular format
$this->{"FEATURE$num"}->{"partial"} = "$part_left $part_right";
if (/join/){
if (/complement\(join/){
$this->{"FEATURE$num"}->{"direction"} = "complement";
$this->{"CDS$cds"}->{"direction"} = "complement";
s/complement//;
}else{
$this->{"FEATURE$num"}->{"direction"} = "direct";
$this->{"CDS$cds"}->{"direction"} = "direct";
}
my $line = $_;
my $fth = '';
my $linenum = tell GENBANK;
while($line !~ /\//){
$fth .= $line;
$line = <GENBANK>;
$linenum = tell GENBANK;
$linenum -= length($_);
}
seek GENBANK, $linenum, 0;
substr($fth, 0, 19) = '';
$fth =~ s/join//g;
$fth =~ s/\(//g;
$fth =~ s/\)//g;
$fth =~ s/ //g;
$fth =~ s/\n//g;
$fth =~ s/complement/c/g;
my $tmpfth = $fth;
$tmpfth =~ s/c//g;
my @choparray = split(/\.\./, $tmpfth);
$this->{"FEATURE$num"}->{"start"} = shift @choparray;
$this->{"CDS$cds"}->{"start"} = $this->{"FEATURE$num"}->{"start"};
$this->{"FEATURE$num"}->{"end"} = pop @choparray;
$this->{"CDS$cds"}->{"end"} = $this->{"FEATURE$num"}->{"end"};
$this->{"FEATURE$num"}->{"join"} = $fth;
$this->{"CDS$cds"}->{"join"} = $fth;
$this->{"CDS$cds"}->{"feature"} = $num;
if ($line =~ /\:/){
$this->{"FEATURE$num"}->{"partial"} = $line;
$this->{"CDS$cds"}->{"partial"} = $line;
}
}elsif (/\?/){
$this->{"FEATURE$num"}->{"type"} = "partial_$key";
$this->{"FEATURE$num"}->{"partial"} = $feature;
msg_error("Partial feature: $feature\n");
}elsif (/complement\((\d+)\.\.(\d+)\)/){
$this->{"FEATURE$num"}->{"direction"} = "complement";
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $2;
if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
$this->{"CDS$cds"}->{"direction"} = "complement";
$this->{"CDS$cds"}->{"start"} = $1;
$this->{"CDS$cds"}->{"end"} = $2;
$this->{"CDS$cds"}->{"feature"} = $num;
}
}elsif (/(\d+)\.\.(\d+)/){
$this->{"FEATURE$num"}->{"direction"} = "direct";
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $2;
if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
$this->{"CDS$cds"}->{"direction"} = "direct";
$this->{"CDS$cds"}->{"start"} = $1;
$this->{"CDS$cds"}->{"end"} = $2;
$this->{"CDS$cds"}->{"feature"} = $num;
}
}elsif (/\s+complement\((\d+)\)/){
$this->{"FEATURE$num"}->{"direction"} = "complement";
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $1;
}elsif (/\s+(\d+)/){
$this->{"FEATURE$num"}->{"direction"} = "direct";
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $1;
}elsif (/replace\((\d+)\,\"/){
$this->{"FEATURE$num"}->{"direction"} = "direct";
$this->{"FEATURE$num"}->{"direction"} = "complement" if (/complement/);
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $1;
$this->{"FEATURE$num"}->{"partial"} = $_;
}elsif (/(\d+).*\.\..*(\d+)/){
$this->{"FEATURE$num"}->{"direction"} = "direct";
$this->{"FEATURE$num"}->{"direction"} = "complement" if (/complement/);
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $2;
$this->{"FEATURE$num"}->{"partial"} = $_;
}else{
msg_error("Irregular location feature: $key $feature\n");
}
}else{
if (/\/(\w+)=\"([^\"]+)\"/){
$this->{"FEATURE$num"}->{"$1"} = $2;
}elsif (/\/(\w+)=\"([^\"]+)/){
my $tag = $1;
my $tmp = $2;
my $line;
while(<GENBANK>){
if (!/\"/){
$tmp .= $_;
}elsif (/([^\"]+)\"/){
$tmp .= $1;
last;
}
}
$tmp =~ s/\s+/ /g;
$tmp =~ s/ //g if ($tag eq 'translation');
$this->{"FEATURE$num"}->{$tag} = $tmp;
}elsif (/\/(\w+)=([\d|\d+])/){
$this->{"FEATURE$num"}->{$1} = $2;
}elsif (/\/(\w+)=\((.*)\)/){
my $key = $1;
my $val = $2;
if ($key eq 'transl_except'){
if ($transexc == 0){
$this->{"FEATURE$num"}->{$key} = $2;
}else{
$this->{"FEATURE$num"}->{"$key" . $transexc} = $2;
}
$transexc ++;
}else{
$this->{"FEATURE$num"}->{$key} = $val;
}
}elsif (/\/(\w+)=(.*)/){
$this->{"FEATURE$num"}->{$1} = $2;
}elsif (/\/(\w+)/){
$this->{"FEATURE$num"}->{$1} .= '1';
}
}
}
}
sub read_header
{ my $this = shift;
my $line = '';
while($line = <GENBANK>){
## if ($line =~ /^COMMENT/){
## s/COMMENT //g;
## while($line = <GENBANK>){
## last if ($line =~ /^FEATURES/);
## last unless (substr($line, 0, 1) eq ' ');
## $line =~ s/ +//g;
## $this->{"COMMENT"} .= $line;
## }
## }
last if ($line =~ /^FEATURES/);
$this->{HEADER} .= $line;
}
}
sub read_locus
{ my $this = shift;
my $msg = shift;
my $tmp = '';
local($_);
while(<GENBANK>){
next unless (/LOCUS/);
my @locus_line = split;
shift @locus_line;
if ($##locus_line == 6){
$this->{"LOCUS"}->{"circular"} = 1;
($this->{"LOCUS"}->{"id"},
$this->{"LOCUS"}->{"length"},
undef,
$this->{"LOCUS"}->{"nucleotide"},
undef,
$this->{"LOCUS"}->{"type"},
$this->{"LOCUS"}->{"date"}) = @locus_line;
}elsif ($##locus_line == 5){
$this->{"LOCUS"}->{"circular"} = 0;
($this->{"LOCUS"}->{"id"},
$this->{"LOCUS"}->{"length"},
undef,
$this->{"LOCUS"}->{"nucleotide"},
$this->{"LOCUS"}->{"type"},
$this->{"LOCUS"}->{"date"}) = @locus_line;
}else{
msg_error("ERROR: Unknown LOCUS definition\n") if ($msg ne 'no msg');
$this->{"LOCUS"}->{"circular"} = 0;
$this->{"LOCUS"}->{"id"} = shift @locus_line;
}
rewind_genome | description | top | prev | next |
sub rewind_genome
{ my $this = shift;
seek GENBANK, $this->{origin}, 0;
return 1;
}
General documentation