G::IO
EmblI
Globals (from use vars definitions) |
$AUTOLOAD |
@INC |
$VERSION = '1.0' |
Privates (from my definitions) |
$infile; |
use G::IO::EmblI; @ISA = (G::IO::EmblI);
|
This module is what is formerly known as 'Prelude'. GenBank input parser. Intended for internal use only.
|
close_gb | No description | Code |
filepath | No description | Code |
getnucs | No description | Code |
goto_features | No description | Code |
goto_origin | No description | Code |
new | No description | Code |
next_locus | No description | Code |
next_seq | No description | Code |
open_gb | No description | Code |
read_features | No description | Code |
read_header | No description | Code |
read_locus | No description | Code |
rewind_genome | No description | Code |
Methods description
Methods code
sub close_gb
{ my $this = shift;
close(EMBL);
}
sub filepath
{ my $this = shift;
return $infile;
}
sub getnucs
{ my $this = shift;
while(<EMBL>){
next if (/^\S/);
last if (/\/\//);
s/[^A-Za-z]//g;
$this->{"SEQ"} .= lc($_);
}
}
sub goto_features
{ my $this = shift;
while(<EMBL>){
last if (/^FH/);
}
}
sub goto_origin
{ my $this = shift;
while(<EMBL>){
last if (/^SQ/);
}
}
sub new
{ my $pkg = shift;
my $filename = shift;
my $option = shift;
my $this = {};
bless $this;
return $this;
}
sub next_locus
{ my $this = shift;
my $msg = shift;
undef %{$this};
$this->read_locus($msg);
$this->read_header();
$this->read_features();
$this->getnucs();
if (length($this->{SEQ}) > 0){
$this->seq_info() unless($msg);
return 1;
}else{
return 0;
}
}
sub next_seq
{ my $this = shift;
my $len = '100';
my $opt = shift;
my $char = '1';
$len = $opt if ($opt);
$this->{SEQ} = '';
while($len > 0 && $char ne ''){
$char = getc(EMBL);
next unless ($char =~ /[a-zA-Z]/);
$len --;
$this->{SEQ} .= lc($char);
}
$this->{position} = tell EMBL;
return $char;
}
sub open_gb
{ my $this = shift;
my $filename = shift;
$infile = $filename;
open(EMBL, $filename) || die("Error at G::IO::EmblI: $!\n");
return *EMBL;
}
sub read_features
{ local($_);
my $this = shift;
my $num = -1;
my $cds = 0;
$this->{"CDS0"}->{dummy} = 1;
while(<EMBL>){
next if (/^FH/);
if (/^SQ/){
s/SQ //g;
$this->{"BASE_COUNT"} = $_;
last;
}elsif (/^FT {3}(\S+)\s+(.*)$/ && $_ !~ /\//){
my $key = $1;
my $feature = $2;
$num ++;
$this->{"FEATURE$num"}->{"feature"} = $num;
$this->{"FEATURE$num"}->{"type"} = $1;
$this->{"FEATURE$num"}->{"on"} = 1;
if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
$cds ++;
$this->{"CDS$cds"}->{"on"} = 1;
$this->{"FEATURE$num"}->{"cds"} = $cds;
}
s/\. \./\.\./g; ##for (1. .2) type irregular format
s/\^/\.\./g; ##for (1^2) type irregular format
my $part_left = tr/\<//d; ##for (<1..2) type irregular format
my $part_right = tr/\>//d; ##for (1..2>) type irregular format
$this->{"FEATURE$num"}->{"partial"} = "$part_left $part_right";
if (/join/){
if (/complement\(join/){
$this->{"FEATURE$num"}->{"direction"} = "complement";
$this->{"CDS$cds"}->{"direction"} = "complement";
s/complement//;
}else{
$this->{"FEATURE$num"}->{"direction"} = "direct";
$this->{"CDS$cds"}->{"direction"} = "direct";
}
my $line = $_;
my $fth = '';
my $linenum = tell EMBL;
while($line !~ /\//){
$fth .= substr($line, 2);
$line = <EMBL>;
$linenum = tell EMBL;
$linenum -= length($_);
}
seek EMBL, $linenum, 0;
substr($fth, 0, 19) = '';
$fth =~ s/join//g;
$fth =~ s/\(//g;
$fth =~ s/\)//g;
$fth =~ s/ //g;
$fth =~ s/\n//g;
$fth =~ s/complement/c/g;
my $tmpfth = $fth;
$tmpfth =~ s/c//g;
my @choparray = split(/\.\./, $tmpfth);
$this->{"FEATURE$num"}->{"start"} = shift @choparray;
$this->{"CDS$cds"}->{"start"} = $this->{"FEATURE$num"}->{"start"};
$this->{"FEATURE$num"}->{"end"} = pop @choparray;
$this->{"CDS$cds"}->{"end"} = $this->{"FEATURE$num"}->{"end"};
$this->{"FEATURE$num"}->{"join"} = $fth;
$this->{"CDS$cds"}->{"join"} = $fth;
$this->{"CDS$cds"}->{"feature"} = $num;
if ($line =~ /\:/){
$this->{"FEATURE$num"}->{"partial"} = $line;
$this->{"CDS$cds"}->{"partial"} = $line;
}
}elsif (/\?/){
$this->{"FEATURE$num"}->{"type"} = "partial_$key";
$this->{"FEATURE$num"}->{"partial"} = $feature;
msg_error("Partial feature: $feature\n");
}elsif (/complement\((\d+)\.\.(\d+)\)/){
$this->{"FEATURE$num"}->{"direction"} = "complement";
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $2;
if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
$this->{"CDS$cds"}->{"direction"} = "complement";
$this->{"CDS$cds"}->{"start"} = $1;
$this->{"CDS$cds"}->{"end"} = $2;
$this->{"CDS$cds"}->{"feature"} = $num;
}
}elsif (/(\d+)\.\.(\d+)/){
$this->{"FEATURE$num"}->{"direction"} = "direct";
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $2;
if ($this->{"FEATURE$num"}->{"type"} eq "CDS"){
$this->{"CDS$cds"}->{"direction"} = "direct";
$this->{"CDS$cds"}->{"start"} = $1;
$this->{"CDS$cds"}->{"end"} = $2;
$this->{"CDS$cds"}->{"feature"} = $num;
}
}elsif (/\s+complement\((\d+)\)/){
$this->{"FEATURE$num"}->{"direction"} = "complement";
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $1;
}elsif (/\s+(\d+)/){
$this->{"FEATURE$num"}->{"direction"} = "direct";
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $1;
}elsif (/replace\((\d+)\,\"/){
$this->{"FEATURE$num"}->{"direction"} = "direct";
$this->{"FEATURE$num"}->{"direction"} = "complement" if (/complement/);
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $1;
$this->{"FEATURE$num"}->{"partial"} = $_;
}elsif (/(\d+).*\.\..*(\d+)/){
$this->{"FEATURE$num"}->{"direction"} = "direct";
$this->{"FEATURE$num"}->{"direction"} = "complement" if (/complement/);
$this->{"FEATURE$num"}->{"start"} = $1;
$this->{"FEATURE$num"}->{"end"} = $2;
$this->{"FEATURE$num"}->{"partial"} = $_;
}else{
msg_error("Irregular location feature: $key $feature\n");
}
}else{
if (/\/(\w+)=\"([^\"]+)\"/){
$this->{"FEATURE$num"}->{"$1"} = $2;
}elsif (/\/(\w+)=\"([^\"]+)/){
my $tag = $1;
my $tmp = $2;
my $line;
while(<EMBL>){
substr($_, 0, 2) = '';
if (!/\"/){
$tmp .= $_;
}elsif (/([^\"]+)\"/){
$tmp .= $1;
last;
}
}
$tmp =~ s/\s+/ /g;
$tmp =~ s/ //g if ($tag eq 'translation');
$this->{"FEATURE$num"}->{$tag} = $tmp;
}elsif (/\/(\w+)=([\d|\d+])/){
$this->{"FEATURE$num"}->{$1} = $2;
}elsif (/\/(\w+)=\((.*)\)/){
$this->{"FEATURE$num"}->{$1} = $2;
}elsif (/\/(\w+)=(.*)/){
$this->{"FEATURE$num"}->{$1} = $2;
}elsif (/\/(\w+)/){
$this->{"FEATURE$num"}->{$1} .= '1';
}
}
}
}
sub read_header
{ my $this = shift;
}
sub read_locus
{ my $this = shift;
my $msg = shift;
local($_);
while(<EMBL>){
$this->{COMMENT} .= $_;
last if (/^FH/);
if (/^ID/){
s/\;//g;
my @locus_line = split;
shift @locus_line;
if ($##locus_line == 7){
$this->{"LOCUS"}->{"circular"} = 1;
($this->{"LOCUS"}->{"id"},
undef,
undef,
$this->{"LOCUS"}->{"nucleotide"},
$this->{"LOCUS"}->{"type"},
$this->{"LOCUS"}->{"length"},
undef) = @locus_line;
}elsif ($##locus_line == 6){
$this->{"LOCUS"}->{"circular"} = 0;
($this->{"LOCUS"}->{"id"},
undef,
undef,
$this->{"LOCUS"}->{"nucleotide"},
$this->{"LOCUS"}->{"type"},
$this->{"LOCUS"}->{"length"},
undef) = @locus_line;
}else{
msg_error("ERROR: Unknown LOCUS definition\n") if ($msg ne 'no msg');
$this->{"LOCUS"}->{"circular"} = 0;
}
}elsif(/^AC\s+(.*)\;/ && length($this->{LOCUS}->{id}) < 1){
$this->{LOCUS}->{id} = $1;
}elsif(/^DT\s+(.*) /){
$this->{LOCUS}->{date} = $1;
}
rewind_genome | description | top | prev | next |
sub rewind_genome
{ my $this = shift;
seek EMBL, $this->{origin}, 0;
return 1;
}
General documentation