G

Skyline

Summary Included libraries Package variables Synopsis Description General documentation Methods

Summary
G::Skyline
Package variables top
Globals (from use vars definitions)
$AUTOLOAD
@INC
$VERSION = '1.42'
Privates (from my definitions)
$loaded = 0
$outfile = ''
$infile = ''
Included modules top
G::IO::Annotation
G::IO::Bioperl
G::Messenger
strict
Inherit top
G::IO::GenBankI G::IO::GenBankO G::Prelude
Synopsis top
 use G::Skyline;
@ISA = (G::Skyline);
Description top
 Inherits all necessary classes.
Intended for internal use only.


Methods top
BEGIN | Code
DESTROY | No description | Code
bioperl | No description | Code
filepath | No description | Code
interpret_format | No description | Code
loaded_msg | No description | Code
multi_locus | No description | Code
new | No description | Code
output | No description | Code

Methods description


Methods code

BEGINtop
# Try to load the optional BioPerl back ends at compile time.  A module
# that cannot be loaded only produces a warning carrying the load error;
# compilation of this file continues either way.
BEGIN {
    foreach my $module ('Bio::SeqIO', 'Bio::DB::GenBank', 'Bio::DB::RefSeq') {
        eval "use $module;";
        warn "$@" if $@;
    }
}
DESTROYdescriptiontopprevnext
# Destructor.
#
# Empties the object's hash, removes the temporary GenBank file recorded in
# the file-level $outfile (if one was created), and calls close_gb() on the
# object.
sub DESTROY {
    my $self = shift;

    # A destructor can run at any point, including while an exception is
    # being propagated; keep the caller's status variables untouched by
    # whatever unlink() or close_gb() do.
    local ($., $@, $!, $^E, $?);

    undef %{$self};

    # $outfile stays '' unless a temporary file was written, so avoid a
    # pointless (and failing) unlink('') in the common case.
    unlink($outfile) if (defined $outfile && length $outfile);

    # NOTE(review): close_gb() is not defined in this file and is called
    # after the hash has been emptied (original order kept) -- confirm it
    # does not need any per-object state.
    $self->close_gb();
}
bioperldescriptiontopprevnext
# Convert this object into a BioPerl sequence object.
#
# The object is written to a temporary GenBank file through output() and
# the first record of that file is read back with Bio::SeqIO.
#
# Returns whatever Bio::SeqIO's next_seq() returns for that file.
sub bioperl {
    my $this = shift;

    # A name built from time() is predictable and collides when two
    # conversions happen within the same second, and the file used to be
    # left behind.  File::Temp gives a unique file that is removed when the
    # program exits.  The '.gbk' suffix is kept because output() picks the
    # format from the file extension.
    require File::Temp;
    my ($tmp_fh, $tmp_path) = File::Temp::tempfile(
        'g_bioperl_XXXXXX',
        SUFFIX => '.gbk',
        TMPDIR => 1,
        UNLINK => 1,
    );
    close($tmp_fh);

    $this->output($tmp_path);

    my $in = Bio::SeqIO->new(-format => "genbank", -file => $tmp_path);
    my $bp = $in->next_seq();

    return $bp;
}
filepathdescriptiontopprevnext
# Accessor: returns the file-level $infile value (the file name most
# recently handed to new()).  The invocant is accepted but not consulted.
sub filepath {
    my ($this) = @_;
    return $infile;
}
interpret_formatdescriptiontopprevnext
# Guess the format of a sequence source.
#
# Argument: a file name, a database identifier, or an object whose class
#           name contains "Bio::Seq".
# Returns:  'bioperl', 'genbank', 'fasta', 'RefSeq', 'net GenBank', or the
#           value of Bio::SeqIO->_guess_format(); falls back to 'genbank'
#           (with a warning) when nothing matches.
sub interpret_format {
    my $filename = shift;

    # ref() needs explicit parentheses here: "=~" binds tighter than a named
    # unary operator, so "ref $filename =~ /.../" evaluated ref() of the
    # match result and this branch could never be taken.
    if (ref($filename) =~ /Bio::Seq/) {
        return 'bioperl';
    }

    if ($filename =~ /\.(gb|gbk|gbank|genbank)$/i) {
        return 'genbank';
    }

    # Anchored like the GenBank test above, so a name such as
    # "my.fasta.embl" is judged by its real extension.
    if ($filename =~ /\.(fasta|fast|seq|fst|fa|fsa|nt|aa)$/i) {
        return 'fasta';
    }

    # Identifiers rather than files: NC_<digits>, or one or two letters
    # followed by digits.
    if ($filename =~ /^NC_\d+$/i) {
        return 'RefSeq';
    }
    if ($filename =~ /^(?:[A-Z]|[A-Z][A-Z])\d+$/i) {
        return 'net GenBank';
    }

    my $format = Bio::SeqIO->_guess_format($filename);
    if (defined $format && length $format) {
        return $format;
    }

    warn("Unknown file format. Interpreting $filename as GenBank...\n");
    return 'genbank';
}
loaded_msgdescriptiontopprevnext
# Emit the start-up banner once.
#
# Every call increments the file-level counter $loaded; only the first call
# builds the banner (which interpolates $VERSION) and passes it to
# msg_error().  Later calls return immediately.
# NOTE(review): msg_error() is not defined in this file; where the banner
# ends up is decided there -- confirm.
sub loaded_msg {
    my $this = shift;

    # Count the call, and stay silent on every call after the first.
    $loaded ++;
    return if ($loaded > 1);

    # The banner text is whitespace-sensitive; it is emitted verbatim.
    my $print =
	qq(
	     __/__/__/__/__/__/__/__/__/__/__/__/__/
                
             G-language Genome Analysis Environment

	      Version: $VERSION
	      License: GPL
 
	      Copyright (C) 2001-2002 
              G-language Project
	      Institute for Advanced Biosciences,
	      Keio University, JAPAN 

          	 http://www.g-language.org/

__/
__/__/__/__/__/__/__/__/__/__/__/__/\n ); &msg_error($print);
}
multi_locusdescriptiontopprevnext
# Concatenate every locus of $gb into $this as one continuous sequence.
#
# Arguments:
#   $this - object (hash) that receives the merged data
#   $gb   - object positioned on its first locus; it must provide
#           next_locus(), which is called with "no msg" after each locus
#           and whose false return value ends the loop
#
# For every locus the LOCUS/HEADER/COMMENT entries are stored under
# "LOCUS<n>", "HEADER<n>" and "COMMENT<n>".  FEATURE<n> and CDS<n> entries
# are renumbered consecutively across loci and their start/end/join
# coordinates are shifted by the length of the sequence accumulated so far.
# Finally FEATURE0, CDS0, LOCUS, HEADER and COMMENT are filled in from the
# merged data.
sub multi_locus {
    my $this = shift;
    my $gb = shift;
    my $lng;
    my $i = 1;    # locus counter
    my $f = 1;    # feature counter in $this (across all loci)
    my $c = 1;    # CDS counter in $this (across all loci)

    do {
        my $F = 1;    # feature counter inside the current locus of $gb
        my $C = 1;    # CDS counter inside the current locus of $gb

        # Offset for this locus: everything appended so far (0 at first).
        $lng = defined $this->{"SEQ"} ? length($this->{"SEQ"}) : 0;
        $this->{"LOCUS$i"}   = $gb->{"LOCUS"};
        $this->{"HEADER$i"}  = $gb->{"HEADER"};
        $this->{"COMMENT$i"} = $gb->{"COMMENT"};

        # defined(%hash) is a compile-time error on current perls; test for
        # a non-empty feature hash directly instead.
        while (ref($gb->{"FEATURE$F"}) && %{ $gb->{"FEATURE$F"} }) {
            # The feature hash is shared with $gb (not copied), as before.
            my $feature = $gb->{"FEATURE$F"};
            $this->{"FEATURE$f"} = $feature;
            $feature->{"start"} = $feature->{"start"} + $lng;
            $feature->{"end"}   = $feature->{"end"} + $lng;
            $feature->{"locus"} = $i;

            if ($feature->{"type"} eq "CDS") {
                my $cds = $gb->{"CDS$C"};
                $this->{"CDS$c"} = $cds;
                $cds->{"start"}   = $cds->{"start"} + $lng;
                $cds->{"end"}     = $cds->{"end"} + $lng;
                $cds->{"feature"} = $f;
                $cds->{"locus"}   = $i;

                if (defined $cds->{"join"}) {
                    my @new_join = ();

                    foreach my $segment (split(/\,/, $cds->{"join"})) {
                        # A 'c' marks a complement segment.  Remove it
                        # before doing arithmetic: "c10" would otherwise
                        # count as 0 and the shifted start would be lost.
                        my $range  = $segment;
                        my $prefix = ($range =~ tr/c//d) ? 'c' : '';
                        my @num    = split(/\.\./, $range);

                        push(@new_join,
                             sprintf("%s%d..%d", $prefix,
                                     ($num[0] || 0) + $lng,
                                     ($num[1] || 0) + $lng));
                    }
                    $cds->{join} = join(',', @new_join);
                }
                $this->{"FEATURE$f"}->{"cds"} = $c;
                $c++;
                $C++;
            }
            $f++;
            $F++;
        }
        $this->{"SEQ"} .= $gb->{"SEQ"};
        $i++;

    } while ($gb->next_locus("no msg"));

    # Synthetic source feature spanning the merged sequence; the top-level
    # LOCUS/HEADER/COMMENT are taken from the first locus.
    $this->{"FEATURE0"}->{"type"}  = "source";
    $this->{"FEATURE0"}->{"start"} = 1;
    $this->{"FEATURE0"}->{"end"}   = length($this->{"SEQ"});
    $this->{"LOCUS"}   = $this->{"LOCUS1"};
    $this->{"HEADER"}  = $this->{"HEADER1"};
    $this->{"COMMENT"} = $this->{"COMMENT1"};
    $this->{"CDS0"}->{dummy} = 1;
}
newdescriptiontopprevnext
# Constructor: load a sequence source into a new object.
#
# Arguments:
#   $pkg      - invocant (not used when blessing; see note below)
#   $filename - file name, database identifier, BioPerl sequence object, or
#               the literal string 'blessed' to get an empty blessed object
#   @options  - case-insensitive option strings:
#                 "no msg"                  suppress banner and seq_info()
#                 "without annotation"      skip locus/header/features
#                 "multiple locus"          merge all loci via multi_locus()
#                 "long sequence"           record the sequence offset only
#                 "bioperl"                 treat $filename as a BioPerl object
#                 "longest ORF annotation"  return an object annotated by
#                                           annotate_with_LORF()
#                 "glimmer annotation"      return an object annotated by
#                                           run_glimmer()/annotate_with_glimmer()
#                 "net GenBank"             accepted, currently without effect
#                 a format name (fasta, embl, swiss, scf, pir, gcg, raw, ace,
#                 game, phd, qual, bsml, genbank) overrides format guessing
#
# Returns the loaded object, or the freshly annotated object for the two
# annotation options.  Dies when a remote record cannot be retrieved or the
# glimmer scratch file cannot be opened.
#
# Side effects: sets the file-level $infile; sets the file-level $outfile
# when the input had to be converted to a temporary GenBank file.
sub new {
    my $pkg = shift;
    my $filename = shift;
    my @options = @_;
    my $this = {};
    my $tmp = {};
    $infile = $filename;

    # NOTE(review): one-argument bless (current package) is kept on purpose
    # so the class of the returned object does not change for callers.
    bless $this;

    my $no_msg = 0;
    my $without_annotation = 0;
    my $multiple_locus = 0;
    my $long_sequence = 0;
    my $bioperl = 0;
    my $longest_ORF_annotation = 0;
    my $glimmer_annotation = 0;
    my $format = '';
    my $locus_msg = '';

    # 'blessed' is the sentinel for "just give me an empty object".
    return $this if (defined $filename && $filename eq 'blessed');

    my %known_format = map { $_ => 1 }
        qw(fasta embl swiss scf pir gcg raw ace game phd qual bsml genbank);

    foreach my $opt_tmp (@options){
        my $opt = lc($opt_tmp);

        # $opt is lower-cased, so every pattern below must be lower case
        # too; the former /longest ORF annotation/ and /net GenBank/ could
        # never match.
        if ($opt =~ /no msg/){
            $no_msg = 1;
        }elsif ($opt =~ /without annotation/){
            $without_annotation = 1;
        }elsif ($opt =~ /multiple locus/){
            $multiple_locus = 1;
        }elsif ($opt =~ /long sequence/){
            $long_sequence = 1;
        }elsif ($opt =~ /bioperl/){
            $bioperl = 1;
        }elsif ($opt =~ /longest orf annotation/){
            $longest_ORF_annotation = 1;
        }elsif ($opt =~ /glimmer annotation/){
            $glimmer_annotation = 1;
        }elsif ($opt =~ /net genbank/){
            # Recognised so that it does not trigger the warning below; the
            # source type itself is derived from $filename.
        }elsif ($known_format{$opt}){
            $format = $opt;
        }else{
            warn("Warning: Unknown Option $opt at\" new G\"\n");
        }
    }

    $this->loaded_msg() unless ($no_msg);

    $format = interpret_format($filename) unless(length $format);

    if ($bioperl || $format eq 'bioperl'){
        G::IO::Bioperl::convert($filename, $this);
        $this->seq_info() unless ($no_msg);
    }elsif ($format eq 'RefSeq'){
        my $db = Bio::DB::RefSeq->new();
        my $bp;

        eval {
            $bp = $db->get_Seq_by_id($filename);
        };
        die("$@ Could not retrieve $filename\n") if $@;

        G::IO::Bioperl::convert($bp, $this);
        $this->seq_info() unless($no_msg);
    }elsif ($format eq 'net GenBank'){
        my $db = Bio::DB::GenBank->new();
        my $bp;

        eval {
            $bp = $db->get_Seq_by_acc($filename);
        };
        die("$@ Could not retrieve $filename\n") if $@;

        G::IO::Bioperl::convert($bp, $this);
        $this->seq_info() unless($no_msg);
    }else{
        if ($format eq 'embl'){
            $this = G::IO::EmblI->new;
        }elsif ($format eq 'fasta'){
            $this = G::IO::FastaI->new;
        }elsif ($format ne 'genbank'){
            # Any other format is converted to a temporary GenBank file with
            # Bio::SeqIO and then read like a normal GenBank file.  The path
            # goes into the file-level $outfile, which DESTROY unlinks.
            # NOTE(review): the time-based name is predictable and can
            # collide within one second; consider File::Temp.
            $outfile = '/tmp/' . time . '.gbk';
            my $in = Bio::SeqIO->newFh(-format => $format, -file => $filename);
            my $out = Bio::SeqIO->newFh(-format => "GenBank",
                                        -file => '>' . $outfile);

            print $out $_ while <$in>;
            $filename = $outfile;
            $locus_msg = 'no msg';
            $format = 'genbank';
        }

        *GENBANK = $this->open_gb($filename);

        if ($multiple_locus){
            # Explicit method call: indirect-object "new G::Skyline(...)" is
            # ambiguous inside the package that defines new().
            $tmp = G::Skyline->new($filename, "no msg", $format);
            $this->multi_locus($tmp);
            $this->seq_info() unless($no_msg);
        }else{
            if ($without_annotation){
                $this->goto_origin();
            }else{
                $this->read_locus($locus_msg);
                $this->read_header();
                $this->read_features();
            }

            if ($long_sequence){
                $this->{origin} = tell *GENBANK;
            }else{
                $this->getnucs();
                $this->seq_info() unless ($no_msg);
            }
        }
    }

    if ($longest_ORF_annotation || $glimmer_annotation){
        # The annotated result lives in a fresh, empty object.  Use the
        # 'blessed' sentinel so the constructor does not try to load an
        # undefined file name, and restore $infile, which the nested
        # constructor call overwrites.
        my $source_path = $infile;
        my $new = G::Skyline->new('blessed');
        $infile = $source_path;

        if ($longest_ORF_annotation){
            annotate_with_LORF($new, $this);
            return $new;
        }

        # NOTE(review): fixed scratch path, shared by every process on the
        # machine; kept because run_glimmer()/annotate_with_glimmer() are
        # not visible here.
        open(FASTA, '>/tmp/out.fasta') || die ($!);
        printf FASTA ">%s\n%s\n", $this->{LOCUS}->{id}, $this->{SEQ};
        close(FASTA);
        run_glimmer($this, '/tmp/out.fasta');
        annotate_with_glimmer($new, '/tmp/out.fasta');
        unlink('/tmp/out.fasta');
        return $new;
    }

    return $this;
}
outputdescriptiontopprevnext
# Write the object to $file.
#
# Arguments:
#   $gb     - the object to write
#   $file   - destination file name
#   $option - optional output format; when omitted or empty it is derived
#             from $file with interpret_format()
#
# 'genbank' (any letter case) is written directly with make_gb().  Any
# other format is produced by writing a temporary GenBank file and
# converting it with Bio::SeqIO.
sub output {
    my $gb = shift;
    my $file = shift;
    my $option = shift;

    $option = interpret_format($file) unless (defined $option && length $option);

    if (lc($option) eq 'genbank'){
        $gb->make_gb($file);
    }elsif (length $option){
        # The intermediate file used to get a predictable, time-based name
        # and was never removed.  Use a unique temporary file and delete it
        # once the conversion is finished (UNLINK also covers an early die).
        require File::Temp;
        my ($tmp_fh, $tmp_path) = File::Temp::tempfile(
            'g_output_XXXXXX',
            SUFFIX => '.gbk',
            TMPDIR => 1,
            UNLINK => 1,
        );
        close($tmp_fh);

        $gb->make_gb($tmp_path);
        my $in = Bio::SeqIO->newFh(-format => "genbank", -file => $tmp_path);
        my $out = Bio::SeqIO->newFh(-format => "$option",
                                    -file => '>' . $file);

        print $out $_ while <$in>;

        # Release both handles before removing the intermediate file.
        undef $in;
        undef $out;
        unlink($tmp_path);
    }else{
        msg_error("G::output - Unknown format to output.");
    }
}

General documentation

AUTHOR top
Kazuharu Gaou Arakawa, gaou@g-language.org
SEE ALSO top
perl(1).