G::Tools Alignment
Package variablesGeneral documentationMethods
Package variables
No package variables defined.
Included modules
G::Messenger
G::Seq::Primitive
SelfLoader
SubOpt
autouse ' File::Temp ' => qw ( tempfile )
base qw ( Exporter )
Synopsis
No synopsis!
Description
No description!
Methods
_blast
No description
Code
_clustalw
No description
Code
_fasta
No description
Code
_formatdb
No description
Code
blastallDescriptionCode
blat
No description
Code
Methods description
blastallcode    nextTop
  Name: blastall   -   runs BLAST via DDBJ web service

  Description:
    This is a wrapper around DDBJ-REST BLAST web service provided by 
    http://www.xml.nig.ac.jp/doc/detail/Blast.html
    BLAST is the most popular local alignment search tool.
    Options and usage are identical to those of the regular blastall
    command-line tool.

    Supported databases include:
      DDBJ, DDBJ_EXEST, DDBJNEW, DDBJNEW_EXEST, DAD, PDB, PDBSH, PRF PROTEIN,
      SWISS, WORMPEP, UNIPROT, TREMBL
    Run this function without any arguments to see the full listing.

  Usage:
    blastall(-p=>'blastp', -d=>'SWISS', -o=>'stdout', -m=>8, -i=>'test.fasta');
      or
    blastall(-p=>'blastp', -d=>'SWISS', -o=>'stdout', -m=>8, -i=>$AminoAcidSeq);
      or
    blastall(-p=>'blastp', -d=>'SWISS', -o=>'stdout', -m=>8, $AminoAcidSeq);

  Options:
    All optional parameters are identical to those of command-line BLAST.
    Run this function without any arguments to see the full listing.

  References:
    1. Altschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W,
       Lipman DJ (1997) "Gapped BLAST and PSI-BLAST: a new generation of
       protein database search programs.", Nucleic Acids Res.,
       25(17):3389-3402.
    2. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ (1990) "Basic local
       alignment search tool.", J Mol Biol., 215(3):403-410.

  Author:
    Kazuki Oshita (t07122ko@sfc.keio.ac.jp)
    Kazuharu Arakawa (gaou@sfc.keio.ac.jp)

  History:
    20080213-01 initial posting
Methods code
_blastdescriptionprevnextTop
# _blast - run the local `blastall` program, directly or through `qr`.
#
# Positional arguments (what remains after opt_get() strips the options):
#   1. database, passed to blastall as -d
#   2. query: a file path (default), or, with -input=>'seq', a hash reference
#      whose first key/value pair is written out as a single FASTA record
#
# Options with defaults set here: -p=>'blastn', -qr=>'off', -input=>'file'.
# Every non-empty entry of the hash returned by opt_val() is forwarded to
# blastall as "-key value".
#
# With -qr=>'on' the command is handed to `qr` as one string and the temporary
# query file is left in place (presumably because the job may still need it
# after this sub returns -- TODO confirm).
#
# Returns the parameter string that was passed to blastall.
sub _blast {
    &opt_default(p=>'blastn',qr=>'off',input=>"file");
    my @args = opt_get(@_);

    my $qr    = &opt_val("qr");
    my $input = &opt_val("input");
    my %opt   = opt_val();
    my $seqfile;

    $opt{d} = shift @args;

    if($input eq 'seq'){
        my $seq = shift @args;
        my ($name) = keys(%$seq);

        # File::Temp creates the file atomically under a unique name, so two
        # concurrent runs can no longer end up sharing one query file.
        my $fh;
        ($fh, $seqfile) = tempfile('blast_XXXXXX', DIR => '/tmp', SUFFIX => '.seq');
        print {$fh} ">$name\n";
        print {$fh} $$seq{$name},"\n";
        close($fh) or die "_blast: cannot write $seqfile: $!";

        $opt{i} = $seqfile;
    }else{
        $opt{i} = shift @args;
    }

    my @param;
    foreach my $key (sort keys(%opt)){
        next if($opt{$key} eq '');
        push(@param, '-'.$key, $opt{$key});
    }
    my $param = join(' ',@param);

    system('blastall',@param) if($qr eq "off");
    system('qr',"blastall $param") if($qr eq "on");

    unlink($seqfile) if($input eq 'seq' && $qr ne 'on');
    return $param;
}
_clustalwdescriptionprevnextTop
# _clustalw - run the local `clustalw` program on a file or on sequences.
#
# Positional argument (what remains after opt_get() strips the options):
#   a file path (default), or, with -input=>'seq', a hash reference of
#   name => sequence pairs that is written to a temporary FASTA-style file.
#
# Options with defaults set here:
#   -input   => 'file'          'seq' to pass a hash reference instead
#   -output  => 'f'             'n' to get the results back as arrays
#   -alnfile => 'clustalw.aln'  destination of the alignment (seq input only)
#   -dndfile => 'clustalw.dnd'  destination of the guide tree (seq input only)
#
# Returns (\@aln_lines, \@dnd_lines) when -output=>'n' (the .aln/.dnd files
# are removed after reading); otherwise the string "<file> <params>".
sub _clustalw {
    &opt_default(input=>"file",alnfile=>"clustalw.aln",dndfile=>"clustalw.dnd",output=>"f");
    my @args = opt_get(@_);

    my $input   = &opt_val("input");
    my $alnfile = &opt_val("alnfile");
    my $dndfile = &opt_val("dndfile");
    my $output  = &opt_val("output");
    my @param;
    my $file;

    # NOTE(review): %opt is never filled in this sub (its siblings use
    # "my %opt = opt_val();"), so no extra parameter ever reaches clustalw.
    # Left as-is because opt_val()'s no-argument result is not visible here.
    my %opt;

    if($input eq 'seq'){
        my $seq = shift @args;

        # A unique temporary name keeps concurrent runs from overwriting each
        # other's input and output files.
        my $fh;
        ($fh, $file) = tempfile('clustalw_XXXXXX', DIR => '/tmp', SUFFIX => '.clw');
        foreach my $name (keys(%{$seq})){
            print {$fh} '>',"$name","\n";
            print {$fh} ${$seq}{$name},"\n\n";
        }
        close($fh) or die "_clustalw: cannot write $file: $!";
    }
    else{
        $file = shift @args;
    }

    foreach my $key (sort keys(%opt)){
        next if($opt{$key} eq '');
        push(@param, '-'.$key, $opt{$key});
    }

    system('clustalw',"$file",@param);

    # The result files are looked up under the input name with its extension
    # replaced. Only a dot inside the last path component counts as an
    # extension; a name without one is used whole rather than losing its
    # final character.
    my $dot  = rindex($file, '.');
    my $base = ($dot > rindex($file, '/')) ? substr($file, 0, $dot) : $file;
    my $aln_path = $base.'.aln';
    my $dnd_path = $base.'.dnd';

    # Read a result file into a list of lines; a missing file yields an empty
    # list (as before) but is now reported instead of being silently ignored.
    my $slurp = sub {
        my ($path) = @_;
        open(my $in, '<', $path) or do {
            msg_error("_clustalw: cannot read $path: $!\n");
            return;
        };
        my @lines = <$in>;
        close($in);
        return @lines;
    };

    unlink($file) if($input eq 'seq');

    if($output eq 'n'){
        my @aln = $slurp->($aln_path);
        my @dnd = $slurp->($dnd_path);
        unlink($aln_path, $dnd_path);
        return (\@aln,\@dnd);
    }

    if($input eq 'seq'){
        system('cp',$aln_path,"./$alnfile");
        system('cp',$dnd_path,"./$dndfile");
        unlink($aln_path, $dnd_path);
    }

    return $file.' '.join(' ',@param);
}
_fastadescriptionprevnextTop
# _fasta - run the local `fasta33` program, directly or through `qr`.
#
# Arguments listed in %is_flag below are valueless switches; they are pulled
# out of @_ before option parsing and passed through to fasta33 unchanged.
#
# Positional arguments (what remains after opt_get() strips the options):
#   1. first fasta33 argument ($data)
#   2. second fasta33 argument: a file path (default), or, with
#      -input=>'seq', a hash reference whose first key/value pair is written
#      out as a single FASTA record
#
# Options with defaults set here: -qr=>'off', -input=>'file'.
# With -qr=>'on' the command is handed to `qr` as one string and the temporary
# file is left in place.
#
# Returns the string "<data> <file> <params>".
sub _fasta {
    &opt_default(qr=>'off',input=>"file");

    my %is_flag = map { $_ => 1 }
        qw(-a -A -B -f -g -h -H -i -L -n -o -p -Q -q -r -S -1 -3);

    my @param;
    my @rest;
    foreach my $arg (@_){
        if($is_flag{$arg}){
            push(@param, $arg);
        }
        else{
            push(@rest, $arg);
        }
    }
    my @args = opt_get(@rest);

    my $qr    = &opt_val("qr");
    my $input = &opt_val("input");
    my %opt   = opt_val();

    my $data = shift @args;
    my $file;
    if($input eq 'seq'){
        my $seq = shift @args;
        my ($name) = keys(%$seq);

        # File::Temp creates the file atomically under a unique name; this
        # also removes the directory scan whose handle was never closed.
        my $fh;
        ($fh, $file) = tempfile('fasta_XXXXXX', DIR => '/tmp', SUFFIX => '.seq');
        print {$fh} ">$name\n";
        print {$fh} $$seq{$name},"\n";
        close($fh) or die "_fasta: cannot write $file: $!";
    }
    else{
        $file = shift @args;
    }

    foreach my $key (sort keys(%opt)){
        next if($opt{$key} eq '');
        push(@param, '-'.$key, $opt{$key});
    }
    my $param = join(' ',@param);

    system('fasta33',"$data","$file",@param) if($qr eq "off");
    system('qr',"fasta33 $data $file $param") if($qr eq "on");

    unlink($file) if($input eq 'seq' && $qr ne 'on');

    return "$data $file $param";
}
_formatdbdescriptionprevnextTop
# _formatdb - run the local `formatdb` program on a sequence file.
#
# The first positional argument left after option parsing is the input file
# (passed as -i). Defaults set here are -p=>'F' and -o=>'T'. Every non-empty
# entry of the hash returned by opt_val() is forwarded as "-key value", in
# sorted key order.
#
# Returns the argument string that was passed to formatdb.
sub _formatdb {
    &opt_default(p=>'F',o=>'T');
    my ($file) = opt_get(@_);
    my %opt = opt_val();

    my @param = map  { ('-'.$_, $opt{$_}) }
                grep { $opt{$_} ne '' }
                sort keys(%opt);

    system('formatdb','-i',"$file",@param);

    return '-i '."$file ".join(' ',@param);
}
blastalldescriptionprevnextTop
# blastall - submit a BLAST search to the REST endpoint used below.
#
# Options:
#   -p  program     (default 'blastp')
#   -d  database    (default 'SWISS')
#   -o  output file (default 'stdout')
#   -i  query: a path to an existing file (read through G::IO as FASTA and
#       replaced by its SEQ entry) or the sequence itself; when -i is absent
#       the first positional argument is used instead
#   Every other option is forwarded to the service as "-key value" in the
#   "param" form field.
#
# Without a query, the option help is printed with msg_error and the service
# is asked for its list of supported databases instead.
#
# The response body is written to the -o file, or streamed through msg_send
# when -o is 'stdout' (any letter case). Returns the response object produced
# by $ua->request.
sub blastall {
    require LWP::UserAgent;
    require G::IO;
    my @argv = opt_get(@_);
    my %data = opt_val();

    my $program  = $data{'p'} || 'blastp';
    my $database = $data{'d'} || 'SWISS';
    my $outfile  = $data{'o'} || 'stdout';
    my $infile   = $data{'i'} || shift @argv;

    delete @data{qw(p d o i)};

    my $query = $infile;
    if (defined $infile && -e $infile) {
        my $gb = G::IO->new($infile, 'fasta', 'no msg');
        $query = $gb->{SEQ};
    }

    # Options are separated by a space; previously they were glued together
    # ("-m 8-e 10"), which corrupted every request carrying more than one.
    my $option = join(' ', map { '-' . $_ . ' ' . $data{$_} } sort keys %data);

    my $ua = LWP::UserAgent->new;
    my $request = HTTP::Request->new('POST',"http://xml.nig.ac.jp/rest/Invoke");
    $request->content_type("application/x-www-form-urlencoded");

    # $program and $database always carry a default, so in practice this
    # branch is chosen by the presence of a query.
    if ($program && $database && $infile) {
        # NOTE(review): the form values are sent without percent-encoding --
        # confirm the service accepts that before relying on unusual input.
        $request->content("service=Blast&method=searchParam&program=$program&database=$database&query=$query&param=$option");
    } else {
        msg_error(<<"__HELP__");

blastall arguments:

-p Program Name [String]
-d Database [String]
default = nr
-i Query File [File In]
default = stdin
-e Expectation value (E) [Real]
default = 10.0
-m alignment view options:
0 = pairwise,
1 = query-anchored showing identities,
2 = query-anchored no identities,
3 = flat query-anchored, show identities,
4 = flat query-anchored, no identities,
5 = query-anchored no identities and blunt ends,
6 = flat query-anchored, no identities and blunt ends,
7 = XML Blast output,
8 = tabular,
9 tabular with comment lines
10 ASN, text
11 ASN, binary [Integer]
default = 0
range from 0 to 11
-o BLAST report Output File [File Out] Optional
default = stdout
-F Filter query sequence (DUST with blastn, SEG with others) [String]
default = T
-G Cost to open a gap (-1 invokes default behavior) [Integer]
default = -1
-E Cost to extend a gap (-1 invokes default behavior) [Integer]
default = -1
-X X dropoff value for gapped alignment (in bits) (zero invokes default behavior)
blastn 30, megablast 20, tblastx 0, all others 15 [Integer]
default = 0
-I Show GI's in deflines [T/F]
default = F
-q Penalty for a nucleotide mismatch (blastn only) [Integer]
default = -3
-r Reward for a nucleotide match (blastn only) [Integer]
default = 1
-v Number of database sequences to show one-line descriptions for (V) [Integer]
default = 500
-b Number of database sequence to show alignments for (B) [Integer]
default = 250
-f Threshold for extending hits, default if zero
blastp 11, blastn 0, blastx 12, tblastn 13
tblastx 13, megablast 0 [Real]
default = 0
-g Perform gapped alignment (not available with tblastx) [T/F]
default = T
-Q Query Genetic code to use [Integer]
default = 1
-D DB Genetic code (for tblast[nx] only) [Integer]
default = 1
-a Number of processors to use [Integer]
default = 1
-O SeqAlign file [File Out] Optional
-J Believe the query defline [T/F]
default = F
-M Matrix [String]
default = BLOSUM62
-W Word size, default if zero (blastn 11, megablast 28, all others 3) [Integer]
default = 0
-z Effective length of the database (use zero for the real size) [Real]
default = 0
-K Number of best hits from a region to keep (off by default, if used a value of 100 is recommended) [Integer]
default = 0
-P 0 for multiple hit, 1 for single hit (does not apply to blastn) [Integer]
default = 0
-Y Effective length of the search space (use zero for the real size) [Real]
default = 0
-S Query strands to search against database (for blast[nx], and tblastx)
3 is both, 1 is top, 2 is bottom [Integer]
default = 3
-T Produce HTML output [T/F]
default = F
-l Restrict search of database to list of GI's [String] Optional
-U Use lower case filtering of FASTA sequence [T/F] Optional
-y X dropoff value for ungapped extensions in bits (0.0 invokes default behavior)
blastn 20, megablast 10, all others 7 [Real]
default = 0.0
-Z X dropoff value for final gapped alignment in bits (0.0 invokes default behavior)
blastn/megablast 50, tblastx 0, all others 25 [Integer]
default = 0
-R PSI-TBLASTN checkpoint file [File In] Optional
-n MegaBlast search [T/F]
default = F
-L Location on query sequence [String] Optional
-A Multiple Hits window size, default if zero (blastn/megablast 0, all others 40 [Integer]
default = 0
-w Frame shift penalty (OOF algorithm for blastx) [Integer]
default = 0
-t Length of the largest intron allowed in a translated nucleotide sequence when linking multiple distinct alignments. (0 invokes default behavior; a negative value disables linking.) [Integer]
default = 0
-B Number of concatenated queries, for blastn and tblastn [Integer] Optional
default = 0
-V Force use of the legacy BLAST engine [T/F] Optional
default = F
-C Use composition-based statistics for blastp or tblastn:
As first character:
D or d: default (equivalent to T)
0 or F or f: no composition-based statistics
1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
2: Composition-based score adjustment as in Bioinformatics 21:902-911,
2005, conditioned on sequence properties
3: Composition-based score adjustment as in Bioinformatics 21:902-911,
2005, unconditionally
For programs other than tblastn, must either be absent or be D, F or 0.
As second character, if first character is equivalent to 1, 2, or 3:
U or u: unified p-value combining alignment p-value and compositional p-value in round 1 only
[String]
default = D
-s Compute locally optimal Smith-Waterman alignments (This option is only
available for gapped tblastn.) [T/F]
default = F

__HELP__
        msg_error("Displays information on the currently available databases\n");
        msg_error("=========================================================\n");
        $request->content("service=Blast&method=getSupportDatabaseList");
    }

    # Streaming callback: forward each argument to msg_send, except the
    # HTTP::* / LWP::* objects that are passed alongside the data chunk.
    my $callback = sub {
        for (@_) {
            msg_send($_) unless ref($_) =~ /HTTP::/ || /LWP::/;
        }
    };

    my $response;
    if ($outfile && lc($outfile) ne "stdout") {
        $response = $ua->request($request, $outfile);
    } else {
        $response = $ua->request($request, $callback, 30000);
    }

    return $response;
}
blatdescriptionprevnextTop
# blat - run the local `blat` program and return its PSL hits.
#
# Positional arguments (what remains after opt_get() strips the options):
#   1. database, passed to blat as its first argument
#   2. query: a path to an existing file, or anything opt_as_gb() accepts,
#      which is then written to a temporary FASTA file with to_fasta()
#
# Options:
#   -qidentity => N   keep a hit only if column 0 / column 10 >= N/100
#   -tidentity => N   keep a hit only if column 0 / column 14 >= N/100
#                     (in the PSL layout these columns are matches, query
#                     size and target size)
#   Every other option is forwarded to blat as "-key=value".
#
# Returns a list of array references, one per output line that starts with a
# digit, each holding that line's tab-separated fields.
sub blat {
    my @argv = opt_get(@_);
    my $db   = shift @argv;
    my $tmp  = shift @argv;
    my %data = opt_val();

    my $tmpfile;
    unless(-e $tmp){
        my $seq = opt_as_gb($tmp);
        my($fh, $fname) = tempfile();
        close($fh);    # to_fasta() writes to the file by name
        to_fasta($seq, -filename=>$fname, -output=>"f");
        $tmp = $tmpfile = $fname;
    }

    my $option = '';
    foreach my $key (keys %data){
        # Only the two filter options handled below are withheld from blat.
        # The former pattern /[qidentity|tidentity]/ was a character class and
        # dropped every option whose name contained any of those letters.
        next if($key =~ /^(?:qidentity|tidentity)$/);
        $option .= "-$key=$data{$key} ";
    }

    # NOTE(review): the command line goes through the shell, so $db, the query
    # path and the option values must not come from untrusted input.
    my @result;
    foreach my $line (`blat $db $tmp stdout $option`){
        chomp($line);
        next unless($line =~ /^\d/);

        my @res = split (/\t/, $line);

        if($data{qidentity}){
            next unless($res[0]/$res[10] >= $data{qidentity}/100);
        }
        if($data{tidentity}){
            next unless($res[0]/$res[14] >= $data{tidentity}/100);
        }
        push(@result, [@res]);
    }

    # Remove the temporary FASTA file created above; it used to be left behind.
    unlink($tmpfile) if(defined $tmpfile);

    return @result;
}
General documentation
No general documentation available.