Name: blastall - runs BLAST via DDBJ web service
Description:
This is a wrapper around the DDBJ-REST BLAST web service documented at
http://www.xml.nig.ac.jp/doc/detail/Blast.html. BLAST is the most popular local alignment search tool.
Options and usage are identical to those of the regular blastall
command-line tool.
Supported databases include:
DDBJ, DDBJ_EXEST, DDBJNEW, DDBJNEW_EXEST, DAD, PDB, PDBSH, PRF,
PROTEIN, SWISS, WORMPEP, UNIPROT, TREMBL
Run this function without any arguments to see full listing.
Usage:
blastall(-p=>'blastp', -d=>'SWISS', -o=>'stdout', -m=>8, -i=>'test.fasta');
or
blastall(-p=>'blastp', -d=>'SWISS', -o=>'stdout', -m=>8, -i=>$AminoAcidSeq);
or
blastall(-p=>'blastp', -d=>'SWISS', -o=>'stdout', -m=>8, $AminoAcidSeq);
Options:
All optional parameters are identical to those of command-line BLAST.
Run this function without any arguments to see full listing.
References:
1. Altschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, Lipman DJ
(1997) "Gapped BLAST and PSI-BLAST: a new generation of protein database
search programs.", Nucleic Acids Res., 25(17):3389-3402.
2. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ (1990) "Basic local
alignment search tool.", J Mol Biol., 215(3):403-410.
Author:
Kazuki Oshita (t07122ko@sfc.keio.ac.jp), Kazuharu Arakawa (gaou@sfc.keio.ac.jp)
History:
20080213-01 initial posting |
sub _blast
{
    # Runs the external `blastall` program, either directly or by handing the
    # command line to the external `qr` program, and returns the parameter
    # string that was built.
    #
    # Positional arguments (what opt_get leaves after option parsing):
    #   1. database, stored as the -d option
    #   2. query: a file path, or -- when input=>'seq' -- a hash reference
    #      whose first key/value pair is written out as a FASTA record
    # Options read here:
    #   qr    => 'off' (default) runs blastall; 'on' runs `qr "blastall ..."`
    #   input => 'file' (default) or 'seq'
    # Every non-empty entry returned by opt_val() becomes "-key value".
    #
    # Dies if the temporary query file cannot be created or written.
    &opt_default(p=>'blastn',qr=>'off',input=>"file");
    my @args  = opt_get(@_);
    my $qr    = &opt_val("qr");
    my $input = &opt_val("input");
    my %opt   = opt_val();
    my $seqfile;    # temporary query file; only set when input is 'seq'

    $opt{d} = shift @args;

    if ($input eq 'seq') {
        my $seq = shift @args;
        my ($name) = keys(%$seq);

        # File::Temp creates the file atomically under an unpredictable name,
        # replacing the old "count the entries in /tmp and add time" scheme,
        # which could collide and was guessable by other local users.
        require File::Temp;
        my $fh;
        ($fh, $seqfile) = File::Temp::tempfile(
            'blast_XXXXXX', DIR => '/tmp', SUFFIX => '.seq');
        print {$fh} ">$name\n", $$seq{$name}, "\n";
        close($fh) or die "_blast: cannot write $seqfile: $!\n";
        $opt{i} = $seqfile;
    }
    else {
        $opt{i} = shift @args;
    }

    my @param;
    foreach my $key (sort keys(%opt)) {
        next if($opt{$key} eq '');
        push(@param, '-'.$key, $opt{$key});
    }
    my $param = join(' ', @param);

    system('blastall', @param)      if($qr eq "off");
    system('qr', "blastall $param") if($qr eq "on");

    # The temporary query file is kept when qr is 'on' -- presumably because
    # the queued job still has to read it (TODO confirm).
    unlink($seqfile) if(defined $seqfile && $qr ne 'on');

    return $param;
} |
sub _clustalw
{
    # Runs the external `clustalw` program on a sequence file.
    #
    # Positional argument (what opt_get leaves after option parsing):
    #   a file path, or -- when input=>'seq' -- a hash reference of
    #   name => sequence pairs that is written to a temporary FASTA file.
    # Options read here:
    #   input   => 'file' (default) or 'seq'
    #   output  => 'n' returns the result files' contents; anything else
    #              (default 'f') leaves the results on disk
    #   alnfile, dndfile => names, relative to the current directory, that
    #              the results are copied to when input is 'seq' and output
    #              is not 'n'
    #
    # Returns (\@aln_lines, \@dnd_lines) when output is 'n'; otherwise the
    # string "<file> <params>".
    # Dies if the temporary input file cannot be created or written.
    &opt_default(input=>"file",alnfile=>"clustalw.aln",dndfile=>"clustalw.dnd",output=>"f");
    my @args    = opt_get(@_);
    my $input   = &opt_val("input");
    my $alnfile = &opt_val("alnfile");
    my $dndfile = &opt_val("dndfile");
    my $output  = &opt_val("output");
    my $file;
    my @param;

    # NOTE(review): %opt is never filled in, so the loop below adds nothing
    # and clustalw always runs without extra options. The sibling wrappers use
    # opt_val() here -- confirm whether that was intended before changing it.
    my %opt;

    # Reads a whole file; warns and yields an empty list if it is unreadable
    # (for example because clustalw failed and produced no output).
    my $slurp = sub {
        my ($path) = @_;
        open(my $in, '<', $path) or do {
            warn "_clustalw: cannot read $path: $!\n";
            return;
        };
        my @lines = <$in>;
        close($in);
        return @lines;
    };

    if ($input eq 'seq') {
        my $seq = shift @args;

        # A unique temporary file keeps concurrent runs from overwriting each
        # other's input and results (the old fixed name was /tmp/tmp.clw).
        require File::Temp;
        my $fh;
        ($fh, $file) = File::Temp::tempfile(
            'clustalw_XXXXXX', DIR => '/tmp', SUFFIX => '.clw');
        foreach my $name (keys(%{$seq})) {
            print {$fh} '>', $name, "\n", ${$seq}{$name}, "\n\n";
        }
        close($fh) or die "_clustalw: cannot write $file: $!\n";
    }
    else {
        $file = shift @args;
    }

    foreach my $key (sort keys(%opt)) {
        next if($opt{$key} eq '');
        push(@param, '-'.$key, $opt{$key});
    }

    system('clustalw', "$file", @param);

    # Result files are expected next to the input, with its extension replaced
    # by .aln / .dnd. A name without an extension (no '.' after the last '/')
    # is used whole; previously rindex() returned -1 and substr() silently
    # dropped the last character of the name.
    my $dot   = rindex($file, '.');
    my $slash = rindex($file, '/');
    my $base  = ($dot < 0 || $dot < $slash) ? $file : substr($file, 0, $dot);
    my $aln_path = $base.'.aln';
    my $dnd_path = $base.'.dnd';

    unlink($file) if($input eq 'seq');

    if ($output eq 'n') {
        my @aln = $slurp->($aln_path);
        my @dnd = $slurp->($dnd_path);
        unlink($aln_path, $dnd_path);
        return (\@aln, \@dnd);
    }

    if ($input eq 'seq') {
        system('cp', $aln_path, "./$alnfile");
        system('cp', $dnd_path, "./$dndfile");
        unlink($aln_path, $dnd_path);
    }

    return $file.' '.join(' ',@param);
} |
sub _fasta
{
    # Runs the external `fasta33` program, either directly or by handing the
    # command line to the external `qr` program, and returns the string
    # "<data> <file> <params>".
    #
    # Arguments equal to one of the value-less switches listed below are
    # forwarded to fasta33 as they are; everything else goes through opt_get.
    # Positional arguments (what opt_get leaves after option parsing):
    #   1. $data, passed to fasta33 as its first argument
    #   2. a file path, or -- when input=>'seq' -- a hash reference whose
    #      first key/value pair is written out as a FASTA record
    # Options read here:
    #   qr    => 'off' (default) runs fasta33; 'on' runs `qr "fasta33 ..."`
    #   input => 'file' (default) or 'seq'
    #
    # Dies if the temporary sequence file cannot be created or written.
    &opt_default(qr=>'off',input=>"file");

    my %is_switch = map { $_ => 1 }
        qw(-a -A -B -f -g -h -H -i -L -n -o -p -Q -q -r -S -1 -3);

    my @param;
    my @rest;
    foreach my $arg (@_) {
        if ($is_switch{$arg}) {
            push(@param, $arg);
        }
        else {
            push(@rest, $arg);
        }
    }

    my @args  = opt_get(@rest);
    my $qr    = &opt_val("qr");
    my $input = &opt_val("input");
    my %opt   = opt_val();
    my $data  = shift @args;
    my $file;
    my $seqfile;    # temporary sequence file; only set when input is 'seq'

    if ($input eq 'seq') {
        my $seq = shift @args;
        my ($name) = keys(%$seq);

        # File::Temp replaces the old scheme that derived the name from the
        # number of entries in /tmp plus time (and never closed the
        # directory handle it opened for counting).
        require File::Temp;
        my $fh;
        ($fh, $seqfile) = File::Temp::tempfile(
            'fasta_XXXXXX', DIR => '/tmp', SUFFIX => '.seq');
        print {$fh} ">$name\n", $$seq{"$name"}, "\n";
        close($fh) or die "_fasta: cannot write $seqfile: $!\n";
        $file = $seqfile;
    }
    else {
        $file = shift @args;
    }

    foreach my $key (sort keys(%opt)) {
        next if($opt{$key} eq '');
        push(@param, '-'.$key, $opt{$key});
    }
    my $param = join(' ', @param);

    system('fasta33', "$data", "$file", @param)  if($qr eq "off");
    system('qr', "fasta33 $data $file $param")   if($qr eq "on");

    # The temporary file is kept when qr is 'on' -- presumably because the
    # queued job still has to read it (TODO confirm).
    unlink($seqfile) if(defined $seqfile && $qr ne 'on');

    return "$data $file $param";
} |
sub _formatdb
{
    # Runs the external `formatdb` program on the given file and returns the
    # argument string it was invoked with.
    #
    # The first positional argument left by opt_get is the input file (-i).
    # Defaults set here are p=>'F' and o=>'T'; every non-empty entry returned
    # by opt_val() is appended as "-key value", in sorted key order.
    &opt_default(p=>'F',o=>'T');
    my ($db_file) = opt_get(@_);
    my %setting   = opt_val();

    my @switches = map  { ('-'.$_, $setting{$_}) }
                   grep { $setting{$_} ne '' }
                   sort keys(%setting);

    system('formatdb', '-i', "$db_file", @switches);

    return "-i $db_file ".join(' ', @switches);
} |
sub blastall
{
    # Sends a BLAST search to the DDBJ REST service and delivers the reply.
    #
    # Options (parsed by opt_get/opt_val):
    #   -p  program, default 'blastp'
    #   -d  database, default 'SWISS'
    #   -o  output file, default 'stdout' (reply is passed to msg_send)
    #   -i  query: a FASTA file path or the sequence itself; when omitted the
    #       first remaining positional argument is used
    #   every other option is forwarded to the service as "-key value"
    #
    # Without a query the option help is printed through msg_error and the
    # service is asked for its list of supported databases instead.
    #
    # Returns the response object produced by LWP::UserAgent's request().
    require LWP::UserAgent;
    require HTTP::Request;
    require G::IO;

    my @argv = opt_get(@_);
    my %data = opt_val();

    my $program  = delete($data{'p'}) || 'blastp';
    my $database = delete($data{'d'}) || 'SWISS';
    my $outfile  = delete($data{'o'}) || 'stdout';
    my $infile   = delete($data{'i'}) || shift @argv;

    # An existing file is read as FASTA; anything else is taken to be the
    # query sequence itself.
    my $query = $infile;
    if (defined $infile && -e $infile) {
        my $gb = G::IO->new($infile, 'fasta', 'no msg');
        $query = $gb->{SEQ};
    }

    # Options must be separated by a space; they used to be concatenated
    # directly ("-m 8-e 10").
    my $option = join(' ', map { '-' . $_ . ' ' . $data{$_} } sort keys %data);

    # Percent-encodes a value for an application/x-www-form-urlencoded body so
    # that spaces, '&', '=', '+' or newlines in it cannot corrupt the form.
    my $escape = sub {
        my $value = defined $_[0] ? "$_[0]" : '';
        utf8::encode($value) if utf8::is_utf8($value);
        $value =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;
        return $value;
    };

    my $ua = LWP::UserAgent->new;
    my $request = HTTP::Request->new('POST',"http://xml.nig.ac.jp/rest/Invoke");
    $request->content_type("application/x-www-form-urlencoded");

    if ($program && $database && $infile) {
        # The last field is "param"; it had been mangled into a pilcrow
        # sign followed by "m" (an "&para" entity mis-decoding).
        $request->content(join('&',
            'service=Blast',
            'method=searchParam',
            'program='  . $escape->($program),
            'database=' . $escape->($database),
            'query='    . $escape->($query),
            'param='    . $escape->($option),
        ));
    } else {
        msg_error << "__HELP__";
blastall arguments:
-p Program Name [String] -d Database [String] default = nr -i Query File [File In] default = stdin -e Expectation value (E) [Real] default = 10.0 -m alignment view options: 0 = pairwise, 1 = query-anchored showing identities, 2 = query-anchored no identities, 3 = flat query-anchored, show identities, 4 = flat query-anchored, no identities, 5 = query-anchored no identities and blunt ends, 6 = flat query-anchored, no identities and blunt ends, 7 = XML Blast output, 8 = tabular, 9 tabular with comment lines 10 ASN, text 11 ASN, binary [Integer] default = 0 range from 0 to 11 -o BLAST report Output File [File Out] Optional default = stdout -F Filter query sequence (DUST with blastn, SEG with others) [String] default = T -G Cost to open a gap (-1 invokes default behavior) [Integer] default = -1 -E Cost to extend a gap (-1 invokes default behavior) [Integer] default = -1 -X X dropoff value for gapped alignment (in bits) (zero invokes default behavior) blastn 30, megablast 20, tblastx 0, all others 15 [Integer] default = 0 -I Show GI's in deflines [T/F] default = F -q Penalty for a nucleotide mismatch (blastn only) [Integer] default = -3 -r Reward for a nucleotide match (blastn only) [Integer] default = 1 -v Number of database sequences to show one-line descriptions for (V) [Integer] default = 500 -b Number of database sequence to show alignments for (B) [Integer] default = 250 -f Threshold for extending hits, default if zero blastp 11, blastn 0, blastx 12, tblastn 13 tblastx 13, megablast 0 [Real] default = 0 -g Perform gapped alignment (not available with tblastx) [T/F] default = T -Q Query Genetic code to use [Integer] default = 1 -D DB Genetic code (for tblast[nx] only) [Integer] default = 1 -a Number of processors to use [Integer] default = 1 -O SeqAlign file [File Out] Optional -J Believe the query defline [T/F] default = F -M Matrix [String] default = BLOSUM62 -W Word size, default if zero (blastn 11, megablast 28, all others 3) [Integer] default = 0 -z Effective 
length of the database (use zero for the real size) [Real] default = 0 -K Number of best hits from a region to keep (off by default, if used a value of 100 is recommended) [Integer] default = 0 -P 0 for multiple hit, 1 for single hit (does not apply to blastn) [Integer] default = 0 -Y Effective length of the search space (use zero for the real size) [Real] default = 0 -S Query strands to search against database (for blast[nx], and tblastx) 3 is both, 1 is top, 2 is bottom [Integer] default = 3 -T Produce HTML output [T/F] default = F -l Restrict search of database to list of GI's [String] Optional -U Use lower case filtering of FASTA sequence [T/F] Optional -y X dropoff value for ungapped extensions in bits (0.0 invokes default behavior) blastn 20, megablast 10, all others 7 [Real] default = 0.0 -Z X dropoff value for final gapped alignment in bits (0.0 invokes default behavior) blastn/megablast 50, tblastx 0, all others 25 [Integer] default = 0 -R PSI-TBLASTN checkpoint file [File In] Optional -n MegaBlast search [T/F] default = F -L Location on query sequence [String] Optional -A Multiple Hits window size, default if zero (blastn/megablast 0, all others 40 [Integer] default = 0 -w Frame shift penalty (OOF algorithm for blastx) [Integer] default = 0 -t Length of the largest intron allowed in a translated nucleotide sequence when linking multiple distinct alignments. (0 invokes default behavior; a negative value disables linking.) 
[Integer] default = 0 -B Number of concatenated queries, for blastn and tblastn [Integer] Optional default = 0 -V Force use of the legacy BLAST engine [T/F] Optional default = F -C Use composition-based statistics for blastp or tblastn: As first character: D or d: default (equivalent to T) 0 or F or f: no composition-based statistics 1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001 2: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties 3: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally For programs other than tblastn, must either be absent or be D, F or 0. As second character, if first character is equivalent to 1, 2, or 3: U or u: unified p-value combining alignment p-value and compositional p-value in round 1 only [String] default = D -s Compute locally optimal Smith-Waterman alignments (This option is only available for gapped tblastn.) [T/F] default = F
__HELP__
        msg_error "Displays information on the currently available databases\n";
        msg_error "=========================================================\n";
        $request->content("service=Blast&method=getSupportDatabaseList");
    }

    # Forward each chunk of the reply to msg_send. The callback also receives
    # the response and protocol objects, which are recognised by their class
    # name and skipped. The test is made on ref() only, so a data chunk that
    # merely contains the text "LWP::" is no longer dropped.
    my $callback = sub {
        for my $item (@_) {
            next if ref($item) =~ /HTTP::|LWP::/;
            msg_send($item);
        }
    };

    my $response;
    if ($outfile && lc($outfile) ne "stdout") {
        $response = $ua->request( $request, $outfile );
    } else {
        $response = $ua->request( $request, $callback, 30000 );
    }

    # Report transport/HTTP failures instead of returning silently.
    unless ($response->is_success) {
        msg_error("blastall: request failed: " . $response->status_line . "\n");
    }

    return $response;
} |
sub blat
{
    # Runs the external `blat` program and returns its hits.
    #
    # Positional arguments (what opt_get leaves after option parsing):
    #   1. database, passed to blat as its first argument
    #   2. query: an existing file path, or anything opt_as_gb() accepts,
    #      which is then written to a temporary FASTA file with to_fasta()
    # Options:
    #   qidentity / tidentity => minimum percentage; a hit is kept only if
    #       column 0 divided by column 10 (qidentity) or by column 14
    #       (tidentity) of its output row reaches that percentage
    #   every other option is passed to blat as "-key=value"
    #
    # Returns a list of array references, one per output row that starts
    # with a digit, holding the tab-separated fields of that row.
    my @argv = opt_get(@_);
    my $db   = shift @argv;
    my $tmp  = shift @argv;
    my %data = opt_val();

    my $scratch;    # temporary FASTA file created here, removed before return
    unless (defined $tmp && -e $tmp) {
        my $seq = opt_as_gb($tmp);
        my ($fh, $fname) = tempfile();
        close($fh);    # only the name is needed; to_fasta writes the file
        to_fasta($seq, -filename=>$fname, -output=>"f");
        $tmp = $scratch = $fname;
    }

    # Only the two filter options are held back. The old pattern was a
    # character class ([qidentity|tidentity]) and therefore dropped every
    # option whose name contained any of those letters.
    my @options;
    foreach my $key (sort keys %data) {
        next if($key =~ /\A(?:qidentity|tidentity)\z/);
        push(@options, "-$key=$data{$key}");
    }

    my @result;

    # List-form pipe open: the arguments reach blat verbatim, without going
    # through a shell (so no word splitting or shell expansion is applied).
    if (open(my $blat_out, '-|', 'blat', $db, $tmp, 'stdout', @options)) {
        while (my $line = <$blat_out>) {
            chomp($line);
            next unless($line =~ /^\d/);
            my @res = split(/\t/, $line);

            if ($data{qidentity}) {
                next unless($res[0]/$res[10] >= $data{qidentity}/100);
            }
            if ($data{tidentity}) {
                next unless($res[0]/$res[14] >= $data{tidentity}/100);
            }

            push(@result, [@res]);
        }
        close($blat_out);
    }
    else {
        warn "blat: cannot run blat: $!\n";
    }

    unlink($scratch) if(defined $scratch);

    return @result;
} |