G::Tools
Blast
Globals (from use vars definitions) |
@EXPORT |
$VERSION |
@EXPORT_OK |
_blast | No description | Code |
_gblaster | No description | Code |
new | No description | Code |
Methods description
Methods code
sub _blast
{ &opt_default(p=>'blastn',qr=>'off',input=>"file");
my @args=opt_get(@_);
my $qr=&opt_val("qr");
my $input=&opt_val("input");
my $seq;
my @param;
my $param;
my %opt;
my @tmp;
my $num;
$opt{d}=shift @args;
if($input eq 'seq'){
$seq=shift @args;
opendir(DIR,'/tmp');
@tmp=readdir(DIR);
$num=$##tmp+1+time;
@tmp=keys(%$seq);
close(DIR);
open(FILE,'>/tmp/blast_'.$num.'.seq');
print FILE ">$tmp[0]\n";
print FILE $$seq{$tmp[0]},"\n";
close(FILE);
$opt{i}='/tmp/blast_'.$num.'.seq';
}
else{
$opt{i}=shift @args;
}
$opt{p}=opt_val("p");
$opt{e}=opt_val("e");
$opt{m}=opt_val("m");
$opt{o}=opt_val("o");
$opt{F}=opt_val("F");
$opt{G}=opt_val("G");
$opt{E}=opt_val("E");
$opt{X}=opt_val("X");
$opt{I}=opt_val("I");
$opt{q}=opt_val("q");
$opt{r}=opt_val("r");
$opt{v}=opt_val("v");
$opt{b}=opt_val("b");
$opt{f}=opt_val("f");
$opt{g}=opt_val("g");
$opt{Q}=opt_val("Q");
$opt{D}=opt_val("D");
$opt{a}=opt_val("a");
$opt{J}=opt_val("J");
$opt{M}=opt_val("M");
$opt{W}=opt_val("W");
$opt{z}=opt_val("z");
$opt{K}=opt_val("K");
$opt{P}=opt_val("P");
$opt{Y}=opt_val("Y");
$opt{S}=opt_val("S");
$opt{T}=opt_val("T");
$opt{U}=opt_val("U");
$opt{y}=opt_val("y");
$opt{Z}=opt_val("Z");
$opt{n}=opt_val("n");
$opt{A}=opt_val("A");
$opt{w}=opt_val("w");
$opt{t}=opt_val("t");
$opt{L}=opt_val("L");
$opt{O}=opt_val("O");
$opt{l}=opt_val("l");
$opt{R}=opt_val("R");
foreach(sort keys(%opt)){
next if($opt{$_} eq '');
push(@param,'-'.$_);
push(@param,$opt{$_});
}
$param=join(' ',@param);
system('blastall',@param) if($qr eq "off");
system('qr',"blastall $param") if($qr eq "on");
unlink('/tmp/blast_'.$num.'.seq') if($input eq 'seq' && $qr ne 'on');
return $param;
}
sub _formatdb{
&opt_default(p=>'F',o=>'T');
my @args=opt_get(@_);
my $file=shift @args;
my @param;
my %opt;
$opt{p}=opt_val('p');
$opt{o}=opt_val('o');
$opt{t}=opt_val('t');
$opt{l}=opt_val('l');
$opt{a}=opt_val('a');
$opt{b}=opt_val('b');
$opt{e}=opt_val('e');
$opt{n}=opt_val('n');
$opt{v}=opt_val('v');
$opt{s}=opt_val('s');
$opt{V}=opt_val('V');
$opt{A}=opt_val('A');
foreach(sort keys(%opt)){
next if($opt{$_} eq '');
push(@param,'-'.$_);
push(@param,$opt{$_});
}
system('formatdb','-i',"$file",@param);
return '-i '."$file ".join(' ',@param);
}
sub _blast_tp_finder{
&opt_default(output => 'file',outfilename=>'parsed_blast_list',header => 'on');
my @args = opt_get(@_);
my $file=shift;
my $output = opt_val('output');
my $outfilename = opt_val('outfilename');
my $header = opt_val('header');
my @id_query;
my $query_name;
my @query;
my @query_num;
my $s1=0;
my @query_num_sort;
my $query_start;
my $query_end;
my @sbject_num;
my @sbject_num_sort;
my $sbject_start;
my $sbject_end;
my $length;
my @identities;
my $subject_name;
my @strand;
my @sbject;
my @infile;
my @score;
my $evalue;
my $this={};
my $p=0;
my $redundancy='';
open(OUTFILE,'>'.$outfilename) if($output eq 'file');
open(FILE,"$file");
while(<FILE>){
## if(/Query=/){
## @id_query=split(/\s/,$_);
## $_=~s/Query= //;
## $_=~tr/\n//d;
## $query_name=$_;
## msg_send("$query_name\n");
## }
if(/Query= (\d+)_(\d+)_(.+)/){
$query_name=$3;
$query_name=~tr/\n//d;
$redundancy = $1;
msg_send("$redundancy\n");
}
if(/^>/){
$s1=0;
$subject_name=$_;
$subject_name=~tr/>//d;
$subject_name=~tr/\n//d;
msg_send("$redundancy".'_vs_'."$subject_name\n");
print OUTFILE "$redundancy".'_vs_'."$subject_name\n" ;
next if($redundancy == $subject_name);
}
&msg_send("\n\tEVALUE\tQUERY\tDATABASE\tLENGTH\tQUERY_START\tQUERY_END\tDATABASE_START\tDATABASE_END\tMATCH\tIDENTITY\tSTRAND\n-------------------------------------------------------------------------------------------------------\n") if ($header eq 'on' && $output eq 'STDOUT' && $s1 == 0);
print OUTFILE "\n\tEVALUE\tQUERY\tDATABASE\tLENGTH\tQUERY_START\tQUERY_END\tDATABASE_START\tDATABASE_END\tMATCH\tIDENTITY\tSTRAND\n-------------------------------------------------------------------------------------------------------\n" if($header eq 'on' && $output eq 'file');
if(/Length =(.+)/){
$s1 = 1;
$length = $1;
$length =~tr/\n//d;
}
if(/Identities =/ && $s1 ==1){
@identities = split(/\s/,$_);
}
if(/Strand/ && $s1 ==1){
@strand=split(/\s/,$_);
}
if(/Query:/){
@query=split(/[\s]+/,$_);
push(@query_num,$query[1]);
push(@query_num,$query[3]);
$s1=1;
}
if(/Score / || />/ || /Lambda/){
$this->{$p}->{"evalue"} = $evalue;
&msg_send("$evalue") if($output eq 'STDOUT');
print OUTFILE "$evalue" if($output eq 'file');
@score=split(/Expect/,$_);
$evalue=$score[1];
$evalue=~tr/=//d;
$evalue=~tr/\n//d;
@query_num_sort=sort {$b<=>$a} @query_num;
$query_start=pop @query_num_sort;
$query_start=~tr/\s//;
$query_end=shift @query_num_sort;
$query_end=~tr/\s//;
@sbject_num_sort=sort {$b<=>$a} @sbject_num;
$sbject_start=pop @sbject_num_sort;
$sbject_start=~tr/\s//;
$sbject_end=shift @sbject_num_sort;
$sbject_end=~tr/\s//;
$identities[4] =~ tr/\s\(\)//d;
if($query_start!=""){
$this->{$p}->{"query_name"} = $query_name;
$this->{$p}->{"subject_name"} = $subject_name;
$this->{$p}->{"length"} = $length;
$this->{$p}->{"query_start"} = $query_start;
$this->{$p}->{"query_end"} = $query_end;
$this->{$p}->{"subject_start"} = $sbject_start;
$this->{$p}->{"subject_end"} = $sbject_end;
$this->{$p}->{"identity"} = $identities[3];
$this->{$p}->{"percentage"} = $identities[4];
$this->{$p}->{"strand"} = $strand[5];
&msg_send("\t$query_name\t$subject_name\t$length\t$query_start\t$query_end\t$sbject_start\t$sbject_end\t$identities[3]\t$identities[4]\t$strand[5]\n") if ($output eq 'STDOUT');
print OUTFILE "\t$query_name\t$subject_name\t$length\t$query_start\t$query_end\t$sbject_start\t$sbject_end\t$identities[3]\t$identities[4]\t$strand[5]\n" if ($output eq 'file');
$p++;
}
@query=();
@query_num=();
@sbject=();
@sbject_num=();
@identities=();
## msg_send('.');
}
if($s1==1){
if(/Sbjct:/){
@sbject=split(/\s+/,$_);
push(@sbject_num,$sbject[1]);
push(@sbject_num,$sbject[3]);
}
}
}
return $this;
close(FILE);
close(OUTFILE);
}
sub mapping_blast2{
msg_send("####################BLAST_SEARCHSTART#####################\n");
&opt_default(clustering=>"off", p=>"blastn",database=>"genome_file",query=>"cDNA_dir\/",
makedir=>"Blast_result\/",resultfile=>"blastresultlist");
### my args
my @args=opt_get(@_);
my $clustering=&opt_val("clustering");
my $p=&opt_val("p");
my $database=&opt_val("database");
my $query=&opt_val("query");
my $makedir=&opt_val("makedir");
my $resultfile=&opt_val("resultfile");
###Using Blast
my %opt;
my($num,$clusterdata,$count,$factory,$paramnum,$blast_report,$item,$item2);
my(@cdnapass,@readpart,@selectedcdna,@blastoptname,@param,@params);
###構造体
my($cdnaid,$parseresult1,$parseresult2,$icount,$jcount,$kcount,$queryplace,$memory);
my(@blastresultfiles,@blastdata,@genomeid);
if($clustering ne 'off'){
open(FILE, $clustering);
$num=0;
while(<FILE>){
if($_=~ /^[a-zA-Z0-9]/){
if($_=~ /,/){
$queryplace=$query;
$clusterdata=$_;
@readpart= split(/,/, $clusterdata);
@cdnapass[$num]="$query"."$readpart[0]".".seq";
$selectedcdna[$num]="$readpart[0]".".seq";
chomp @cdnapass[$num];
$num ++;
}
}
}
close FILE;
}
if($clustering eq 'off'){
if(-f $query){
$cdnapass[0]=$query;
$selectedcdna[0]=~ s/\s+\///g;
$num=1;
}
else{
opendir(R_DIR,"$query");
$queryplace=$query;
@selectedcdna=readdir(R_DIR);
$num=@selectedcdna;
if($selectedcdna[0]=~ /\./){
shift(@selectedcdna);
$num --;
}
if($selectedcdna[0]=~ /\.\./){
shift(@selectedcdna);
$num --;
}
$count=0;
foreach $item2(@selectedcdna){
$cdnapass[$count]="$query"."$item2";
$count++;
}
closedir(R_DIR);
}
}
$opt{p}=opt_val("p");
$opt{e}=opt_val("e");
$opt{m}=opt_val("m");
$opt{o}=opt_val("o");
$opt{F}=opt_val("F");
$opt{G}=opt_val("G");
$opt{E}=opt_val("E");
$opt{X}=opt_val("X");
$opt{I}=opt_val("I");
$opt{q}=opt_val("q");
$opt{r}=opt_val("r");
$opt{v}=opt_val("v");
$opt{b}=opt_val("b");
$opt{f}=opt_val("f");
$opt{g}=opt_val("g");
$opt{Q}=opt_val("Q");
$opt{D}=opt_val("D");
$opt{a}=opt_val("a");
$opt{J}=opt_val("J");
$opt{M}=opt_val("M");
$opt{W}=opt_val("W");
$opt{z}=opt_val("z");
$opt{K}=opt_val("K");
$opt{P}=opt_val("P");
$opt{Y}=opt_val("Y");
$opt{S}=opt_val("S");
$opt{T}=opt_val("T");
$opt{U}=opt_val("U");
$opt{y}=opt_val("y");
$opt{Z}=opt_val("Z");
$opt{n}=opt_val("n");
$opt{A}=opt_val("A");
$opt{w}=opt_val("w");
$opt{t}=opt_val("t");
$opt{L}=opt_val("L");
$opt{O}=opt_val("O");
$opt{l}=opt_val("l");
$opt{R}=opt_val("R");
foreach(sort keys(%opt)){
next if($opt{$_} eq '');
push(@blastoptname,$_);
push(@param,$opt{$_});
}
unless($makedir=~ /none/){
system("mkdir $makedir");
}
for($count=0;$count<$num;$count++){
msg_send("Searching_start: NO $count\n ") ;
if($makedir=~ /none/){
@params = ('program' => "$p",'database' => "$database",'outfile' => "$selectedcdna[$count]".".out", '_READMETHOD' => 'Blast');
}
else{
@params = ('program' => "$p",'database' => "$database",'outfile' => $makedir."/$selectedcdna[$count]".".out", '_READMETHOD' => 'Blast');
}
$factory = Bio::Tools::Run::StandAloneBlast->new(@params);
$paramnum=0;
foreach $item(@blastoptname){
$factory->$item($param[$paramnum]);
$paramnum ++;
}
msg_send("$cdnapass[$count]\n");
$blast_report = $factory->blastall($cdnapass[$count]);
msg_send("finish\n");
}
###############################################################
##################BLAST PARSER #######################
###############################################################
opendir(R_DIR,"$makedir");
@blastresultfiles=readdir(R_DIR);
$num=@blastresultfiles;
if($blastresultfiles[0]=~ /\./){
shift(@blastresultfiles);
$num --;
}
if($blastresultfiles[0]=~ /\.\./){
shift(@blastresultfiles);
$num --;
}
print "###################BLAST_PARSING_START####################\n";
for($count=0;$count<$num;$count++){
&blastparser(-open=>"$makedir"."$blastresultfiles[$count]" ,-resultfile=>"$resultfile")
}
$count=0;
$icount=-1;
$kcount=0;
$jcount=0;
open(FILE9, "$resultfile");
while(<FILE9>){
if($_=~ /=/){
$icount++;
$jcount=0;
$kcount=0;
$parseresult1=<FILE9>;
chomp $parseresult1;
@genomeid=split(/\t/,$parseresult1);
$genomeid[0]=~ s/>//g;
$genomeid[0]=~ s/ +//g;
$parseresult2=<FILE9>;
chomp $parseresult2;
@blastdata=split(/\t/,$parseresult2);
$memory->{$icount}->{$jcount}->{$kcount}->{query_name} = "$queryplace"."$selectedcdna[$count]";
$memory->{$icount}->{$jcount}->{$kcount}->{database_name} = $genomeid[0];
$memory->{$icount}->{$jcount}->{$kcount}->{database_bits} = $genomeid[1];
$memory->{$icount}->{$jcount}->{$kcount}->{strand} = $blastdata[9];
$memory->{$icount}->{$jcount}->{$kcount}->{database_start} = $blastdata[11];
$memory->{$icount}->{$jcount}->{$kcount}->{database_end} = $blastdata[13];
$memory->{$icount}->{$jcount}->{$kcount}->{database_place}= $database;
$count++;
}
if($_=~ />/){
$jcount++;
$kcount=0;
chomp $_;
$parseresult1=$_;
chomp $parseresult1;
@genomeid=split(/\t/,$parseresult1);
$genomeid[0]=~ s/>//g;
$genomeid[0]=~ s/ +//g;
$parseresult2=<FILE9>;
chomp $parseresult2;
@blastdata=split(/\t/,$parseresult2);
$memory->{$icount}->{$jcount}->{$kcount}->{query_name} = "$queryplace"."$selectedcdna[$count]";
$memory->{$icount}->{$jcount}->{$kcount}->{database_name} = $genomeid[0];
$memory->{$icount}->{$jcount}->{$kcount}->{database_bits} = $genomeid[1];
$memory->{$icount}->{$jcount}->{$kcount}->{strand} = $blastdata[9];
$memory->{$icount}->{$jcount}->{$kcount}->{database_start} = $blastdata[11];
$memory->{$icount}->{$jcount}->{$kcount}->{database_end} = $blastdata[13];
$memory->{$icount}->{$jcount}->{$kcount}->{database_place}= $database;
$kcount++;
$count
}
if($_=~ /^[0-9]/){
$kcount++;
$parseresult2=$_;
chomp $parseresult2;
@blastdata=split(/\t/,$parseresult2);
$memory->{$icount}->{$jcount}->{$kcount}->{query_name} = "$queryplace"."$selectedcdna[$count]";
$memory->{$icount}->{$jcount}->{$kcount}->{database_name} = $genomeid[0];
$memory->{$icount}->{$jcount}->{$kcount}->{database_bits} = $genomeid[1];
$memory->{$icount}->{$jcount}->{$kcount}->{strand} = $blastdata[9];
$memory->{$icount}->{$jcount}->{$kcount}->{database_start} = $blastdata[11];
$memory->{$icount}->{$jcount}->{$kcount}->{database_end} = $blastdata[13];
$memory->{$icount}->{$jcount}->{$kcount}->{database_place}= $database;
}
}
close FILE9;
msg_send("finish\n");
return $memory;
}
sub blastcutting{
&opt_default(basenum=>"30000",distance=>"50000", open=>"none", multicut=>"on",multidir=>"multientry\/",upper=>"0",logfilename=>"blastcut.log",resultdir=>"blastcut\/" );
my @args=opt_get(@_);
my $multidir=&opt_val("multidir");
my $basenum=&opt_val("basenum");
my $open=&opt_val("open");
my $upper=&opt_val("upper");
my $logfilename=&opt_val("logfilename");
my $resultdir=&opt_val("resultdir");
my $multicut=&opt_val("multicut");
my $distance=&opt_val("distance");
my $this = shift @args;
my %opt;
my($count,$acount,$bcount,$ccount,$hcount,$icount,$flag,$flag2,$entryname,$multigene,$tmp,$genomenum);
my($cutsentence,$sentence1,$sentence2,$cDNA_dir);
my(@hinum,@strand,@database_start,@database_end,@database_name,@database_bits,@query_name);
my(@max,@max_start,@max_end,@cDNA_pass);
$flag=1;
$flag2=1;
my $memory;
msg_send("#####################GENOME_CUT_START#####################\n");
msg_send("DEVIDE FASTA FILE...\n");
$hcount=0;
$count=0;
$acount=0;
$icount=0;
if($multicut eq "on"){
system("mkdir $multidir");
if($open=~ /none/){
open(FILE,"$this->{'0'}->{'0'}->{'0'}->{database_place}");
}
else{
open(FILE,"$open");
}
while(<FILE>){
if($_=~ />/){
if($flag=0){
close W_FILE;
$flag=1;
}
$entryname=$_;
chomp $entryname;
$entryname=~ s/>//g;
$entryname=~ s/ //g;
open(W_FILE,">$multidir"."$entryname");
print W_FILE ">$entryname\n";
$flag=0;
}
else{
$multigene=$_;
$multigene=~ s/\r\n/\n/g;
chomp $multigene;
$multigene=~ s/[^ATGCatgcNn]/N/g;
print W_FILE $multigene;
}
}
close FILE;
close W_FILE;
}
$flag=1;
while($flag==1){
$flag2=1;
while($flag2==1){
if($this->{$hcount}->{$icount}->{$count}->{strand}=~ /s/ ){
$strand[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{strand};
$database_start[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_start};
$database_end[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_end};
$database_name[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_name};
$query_name[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{query_name};
$database_bits[$hcount][$icount][$count]=$this->{$hcount}->{$icount}->{$count}->{database_bits};
$count ++;
}
else{
$max[$hcount][$icount]=$count;
$icount++;
$count=0;
unless($this->{$hcount}->{$icount}->{$count}->{strand}=~ /[PM]/ ){
$flag2=0;
}
}
}
$hinum[$hcount]=$icount;
$hcount ++;
$icount=0;
unless($this->{$hcount}->{$icount}->{$count}->{strand}=~ /[PM]/ ){
$flag=0;
}
}
for($ccount=0;$ccount<$hcount;$ccount++){
for($bcount=0;$bcount<$hinum[$ccount];$bcount++){
if($strand[$ccount][$bcount][0] eq "Plus"){
$max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][0];
$max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][0];
for($acount=0;$acount<$max[$ccount][$bcount];$acount++){
if($strand[$ccount][$bcount][$acount] eq "Plus"){
if($max_start[$ccount][$bcount]>$database_start[$ccount][$bcount][$acount] && $max_start[$ccount][$bcount]-$database_start[$ccount][$bcount][$acount]+1<$distance ){
$max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][$acount];
}
if($max_end[$ccount][$bcount]<$database_end[$ccount][$bcount][$acount] && $database_end[$ccount][$bcount][$acount]-$max_end[$ccount][$bcount]+1<$distance){
$max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][$acount];
}
}
}
}
else{
$max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][0];
$max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][0];
for($acount=0;$acount<$max[$ccount][$bcount];$acount++){
if($strand[$ccount][$bcount][$acount] eq "Minus"){
if($max_start[$ccount][$bcount]<$database_start[$ccount][$bcount][$acount] && $database_start[$ccount][$bcount][$acount] - $max_start[$ccount][$bcount]+1<$distance){
$max_start[$ccount][$bcount]=$database_start[$ccount][$bcount][$acount];
}
if($max_end[$ccount][$bcount]>$database_end[$ccount][$bcount][$acount] && $max_end[$ccount][$bcount]-$database_end[$ccount][$bcount][$acount] +1 <$distance){
$max_end[$ccount][$bcount]=$database_end[$ccount][$bcount][$acount];
}
}
}
$tmp=$max_start[$ccount][$bcount];
$max_start[$ccount][$bcount]=$max_end[$ccount][$bcount];
$max_end[$ccount][$bcount]=$tmp;
}
}
}
msg_send("PRINTING SEQUENCE...\n");
$cDNA_dir=$query_name[0][0][0];
$cDNA_dir=~ s/=//g;
@cDNA_pass=split(/\//,$cDNA_dir);
pop(@cDNA_pass);
$cDNA_dir=join('/',@cDNA_pass);
open(ROG,">$logfilename");
print ROG "cDNA: $cDNA_dir\n";
print ROG "Genome: $resultdir\n\n";
system("mkdir $resultdir");
for($ccount=0;$ccount<$hcount;$ccount++){
if($upper==0){$upper=$hinum[$ccount];}
if($upper>$hinum[$ccount]){$upper=$hinum[$ccount];}
for($bcount=0;$bcount<$upper;$bcount++){
print ROG "$query_name[$ccount][$bcount][0]\n";
$memory->{$ccount}->{Query_Path}->{$bcount} ="$query_name[$ccount][$bcount][0]";
$memory->{$query_name[$ccount][$bcount][0]}->{Query_Path}->{$bcount} ="$query_name[$ccount][$bcount][0]";
open(FILE,"$multidir"."$database_name[$ccount][$bcount][0]");
while(<FILE>){
if($_=~ />/){
$sentence1=$_;
$sentence2=<FILE>;
}
}
chomp $sentence2;
$genomenum=length($sentence2);
$max_start[$ccount][$bcount]=$max_start[$ccount][$bcount]-$basenum;
$max_end[$ccount][$bcount]=$max_end[$ccount][$bcount]+$basenum;
if($max_start[$ccount][$bcount]<1){$max_start[$ccount][$bcount]=1;}
if($max_end[$ccount][$bcount]>$genomenum){$max_end[$ccount][$bcount]=$genomenum;}
print ROG "$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]\n\n";
$memory->{$ccount}->{Genome_Path}->{$bcount} ="$resultdir"."$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]";
$memory->{$query_name[$ccount][$bcount][0]}->{Genome_Path}->{$bcount} ="$resultdir"."$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]";
$memory->{$ccount}->{Genome_Start}->{$bcount} = $max_start[$ccount][$bcount];
$memory->{$query_name[$ccount][$bcount][0]}->{Genome_Start}->{$bcount} = $max_start[$ccount][$bcount];
$memory->{$ccount}->{Genome_End}->{$bcount} =$max_end[$ccount][$bcount];
$memory->{$query_name[$ccount][$bcount][0]}->{Genome_End}->{$bcount} =$max_end[$ccount][$bcount];
open(W_FILE,">$resultdir"."$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]");
$cutsentence=substr($sentence2,$max_start[$ccount][$bcount]-1,$max_end[$ccount][$bcount]-$max_start[$ccount][$bcount]);
print W_FILE ">$database_name[$ccount][$bcount][0]_"."$max_start[$ccount][$bcount]_"."$max_end[$ccount][$bcount]\n";
print W_FILE "$cutsentence";
close FILE;
close W_FILE;
}
}
$memory->{Genome_ID}=$multidir;
msg_send("BLAST_COMPLETE\n");
return $memory;
}
sub _post_blast_clusterer{
&opt_default(e=>'e-100',filename=>'blast_clusters.tmp',output=>'f');
my @args = &opt_get(@_);
my $file = shift @args;
my $eval = &opt_val('e');
my $filename = &opt_val('filename');
my $output = &opt_val('output');
my $clstlist;
my $this = {};
my $i = 0;
my ($query,$subject,$tmp,$clstlist);
my @list = ();
my @clusters = ();
my %red;
open(FILE,$file);
open(OUTFILE,">$filename");
while(<FILE>){
if(/^=(.+)/){
if($i > 0){
$clstlist = join(',',@list);
push (@clusters,$clstlist);
print OUTFILE ("$clstlist\n")if($output eq 'f');
}
$i++;
@list = ();
$clstlist = '';
$query = $1;
push(@list,$query);
}
elsif(/^>(.+)\s+([0-9]+)/){
$subject = $1;
$subject =~ s/\s\n//g;
next if($subject eq $query);
while(<FILE>){
if(/^.+\s+[0-9]+\s+(.+)\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+(.+)\s+([0-9])+\s+([0-9])+\s+([0-9])+\s+([0-9])+/){
if($1 <= $eval){
push(@list,$subject);
msg_send(".");
$this->{$query}->{$subject}->{strand} = $2;
$this->{$query}->{$subject}->{qstart} = $3;
$this->{$query}->{$subject}->{sstart} = $4;
$this->{$query}->{$subject}->{qend} = $5;
$this->{$query}->{$subject}->{send} = $6;
}
}
last;
}
}
}
$clstlist = join(',',@list);
push (@clusters,$clstlist);
print OUTFILE ("$clstlist\n")if($output eq 'f');
@list = ();
close(FILE);
close(OUTFILE);
return @clusters;
}
sub _list_clusterer {
&opt_default(filename=>'cluster_list',sort=>'on',split=>',');
&opt_get(@_);
my $input = shift;
my $filename = opt_val('filename');
my $sort = opt_val('sort');
my $split = opt_val('split');
my @clusters=();
my @array=();
my ($cluster,$new,$key);
my $a;
my $c=1;
my $i=0;
my $key;
my %hash;
open(INFILE,"$input");
open(OUTFILE,">$filename");
open(TMP,">tmp.txt");
while(<INFILE>){
chomp;
@array = split(/$split/);
$cluster = $c;
$new = 1;
msg_send(".");
foreach $key (sort keys %hash){
foreach $a (@array){
$a =~ s/\s//g;
if($key eq $a){
$cluster = $hash{$key};
$new = 0;
last;
}
}
if($new == 0){
last;
}
}
if($new == 1){
foreach $a (@array){
$hash{$a} = $c;
}
$c++;
print TMP $c,"\n";
}
else{
foreach $a (@array){
$hash{$a} = $cluster;
}
print TMP $cluster, "\n";
}
}
close(INFILE);
for($i=1;$i<=$c;$i++){
foreach $key (keys %hash){
if($hash{$key} == $i){
print OUTFILE $key, ",";
}
}
print OUTFILE "\n";
}
close(INFILE);
close(OUTFILE);
_list_sorter("$filename",-filename=>"$filename.sort",-split=>"$split") if ($sort ne 'off');
}
sub _list_sorter {
&opt_default(split=>',',filename=>"sort_list");
my @args = &opt_get(@_);
my $input = shift @args;
my $filename = &opt_val('filename');
my $split = &opt_val('split');
my $list;
my (@array,@narray,@flist);
open(IN,"$input");
open(OUT,">$filename");
while(<IN>){
chomp;
@array = split(/$split/);
@narray = ();
foreach(sort @array){
$_ =~ s/\s//g;
push(@narray,$_)if($_ ne '');
}
if(scalar @narray > 1){
$list = join(',',@narray);
}else{
$list = $_;
}
push(@flist,$list);
print OUT "$list\n";
}
close(IN);
close(OUT);
return(@flist);
foreach(@flist){
msg_send("$_\n");
}
}
sub blastparser{
&opt_default(open=>"result" ,filename=>"blastresultlist");
my @args=opt_get(@_);
my $open=&opt_val("open");
my $resultfile=&opt_val("filename");
my $this = shift @args;
my $seq;
my $param;
my @param;
my %opt;
my @blastoptname;
my $sentence; ###
my $sentence1; ###
my $sentence2; ###
my $sentence3; ###
my $sentence4; ###
my $sentence5; ###
my $sentence20; ###
my $lA; ###
my $lB; ###
my $lC; ###
my $lD; ###
my $lE; ###
my $lF; ###
my $lG;
my @lva; ###
my @lvb; ###
my @sentenceA; ###
my @sentenceB; ###
my @sentenceC; ###
my $sentenceem; ###
my @partse; ###
my @partse2; ###
my @partig; ###
my @partig2; ###
my @partig3; ###
my @partig12; ###
my @partig13; ###
open(FILE, $open);
open(FILE2, ">>$resultfile");
print FILE2 "QUERY\n";
print FILE2 "DATABASE\t";
print FILE2 "DATABASE_ALLBITS\n";
print FILE2 "SCOREBITS\t";
print FILE2 "SCORE\t";
print FILE2 "EXPECT\t";
print FILE2 "IDENTITIES(A\/\t";
print FILE2 "B)\t";
print FILE2 "%MATCH\t";
print FILE2 "GAP(A\/\t";
print FILE2 "B)\t";
print FILE2 "%GAP\t";
print FILE2 "STRAND\t";
print FILE2 "QUERY_START\t";
print FILE2 "SBJCT_START\t";
print FILE2 "QUERY_END\t";
print FILE2 "SBJCT_END\t\n";
print FILE2 "---------------------------------------------------------------------------------------------\n";
$lA=0;
$lB=0;
$lC=0;
$lD=0;
$lE=0;
$lF=0;
$lG=0;
while(<FILE>){
if($_=~ /Query= /){
$sentence=$_;
$sentence=~ s/Query= //g;
print FILE2 "=$sentence";
}
if($_=~ />/ || $sentence3 =~ />/){
if($_=~ />/){
chomp $_;
print FILE2 "$_\t";###h
$sentence20=<FILE>;
chomp $sentence20;
$sentence20=~ s/ //g;
$sentence20=~ s/Length=//g;
print FILE2 "$sentence20\n"; ###h
}
if($sentence3 =~ />/){
chomp $sentence3;
print FILE2 "$sentence3\t"; ###h
chomp $sentence20;
$sentence20=~ s/ //g;
$sentence20=~ s/Length=//g;
print FILE2 "$sentence20\n"; ###h
$sentence3=$sentenceem;
}
}
if($_=~ /^ Score/ || $sentence4 =~ /Score/){
if($_=~ /Score/){
$sentenceA[$lA]=$_;
chomp $sentenceA[$lA];
@partse=split(/,/,$sentenceA[$lA]);
$partse[0]=~ s/ +//g;
$partse[0]=~ s/Score=//g;
$partse[0]=~ s/bits//g;
$partse[0]=~ s/\)//g;
@partse2=split(/\(/,$partse[0]);
$partse[1]=~ s/ +//g;
$partse[1]=~ s/Expect=//g;
print FILE2 "$partse2[0]\t";
print FILE2 "$partse2[1]\t";
print FILE2 "$partse[1]\t";
$lA++;
$sentenceA[$lA]=<FILE>;
chomp $sentenceA[$lA];
@partig=split(/,/,$sentenceA[$lA]);
$partig[0]=~ s/ +//g;
$partig[0]=~ s/Identities=//g;
$partig[0]=~ s/%\)//g;
@partig2=split(/\//,$partig[0]);
print FILE2 "$partig2[0]\t";
@partig3=split(/\(/,$partig2[1]);
print FILE2 "$partig3[0]\t";
print FILE2 "$partig3[1]\t";
$partig[1]=~ s/ +//g;
$partig[1]=~ s/Gaps=//g;
$partig[1]=~ s/%\)//g;
@partig12=split(/\//,$partig[1]);
@partig13=split(/\(/,$partig12[1]);
if($partig[1]=~ /[0-9]/){
print FILE2 "$partig12[0]\t";
print FILE2 "$partig13[0]\t";
print FILE2 "$partig13[1]\t";
}
else{
print FILE2 "0\t";
print FILE2 "0\t";
print FILE2 "0\t";
}
$lA++;
$sentenceA[$lA]=<FILE>;
chomp $sentenceA[$lA];
$sentenceA[$lA]=~ s/ //g;
$sentenceA[$lA]=~ s/Strand=//g;
$sentenceA[$lA]=~ s/Plus\///g;
print FILE2 "$sentenceA[$lA]\t";
$lA++;
}
if($sentence4=~ /Score/){
$sentenceA[$lA]=$sentence4;
chomp $sentenceA[$lA];
@partse=split(/,/,$sentenceA[$lA]);
$partse[0]=~ s/ +//g;
$partse[0]=~ s/Score=//g;
$partse[0]=~ s/bits//g;
$partse[0]=~ s/\)//g;
@partse2=split(/\(/,$partse[0]);
$partse[1]=~ s/ +//g;
$partse[1]=~ s/Expect=//g;
print FILE2 "$partse2[0]\t";
print FILE2 "$partse2[1]\t";
print FILE2 "$partse[1]\t";
$lA++;
$sentenceA[$lA]=$_;
chomp $sentenceA[$lA];
@partig=split(/,/,$sentenceA[$lA]);
$partig[0]=~ s/ +//g;
$partig[0]=~ s/Identities=//g;
$partig[0]=~ s/%\)//g;
@partig2=split(/\//,$partig[0]);
print FILE2 "$partig2[0]\t";
@partig3=split(/\(/,$partig2[1]);
print FILE2 "$partig3[0]\t";
print FILE2 "$partig3[1]\t";
$partig[1]=~ s/ +//g;
$partig[1]=~ s/Gaps=//g;
$partig[1]=~ s/%\)//g;
@partig12=split(/\//,$partig[1]);
@partig13=split(/\(/,$partig12[1]);
if($partig[1]=~ /[0-9]/){
print FILE2 "$partig12[0]\t";
print FILE2 "$partig13[0]\t";
print FILE2 "$partig13[1]\t";
}
else{
print FILE2 "0\t";
print FILE2 "0\t";
print FILE2 "0\t";
}
$lA++;
$sentenceA[$lA]=<FILE>;
chomp $sentenceA[$lA];
$sentenceA[$lA]=~ s/ //g;
$sentenceA[$lA]=~ s/Strand=//g;
$sentenceA[$lA]=~ s/Plus\///g;
print FILE2 "$sentenceA[$lA]\t";
$lA++;
$sentence4=$sentenceem;
}
}
if($_=~ /Query:/){
$sentenceB[$lB]=$_;
$lB ++;
$sentence1=<FILE>;
$sentence2=<FILE>;
$sentenceC[$lC]=$sentence2;
$lC++;
while(<FILE>){
if($_=~ /Query:/){
$sentenceB[$lB]=$_;
###print $sentenceB[$lB];
$lB ++;
$sentence1=<FILE>;
$sentence2=<FILE>;
$sentenceC[$lC]=$sentence2;
$lC++;
}
if($_=~ />/ || $_ =~ /Score/ || $_ =~ / Database/){
if($_=~ />/){
$sentence3=$_;
@lva=split(/\s+/,$sentenceB[$lD]);
@lvb=split(/\s+/,$sentenceC[$lE]);
print FILE2 "$lva[1]\t";
print FILE2 "$lvb[1]\t";
@lva=split(/\s+/,$sentenceB[$lB-1]);
@lvb=split(/\s+/,$sentenceC[$lC-1]);
print FILE2 "$lva[3]\t";
print FILE2 "$lvb[3]\t\n";
$lD=$lB;
$lE=$lC;
$sentence20=<FILE>;
last;
}
if($_=~ /Score/){
$sentence4=$_;
@lva=split(/\s+/,$sentenceB[$lD]);
@lvb=split(/\s+/,$sentenceC[$lE]);
print FILE2 "$lva[1]\t";
print FILE2 "$lvb[1]\t";
@lva=split(/\s+/,$sentenceB[$lB-1]);
@lvb=split(/\s+/,$sentenceC[$lC-1]);
print FILE2 "$lva[3]\t";
print FILE2 "$lvb[3]\t\n";
$lD=$lB;
$lE=$lC;
last;
}
if($_=~ /Database/){
$sentence5=$_;
@lva=split(/\s+/,$sentenceB[$lD]);
@lvb=split(/\s+/,$sentenceC[$lE]);
print FILE2 "$lva[1]\t";
print FILE2 "$lvb[1]\t";
@lva=split(/\s+/,$sentenceB[$lB-1]);
@lvb=split(/\s+/,$sentenceC[$lC-1]);
print FILE2 "$lva[3]\t";
print FILE2 "$lvb[3]\t\n";
$lD=$lB;
$lE=$lC;
last;
}
}
}
}
}
close FILE;
close FILE2;
}
sub DESTROY {
my $self = shift;
}
1;
__END__
## Below is the stub of documentation for your module. You better edit it!
=head1 NAME
G::Tools::H2v - Perl extension for blah blah blah
=head1 SYNOPSIS
use G::Tools::Blast;
blah blah blah
=head1 DESCRIPTION
Stub documentation for G::Tools::Blast was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.
Blah blah blah.
=head1 AUTHOR
A. U. Thor, a.u.thor@a.galaxy.far.far.away
=head1 SEE ALSO
perl(1).
=cut
}
sub _gblaster
{ my $options = shift;
my @blast = `blastall $options`;
my @result;
foreach my $tmp (@blast){
my ($query, $subject, $percent, $length, undef, undef, $qstart, $qend,
$sstart, $send, $eval, $score) = split(/\s+/, $tmp, 12);
push(@result, [$query, $subject, $percent, $length, $qstart, $qend,
$sstart, $send, $eval, $score]);
}
return @result;
}
sub new
{ my $pkg = shift;
my $filename = shift;
my $option = shift;
my $this;
return $this;
}
General documentation
No general documentation available.