G::Tools
Mapping
G::Tools::Mapping - Perl extension for blah blah blah
|
Globals (from use vars definitions) |
@EXPORT |
$VERSION |
@EXPORT_OK |
use G::Tools::Mapping; blah blah blah
|
Stub documentation for G::Tools::Mapping was created by h2xs. It looks like the author of the extension was negligent enough to leave the stub unedited.
Blah blah blah.
|
DESTROY | No description | Code |
_blast_db_for_mapping | No description | Code |
_blast_for_mapping | No description | Code |
_blastpointer_for_mapping | No description | Code |
_cutquery_for_mapping | No description | Code |
_file_list_for_mapping | No description | Code |
_foreach_blastpointer_for_mapping | No description | Code |
_foreach_mask_repeat_for_mapping | No description | Code |
_formatdb_for_mapping | No description | Code |
_jstat_for_mapping | No description | Code |
_mask_repeat_for_mapping | No description | Code |
new | No description | Code |
Methods description
Methods code
sub DESTROY
{ my $self = shift;
}
sub _blast_db_for_mapping
{ &opt_default(limit=>10000);
my @args=opt_get(@_);
my $data_file=shift @args;
my $work_dir=shift @args;
my $limit=opt_val('limit');
my $len;
my $i;
my $extra;
my @lab;
my $lens;
my @file;
my $file;
my $data;
my @filename;
my $k;
my $w;
if($data_file!~/\*/){
push(@file,$data_file);
}
else{
$file=qx!ls $data_file!;
@file=split("\n",$file);
}
foreach $k (@file){
open(INFILE, $k);
$w=substr($k,rindex($k,'/'));
open(OUTFILE,'>'."$work_dir"."$w".'.db');
push(@filename,"$work_dir"."$w".'.db');
$i=1;
while(<INFILE>){
if(/\>/){
@lab=split(/\s/,$_);
print OUTFILE $lab[0],"_$i\n";
}
unless(/\>/){
tr/\n//d;
$len=$len+length($_);
if($len>=$limit){
$i++;
$extra=$len-$limit;
print OUTFILE substr($_,0,length($_)-$extra);
print OUTFILE "\n",$lab[0],"_$i\n";
print OUTFILE substr($_,length($_)-$extra),"\n";
$len=$extra;
}
else{
print OUTFILE $_,"\n";
}
}
}
close(OUTFILE);
close(INFILE);
}
return\@ filename;
}
sub _blast_for_mapping
{ my $work_dir=shift;
my $filenames=shift;
my @query;
my $data;
opendir(DIRD, $work_dir);
@query=readdir(DIRD);
closedir(DIRD);
foreach(@{$filenames}){
$data.=$_."\\ ";
}
$data='"'.$data.'"';
chdir($work_dir);
foreach(@query){
if(/\.fst$/){
_blast($data,$_,-o=>"$_.blast",-v=>20,-b=>20,-qr=>'on');
}
}
}
sub _blastpointer_for_mapping
{ my $filename=shift;
my $limit=shift;
my $switch;
my $switch2;
my $switch3;
my $switch4;
my @ID;
my @aln;
my @CHR;
my @line;
my $seq;
my $len;
my $hit;
my @Evalue;
my @sbjct_line;
my @query_line;
my %qpos;
my %spos;
my $q;
my $tmp;
my @tmp2;
my $name;
my $cond;
my $start;
my $stop;
my %hash;
my $ind;
my $rind;
my $Nnum;
my $t;
open(OUTFILE,'>>gene_list.txt');
open(INFILE,$filename);
while(<INFILE>){
tr/\n//d;
$cond=0;
if($switch4==1){
$len=$_;
$len=~tr/\(\) letters\n//d;
$switch4=0;
}
if(/Query=/){
$name=$_;
$name=~s/Query= //;
$name=~tr/\n//d;
$switch4=1;
}
if(/Sequences producing significant alignments\:/){
$switch=1;
}
elsif($switch==1 && $_ ne "" && $_ !~ /\>/){
if(/(\S+)\s+\d+\s(.+)/){
$line[0]=$1;
$line[2]=$2;
}
if($line[2]=~m/e-/){
@Evalue=split(/-/,$line[2]);
if($Evalue[1]>100){
push(@ID,$line[0]);
}
}
elsif($line[2]=="0.0"){
push(@ID,$line[0]);
}
}
if(/^\>/){
if(%qpos){
@tmp2=sort{$a <=> $b}keys(%qpos);
$tmp=substr($seq,$tmp2[0]-1($tmp2[-1]-$tmp2[0])+1);
$Nnum=$tmp=~tr/N/N/;
if($Nnum<50){
@tmp2=sort{$a <=> $b}keys(%qpos);
for(my $i=$tmp2[0]-1;$i<=$tmp2[-1]-1;$i++){
substr($seq,$i,1)="N";
}
foreach(keys(%spos)){
$hash{$q}{pos}{$_}=1;
}
}
}
%spos=();
%qpos=();
$hit=$seq=~tr/N/N/;
foreach $tmp (sort{$a <=> $b} keys(%hash)){
@tmp2=sort{$a <=> $b} keys(%{$hash{$tmp}{pos}});
$ind=index($seq,"N")+1;
$rind=rindex($seq,"N")+1;
print OUTFILE $name,"\t",$hash{$tmp}{chromosome},"\t",$tmp2[0],"\t",$tmp2[-1],"\t",$tmp2[-1]-$tmp2[0]+1,"\t",$hash{$tmp}{strand},"\t",$hash{$tmp}{evalue},"\t",$ind,"\t",$rind,"\t",sprintf("%.2f",$hit/$len),"\($hit/$len\)","\t",sprintf("%.2f",$hit/($tmp2[-1]-$tmp2[0]+1)),"\($hit/",$tmp2[-1]-$tmp2[0]+1,"\)","\t",((split(/\//,$filename))[-1]),"\n" if($hash{$tmp}{evalue});
}
%hash=();
$seq="";
for(my $i=0;$i<$len;$i++){
$seq.="Y";
}
$switch=0;
$q= $_;
$q=~ tr/\>\n //d;
@CHR=split(/_/,$_);
$CHR[1]=~tr/\n //d;
foreach $t (@ID){
if($q eq $t){
$switch=2;
$hash{$q}{chromosome}=$_;
$hash{$q}{chromosome}=~s/\>//;
$hash{$q}{chromosome}=~s/\_\w+//;
$hash{$q}{chromosome}=~tr/\n//d;
}
}
}
elsif($switch==2){
if(/Expect \= /){
unless($hash{$q}{evalue}){
$hash{$q}{evalue}=(split(/ \= /,$_))[2];
}
if(%qpos){
@tmp2=sort{$a <=> $b}keys(%qpos);
$tmp=substr($seq,$tmp2[0]-1($tmp2[-1]-$tmp2[0])+1);
$Nnum=$tmp=~tr/N/N/;
if($Nnum<50){
@tmp2=sort{$a <=> $b}keys(%qpos);
for(my $i=$tmp2[0]-1;$i<=$tmp2[-1]-1;$i++){
substr($seq,$i,1)="N";
}
foreach(keys(%spos)){
$hash{$q}{pos}{$_}=1;
}
}
}
%spos=();
%qpos=();
}
if(/Strand \= /){
tr/\n//d;
unless($hash{$q}{strand}){
$hash{$q}{strand}=(split(/ \/ /,$_))[1];
}
if($hash{$q}{strand} eq (split(/ \/ /,$_))[1]){
$switch3=0;
}
else{
$switch3=1;
}
}
if($switch3==0){
if(/Query:/ || $switch2==1){
$switch2=1;
push(@aln,$_);
if(/Sbjct:/){
@query_line=split(/\s+/,$aln[0]);
$qpos{$query_line[1]}=1;
$qpos{$query_line[3]}=1;
@sbjct_line=split(/\s+/,$aln[2]);
$spos{$sbjct_line[1]+$limit*($CHR[1]-1)}=1;
$spos{$sbjct_line[3]+$limit*($CHR[1]-1)}=1;
@aln=();
$switch2=0;
}
}
}
}
}
close(INFILE);
close(OUTFILE);
}
sub _cutquery_for_mapping
{ my $cdna=shift;
my $work=shift;
my $filenumber=0;
open(IN, $cdna);
while(<IN>){
if($_ =~ /^>/){
$filenumber++;
open(OUT, ">$work$filenumber.fst");
printf OUT "$_";
}
else{
printf OUT "$_";
}
}
close(OUT);
close(IN);
}
sub _file_list_for_mapping
{ my $query=shift;
my @file;
my $line;
opendir(DIR,$query);
@file=readdir(DIR);
open(OUT,'>ID_list.txt');
chdir($query);
foreach(@file){
if(/\.fst$/){
open(FILE,$_);
$line=<FILE>;
$line=~tr/>\n//d;
close(FILE);
print OUT "$line\t$_\.rst\n";
}
}
close(OUT);
}
_foreach_blastpointer_for_mapping | description | top | prev | next |
sub _foreach_blastpointer_for_mapping
{ &opt_default(limit=>10000);
my @args=opt_get(@_);
my $dir=shift @args;
my $limit=opt_val("limit");
my @filedata;
my %hash;
opendir(DIRD, $dir);
@filedata=readdir(DIRD);
closedir(DIRD);
foreach(sort{$a <=> $b}@filedata){
if(/\.blast$/){
&_blastpointer_for_mapping($dir.$_,$limit);
}
}
}
sub _foreach_mask_repeat_for_mapping
{ my $cdna_dir=shift;
my $lib=shift;
my @file;
opendir(DIR,$cdna_dir);
@file=readdir(DIR);
foreach(@file){
if(/\.fst/){
&_mask_repeat_for_mapping($_,$cdna_dir,$lib);
}
}
}
sub _formatdb_for_mapping
{ my $filenames=shift;
foreach(@{$filenames}){
_formatdb($_,@_);
}
}
sub _jstat_for_mapping
{ my $jstat;
my $who=qx!whoami!;
my $switch=1;
my @line;
$who=~tr/\n//d;
while($switch==1){
$switch=0;
$jstat=qx!jstat!;
@line=split(/\n/,$jstat);
foreach(@line){
$switch=1 if(/^${who}.*\sblastall\s.*/);
$switch=1 if(/^${who}.*def_$who.*/);
$switch=1 if(/jobs in queue def_${who}, queue is active,/);
}
sleep 60;
}
}
sub _mask_repeat_for_mapping
{ my $cdna=shift;
my $work=shift;
chdir($work);
_repeatmasker($cdna,@_);
rename $cdna,$cdna.'.original';
rename $cdna.'.masked',$cdna;
}
sub new
{ my $pkg = shift;
my $filename = shift;
my $option = shift;
my $this;
return $this;
}
General documentation