G::System
ReL8
Globals (from use vars definitions) |
@EXPORT |
$VERSION |
@EXPORT_OK |
DESTROY | No description | Code |
Totals | No description | Code |
blast_parse | No description | Code |
cap3_parse | No description | Code |
cluster | No description | Code |
fasta_parse | No description | Code |
file_maker | No description | Code |
file_maker_fasta | No description | Code |
new | No description | Code |
output_maker | No description | Code |
redundancy | No description | Code |
redundancy_cap3 | No description | Code |
redundancy_fasta | No description | Code |
redundancy_sim4 | No description | Code |
rmpolya | No description | Code |
sim4_parse | No description | Code |
Methods description
Methods code
sub DESTROY
{ my $self = shift;
}
sub Totals
{ &opt_default(style=>"Remove",output=>"STDOUT");
my @args = &opt_get(@_);
my $tmp = shift @args;
my $style = &opt_val("style");
my %Re_list =%$tmp;
my @list=();
my %tmp2=();
my $best;
my $best_name;
my $moin;
my $hohoho;
$best = 1 if($style eq "Remove");
foreach $moin (keys %Re_list){
my @tmp2 = @{$Re_list{$moin}};
$best = 1 if($style eq "Remove");
$best_name=();
foreach (@tmp2){
if(/(\w+)\_(\d+)/){
if($style eq "Remove"){
if($best < $2){
$tmp2{$best_name}++ if($best_name);
$best = $2;
$best_name = $1;
}else{
$tmp2{$1}++ if($best_name);
}
}elsif($style eq "Include"){
$tmp2{$1}++ if($best_name);
}
}
}
}
@list = keys %tmp2;
foreach $hohoho(@list){
msg_send($hohoho,"\n");
}
return (@list);
}
sub blast_parse
{ &opt_default(output=>"STDOUT",Identities=>"NULL",db_file=>"master.seq");
my @args=opt_get(@_);
my $file_number = shift;
my $Identities = &opt_val("Identities");
my $db_file = &opt_val("db_file");
my $i=0;
my %Re_list;
my $flag=0;
my $tmp = 0;
my $m=0;
my %score=();
my @R=();
while( $i < $file_number){
$score{bunsi}=0;
$score{bunbo}=0;
$flag=0;
open(FILE,$i.".bla");
while(<FILE>){
if(/^Sequences producing significant alignments\:/){
$flag=1;
}elsif(/^(\d+)\_(\d+) +\d+ +(\S+)/ && $flag == 1){
push(@R,$1."_".$2);
msg_send($_);
}elsif(/^\>(\S+)/){
if($score{bunbo}!=0){
if($Identities ne "NULL"){
$m=0;
if($score{bunsi}/$score{bunbo}*100 < $Identities){
foreach $tmp (@R){
$m++;
splice(@R,$m,1)if($tmp eq $1);
}
}
}
}
$score{bunsi}=0;
$score{bunbo}=0;
$flag++;
$tmp=$1;
}elsif(/^ Identities \= (\d+)\/(\d+) +\((\d+)\%\)/){
$score{bunsi}=$score{bunsi}+$1;
$score{bunbo}=$score{bunbo}+$2;
}elsif(/^ Database\: +$db_file/ && $flag >1){
if($score{bunbo}!=0){
if($Identities ne "NULL"){
$m=0;
if($score{bunsi}/$score{bunbo}*100 < $Identities){
foreach $tmp (@R){
$m++;
splice(@R,$m,1)if($tmp eq $1);
}
}
}
}
msg_send($i," ",@R,"\n");
$Re_list{$i}= [@R];
@R=();
last;
}
}
sub cap3_parse
{}
sub cluster
{ &opt_default(output=>"STDOUT");
my @args = &opt_get(@_);
my $tmp = shift @args;
my %Re_list =%$tmp;
my @list = keys %Re_list;
my $list_name1;
my $list_name2;
my $list_name3;
my $list_name4;
my @sub_class=();
my %class=();
my $m=0;
foreach $list_name1 (@list){
open(FILE,'>'.$list_name1.'-class.seq');
foreach $list_name2 (@{$Re_list{$list_name1}}){
push(@sub_class,$list_name2);
open(FILE2,$list_name2.'.seq');
while(<FILE2>){
print FILE $_;
}
close(FILE2);
$m=0;
foreach $tmp (@list){
$m++;
splice(@list,$m,1)if($tmp eq $list_name2);
}
}
$class{$list_name1}=[@sub_class];
close(FILE);
}
foreach $list_name3 (keys %class){
foreach $list_name4 (@{$class{$list_name3}}){
_sim4($list_name4.".seq",$list_name3.'-class.seq',-output=>$list_name4."-".$list_name3."-class.sim");
}
}
}
sub fasta_parse
{ &opt_default(output=>"STDOUT",Identities=>"NULL");
my @args=opt_get(@_);
my $file_number = shift;
my $Identities = &opt_val("Identities");
my $i=0;
my %Re_list;
my $flag=0;
my @R=();
while($i< $file_number){
$flag=0;
open(FILE,$i.".fst");
while(<FILE>){
if(/The best scores are\:/){
$flag=1;
}elsif(/(\w+)\_(\d+) +\(\d+\) +\[f\] +\d+ +\d+ +(\S+)/ && $flag == 1){
push(@R,$1."_".$2);
}elsif(/^\>\>\w+/ & $flag == 1){
$Re_list{$i}= [@R];
@R=();
last;
}
}
$i++;
}
return (%Re_list);
}
sub file_maker
{
&opt_default(directory_name=>"/tmp/ReL8/",sdirectory_name());
my @args=&opt_get(@_);
my $gb = shift @args;
my $directory_name=&opt_val("directory_name");
my $sdirectory_name=&opt_val("sdirectory_name");
my @filename=();
my $id_number=0;
mkdir($directory_name,0777)if(opendir(DIR,$directory_name) != 1);
mkdir($directory_name.$sdirectory_name,0777);
chdir($directory_name.$sdirectory_name);
open(FILE2,'>master.seq');
open(FILE3,'>ID_list.txt');
do{
$filename[$id_number]=$gb->{LOCUS}->{id};
print FILE3 $id_number." ".$gb->{LOCUS}->{id}."\n";
open(FILE,'>'.$id_number.'.seq');
print FILE ">$id_number"._($gb->{SEQ})."\n".uc($gb->{SEQ})."\n";
print FILE2 ">$id_number"._($gb->{SEQ})."\n".uc($gb->{SEQ})."\n";
close(FILE);
$id_number++;
}while($gb->next_locus("no msg"));
$gb->rewind_genome();
$gb->next_locus("no msg");
close(FILE2);
close(FILE3);
return (@filename);
}
sub file_maker_fasta
{ &opt_default(directory_name=>"ReL8/",sdirectory_name=>'query_fasta_files',split=>'on');
local $/ = ">";
my $this = shift @_;
my $file = shift @_;
my @args=&opt_get(@_);
my $directory_name=&opt_val("directory_name");
my $sdirectory_name=&opt_val("sdirectory_name");
my $split=&opt_val("split");
my @filelist=();
my @idlist;
my $id_number=0;
my $id_name='';
my $switch=0;
my $i = 0;
my %seq;
my $p;
mkdir($directory_name,0777)if(opendir(DIR,$directory_name) != 1);
mkdir($directory_name.$sdirectory_name,0777);
chdir($directory_name.$sdirectory_name);
open(FILE1,$file);
open(FILE2,'>master.seq');
open(FILE3,'>ID_list.txt');
while(<FILE1>){
$i++;
next if($i == 1);
$id_number++;
split(/[\n]/,$_,2);
$id_name = $_[0];
$id_name =~ s/^\s+//g;
$this->{$id_number}->{ID} = $id_name;
$this->{$id_name}->{NUM} = $id_number;
push(@filelist,$id_name);
$this->{$id_number}->{SEQ} = $_[1];
$this->{$id_name}->{SEQ} = $_[1];
$this->{$id_number}->{SEQ} =~ s/[^a-zA-Z]//g;
$this->{$id_name}->{SEQ} =~ s/[^a-zA-Z]//g;
open(FILE,">".$this->{$id_number}->{ID}.'.seq') if($split ne 'off');
print FILE ">$this->{$id_number}->{ID}\n".uc($this->{$id_number}->{SEQ})."\n";
print FILE3 $id_number." ".$this->{$id_number}->{ID}." ".length($this->{$id_number}->{SEQ})."\n";
print FILE2 ">$this->{$id_number}->{ID}\n".uc($this->{$id_number}->{SEQ})."\n";
close(FILE);
msg_send(".");
}
close(FILE1);
close(FILE2);
close(FILE3);
chdir('../');
return(@filelist);
}
sub new
{ my $pkg = shift;
my $filename = shift;
my $option = shift;
my $this;
return ($this);
}
sub output_maker
{ &opt_default(output=>"STDOUT",file_name=>"default.gbk");
my @args = &opt_get(@_);
my $gb = shift @args;
my $Re_name = shift @args;
my $output = &opt_val("output");
my $file_name = &opt_val("file_name");
my $flag=0;
do{
$flag=0;
foreach(@$Re_name){
$flag=1 if($gb->{LOCUS}->{id} eq $_);
}
$gb->make_gb($file_name, "attach")if($flag==0);
}while($gb->next_locus("no msg"));
}
sub redundancy
{ &opt_default(qsub=>"off",analysis_method=>"fasta",directory_name=>"/tmp/ReL8/",sdirectory_name(),value=>"1e-50",output=>"default.gbk",clustering=>"ON",identities=>"NULL");
my @args=&opt_get(@_);
my $gb = shift @args;
my $qsub=opt_val("qsub");
my $analysis_method = &opt_val("analysis_method");
my $directory_name = &opt_val("directory_name");
my $sdirectory_name = &opt_val("sdirectory_name");
my $output=&opt_val("output");
my $value=&opt_val("value");
my $clustering=&opt_val("clustering");
my $identities=&opt_val("identities");
my @filename=();
my $i=0;
my $change_name;
my @file_list=();
my %tmp=();
my @tmp2=();
@filename = file_maker($gb,-sdirectory_name=>$sdirectory_name,-directory_name=>$directory_name);
_formatdb("master.seq",-p=>"F",-o=>"T")if( $analysis_method eq "blast");
while($i < $##filename + 1){
msg_send(".");
_blast("master.seq","$i".".seq",-p=>"blastn",-o=>$i.".bla",-e=>$value)if( $analysis_method eq "blast");
_fasta('-Q','-n',-O=>$i.".fst",-E=>$value,$i.".seq","master.seq")if( $analysis_method eq "fasta");
_sim4($i.".seq",'master.seq',-output=>$i.".sim")if( $analysis_method eq "sim4");
## _cap3($i.".seq",'master.seq',-output=>$i.".cap",-qsub=$qsub)if( $analysis_method eq "cap3");
$i++;
}
sub redundancy_cap3
{ &opt_default(qsub=>"off",directory_name=>"/tmp/ReL8/",sdirectory_name(),value=>0.98,output=>"~/asimo/default.gbk",filename=>"redundancy_cap3.lst" );
my @args=&opt_get(@_);
my $gb = shift @args;
my $qsub=opt_val("qsub");
my $directory_name=&opt_val("directory_name");
my $sdirectory_name=&opt_val("sdirectory_name");
my $output=opt_val("output");
my $filename=opt_val("filename");
my $value=opt_val("value");
my @filename=();
my %tmp=();
my @tmp2=();
chdir($directory_name.$sdirectory_name);
@filename = &file_maker($gb,-sdirectory_name=>$sdirectory_name,-directory_name=>$directory_name);
foreach(@filename){
msg_send(".");
_cap3($_.".seq",'master.seq',-output=>$_.".cap",-qsub=>$qsub);
}
%tmp = cap3_parse(\@filename,-value=>$value,);
@tmp2 = Totals(\%tmp);
&output_maker($gb,\@tmp2,-file_name=>$output);
msg_send("Finish !! output file is ",$output,"\n");
}
sub redundancy_fasta
{ &opt_default(directory_name=>"/tmp/ReL8/",sdirectory_name(),value=>"1e-50",output=>"~/asimo/default.gbk",filename=>"redundancy_fasta.lst");
my @args=&opt_get(@_);
my $gb = shift @args;
my $directory_name=&opt_val("directory_name");
my $sdirectory_name=&opt_val("sdirectory_name");
my $output=opt_val("output");
my $filename=opt_val("filename");
my $value=opt_val("value");
my @filename=();
my %tmp=();
my @tmp2=();
@filename = file_maker($gb,-sdirectory_name=>$sdirectory_name,-directory_name=>$directory_name);
foreach(@filename){
_fasta('-Q','-n',-O=>$_.".fst",-E=>$value,$_.".seq","master.seq");
}
%tmp = fasta_parse(\@filename);
@tmp2 = Totals(\%tmp);
output_maker($gb,\@tmp2,-file_name=>$output);
msg_send("Finish !! output file is ",$output,"\n");
}
sub redundancy_sim4
{ &opt_default(directory_name=>"/tmp/ReL8/",sdirectory_name(),value=>0.98,output=>"~/asimo/default.gbk",filename=>"redundancy_sim4.lst" );
my @args=&opt_get(@_);
my $gb = shift @args;
my $directory_name=&opt_val("directory_name");
my $sdirectory_name=&opt_val("sdirectory_name");
my $output=opt_val("output");
my $filename=opt_val("filename");
my $value=opt_val("value");
my @filename=();
my %tmp=();
my @tmp2=();
chdir($directory_name.$sdirectory_name);
@filename = &file_maker($gb,-sdirectory_name=>$sdirectory_name,-directory_name=>$directory_name);
foreach(@filename){
msg_send(".");
_sim4($_.".seq",'master.seq',-output=>$_.".sim");
}
%tmp = sim4_parse(\@filename,-value=>$value,);
@tmp2 = Totals(\%tmp);
&output_maker($gb,\@tmp2,-file_name=>$output);
msg_send("Finish !! output file is ",$output,"\n");
}
sub rmpolya
{
#####
##remove poly A tail program
##### variables ####
my(%opt);
my(@arg);
my(@read_seq);
my($rdir),my(@rfile),my($wdir),my($wfile);
my($header_flg),my($tmp),my($ch);
my($i),my($j),my($t),my($end),my($paend),my($point),my($pacheck);
my($num_entry);
##### error action ####
##### input action ####
&opt_default(filename => "polyA_removed_files",
log_file => "rmpolya.log",
##analysis_type => "-d" ,
over_write => "on" ,
capital => "uc" ,
min_length => "5",
check_window => "4",
check_threshold => "3",
poly_nucleotide => "A");
@arg = opt_get(@_);
@read_seq = ();
%opt = ();
### AUTO SELECT ##
if(-d $arg[0]){
$opt{analysis_type} = "-d";
$opt{filename} = opt_val("filename");
}elsif(-f $arg[0] || -l $arg[0]){
$opt{analysis_type} = "-f";
$opt{filename} = substr($arg[0],rindex($arg[0],"/")+1).".rmp";
}
### AUTO SELECT END ##
$opt{log_file} = opt_val("log_file");
##$opt{analysis_type} = opt_val("analysis_type");
$opt{over_write} = opt_val("over_write");
$opt{capital} = opt_val("capital");
$opt{min_length} = opt_val("min_length");
$opt{check_window} = opt_val("check_window");
$opt{check_threshold} = opt_val("check_threshold");
$opt{poly_nucleotide} = opt_val("poly_nucleotide");
### OPTION ERROR ##
if($opt{check_threshold} > $opt{check_window}){
$opt{check_threshold} = $opt{check_window};
print "Now changed\" check_threshold\" to $opt{check_window} ,\n";
print "because the value is too large(larger than check_window).\n\n";
print "Will you continue ? (input\" yes\" or\" no\"):";
exit(1) if(<STDIN> =~ /no/i);
print "OK. This program will continue" if(<STDIN> =~ /yes/i);
print "Will you continue ? (input\" yes\" or\" no\"):" if(<STDIN> !~ /yes/i);
}
if(length($opt{poly_nucleotide}) != 1){
print "Please input one nucleotide (Option\" poly_nucleotide\")\n";
exit(1);
}
if($opt{poly_nucleotide} !~ /[a-zA-Z]/ ){
print "Please input alphabet (Option\" poly_nucleotide\")\n";
exit(1);
}
### OPTION ERROR END ##
### DIRECTORY or FILE PROCESS ##
if($opt{analysis_type} eq "-d"){ ##derectroy anlalysis#
$rdir=$arg[0];
$wdir=$opt{filename};
$rdir .= "/" if($rdir !~ /\/$/);
$wdir .= "/" if($wdir !~ /\/$/);
print "$rdir , $wdir\n";
opendir(R_DIR ,"$rdir") || die($!);
if(-d $wdir){
opendir(W_DIR ,"$wdir") || die($!);
}
else{
mkdir($wdir , 0755) || die($!);
opendir(W_DIR ,"$wdir") || die($!);
}
@rfile=readdir(R_DIR);
### remove ./ ../ process ##
$i=0;
foreach(@rfile){
unless($_ =~ /^\.{1,2}/){
$rfile[$i] = $_;
$i++;
}
}
$j = $##rfile;
while($i <= $j){
pop(@rfile);
$i++;
}
### remove ./ ../ process END ##
}
elsif($opt{analysis_type} eq "-f"){ ##file analysis#
$rfile[0]=$arg[0];
$wfile=$opt{filename};
}
else{
print "Don't exist READ FILE or READ DIRECTORY\n";
exit(1);
}
open(LOGFILE , "> $opt{log_file}") || die($!);
### DIRECTORY or FILE PROCESS END ##
foreach(%opt){
print $_."\n";
print LOGFILE $_."\n";
}
### ANALYSIAS START ##
foreach(@rfile){
print "......";
if($opt{analysis_type} eq "-d"){
$wfile = "$wdir"."$_";
$_ = "$rdir"."$_";
}
print(LOGFILE "------------$_---------------\n");
print(LOGFILE "------------$wfile---------------\n");
open(R_FILE ,"$_") || die($!);
### PROCESS OF OPTION : over write ##
if($opt{over_write} =~ /on/i){
open(W_FILE, "> $wfile") || die($!);
}
elsif($opt{over_write} =~ /off/i){
if(-f $wfile || -l $wfile){
rename("$wfile","$wfile~") || die($!);
}
open(W_FILE, "> $wfile") || die($!);
}
else{
print "Please input\" on\" or\" off\" on the option -over_write";
exit(1);
}
### PROCESS OF OPTION END ##
$header_flg = 0;
while(<R_FILE>){
chomp;
#### Select Process ###
if($_ =~ /^>/){
if($header_flg == 0){
$header_flg = 1;
}
else{
$header_flg = 3;
}
$num_entry++;
}
elsif($header_flg > 0){
$header_flg = 2;
}
#### Select Process END ###
##print $header_flg;
#### Each Process ###
if($header_flg == 0){
next;
}
elsif($header_flg == 2){
##### Function 2 ####
$tmp = $_;
chomp($tmp);
$tmp =~ s/[^a-zA-Z]+//g;
$tmp =~ tr/a-z/A-Z/ if($opt{capital} eq "uc");
### get one nuc ##
for($t=0; $t < length($tmp);$t++){
$ch = substr($tmp ,$t ,1);
if($opt{capital} eq "uc"){
if($ch =~ /[A-Z]/){
push(@read_seq,$ch);
}
}elsif($opt{capital} eq "lc"){
if($ch =~ /[a-zA-Z]/){
push(@read_seq,$ch);
}
}else{
print "Please input\" uc\" or\" lc\"";
exit(1);
}
}
}
elsif($header_flg == 3){
##### Function 3 ####
for($i=$##read_seq ; $i>0 ; $i--){
$end=0;
if($read_seq[$i] ne $opt{poly_nucleotide}){
$paend=$i;
$point=$i;
while($end == 0){
$pacheck=0;
for($j=1; $j<=$opt{check_window}; $j++){
if($read_seq[$point-$j] eq $opt{poly_nucleotide}){
$pacheck++;
}
}
if($pacheck < $opt{check_threshold}){$end=2;}
if($pacheck == $opt{check_threshold}){$point=$point-$opt{check_window}}
if($pacheck > $opt{check_threshold}){$end=1;}
}
}
last if($end==2);
}
### OUT PUT ACTION ##
### STOP CODON TAA ##
##if($read_seq[$paend] eq T){
##if($read_seq[$paend+1] eq A){
##if($read_seq[$paend+2] eq A){
##$paend = $paend+2;
##}
##}
##}
### STOP CODON TAA END ##
if($##read_seq - $paend >= $opt{min_length}){
print(W_FILE "$read_seq[0](rmp$opt{poly_nucleotide})\n");
for($i=1; $i<=$paend; $i++){
print(W_FILE "$read_seq[$i]");
}
print(W_FILE "\n");
### LOG OUTPUT ##
print(LOGFILE "$read_seq[0]\n");
print(LOGFILE "From ");
print(LOGFILE $paend+1);
print(LOGFILE " bp to the end,");
print(LOGFILE $##read_seq - $paend);
print(LOGFILE " bp of poly$opt{poly_nucleotide} was removed in total $#read_seq bp.\n");
### LOG OUTPUT END ##
}
else{
print(W_FILE "$read_seq[0]\n");
for($i=1 ; $i <= $##read_seq ; $i++){
print(W_FILE "$read_seq[$i]");
}
print(W_FILE "\n");
}
@read_seq = ();
### OUT PUT ACTION END ##
$header_flg = 1;
}
sub sim4_parse
{ &opt_default(value=>"0.99",output=>"STDOUT");
my $file_number = shift;
my @args=opt_get(@_);
my $value = &opt_val("value");
my $flag;
my $seq1_name;
my $seq2_name;
my $seq1_leng=1;
my $seq2_leng;
my $score=0;
my @R=();
my $name=();
my $i=0;
my %Re_list=();
while($i < $file_number){
open(FILE,$i.".sim");
$flag = 1;
while(<FILE>){
if(/^seq1 +\= +(\w+)\.seq\, +(\d+) +bp/){
if($score > $value && $flag > 1){
push(@R,$seq2_name."_".$seq2_leng);
}
elsif($score > $value && $flag == 1){
push(@R,$seq1_name."_".$seq1_leng);
}
$flag=1;
$seq1_name=$1;
$seq1_leng=$2;
$score = 0;
}elsif(/^seq2 +\= +\w+\.seq\ +\((\w+)\_\d+\)\, +(\d+) +bp/){
$seq2_name=$1;
$seq2_leng=$2;
}elsif(/\(complement\)/){
$flag=0;
}elsif(/^(\d+)\-(\d+) +\(\d+\-\d+\) +(\d+)\%/ && $flag >0){
if($seq1_leng < $seq2_leng){
$score=$score+(($2-$1+1)/100*$3/$seq1_leng);
}else{
$score=$score+(($2-$1+1)/100*$3/$seq2_leng);
}
$flag++;
}
}
if($score > $value && $flag > 1){
push(@R,$seq2_name."_".$seq2_leng);
}
$Re_list{$i} = [@R];
@R=();
$i++;
}
return(%Re_list);
}
General documentation
No general documentation available.