# set_operon - annotate the CDS features of a genome object with operon data.
#
# Arguments:
#   The argument list is passed through opt_get(); the first remaining
#   argument is converted to the genome object with opt_as_gb().
#
# Behaviour:
#   * If the LOCUS id is U00096 or NC_000913, the RegulonDB OperonSet.txt
#     file is mirrored to $HOME/.glang/data/OperonSet.txt and parsed.
#   * Otherwise a per-accession file is mirrored from the OperonDB download
#     URL to $HOME/.glang/data/Operon<LOCUS id>.txt and parsed.
#
#   For every feature that could be assigned to an operon the following keys
#   are set on $gb->{<feature>}:
#     operon          - operon name taken from the data file
#     operonEvidence  - evidence column (RegulonDB branch only)
#     operonN         - 1-based position of the feature inside its operon
#   Every CDS that ends up without an operon name gets operonN = 0.
#
# Returns:
#   The (modified) genome object.
#
# Dies:
#   When the data file does not exist after mirroring, or cannot be opened.
#
# NOTE(review): the cache directory $HOME/.glang/data is assumed to exist
# already; nothing here creates it.
sub set_operon {
    my @args = opt_get(@_);
    my $gb   = opt_as_gb(shift @args);
    my $id   = $gb->{LOCUS}->{id};

    if ($id eq 'U00096' || $id eq 'NC_000913') {
        my $url  = "http://regulondb.ccg.unam.mx:80/data/OperonSet.txt";
        my $path = $ENV{HOME} . '/.glang/data/OperonSet.txt';
        mirror($url, $path);
        die("setOperon: cannot retrieve data from RegulonDB.") unless (-e $path);

        # Counts the "Columns:" line plus the "  (N) ..." column description
        # lines of the file header.  Data rows are only parsed once exactly
        # six such lines have been seen.
        my $header_marks = 0;

        # Three-argument open with a lexical handle: the path is built from
        # $ENV{HOME}, so it must never be interpreted as an open mode.
        open(my $fh, '<', $path) or die($!);
        while (my $line = <$fh>) {
            chomp $line;

            if ($line =~ /^Columns\:/ || $line =~ /^\s+\(\d\)\s/) {
                $header_marks++;
                next;
            }

            if ($header_marks != 6) {
                # Everything outside the data section is echoed through
                # msg_error() with unexpected characters stripped.
                (my $text = $line) =~ s/[^a-zA-Z0-9\-,\.\(\):\"\' ]//g;
                msg_error($text, "\n");
                next;
            }

            my ($operon, $num, $direction, $genes, $evidence) = split(/\t/, $line, 5);

            # Rows whose second column is below 2 are ignored (presumably
            # the gene count, i.e. single-gene operons -- confirm against
            # the RegulonDB column description).
            next unless (defined $num && defined $genes && $num >= 2);

            # Start coordinate of every member that could be resolved.
            my %start_of;
            foreach my $genepair (split(/,/, $genes)) {
                my ($gene, $locustag) = split(/\|/, $genepair, 2);

                # Prefer the locus tag, fall back to the gene name.
                my $cds = $gb->gene2id($locustag);
                $cds = $gb->gene2id($gene) unless (length $cds);
                next unless ($cds);

                $gb->{$cds}->{operon}         = $operon;
                $gb->{$cds}->{operonEvidence} = $evidence;
                $start_of{$cds}               = $gb->{$cds}->{start};
            }

            # Number the members by start coordinate: ascending for
            # 'forward' operons, descending for everything else.
            my @ordered = $direction eq 'forward'
                ? sort { $start_of{$a} <=> $start_of{$b} } keys %start_of
                : sort { $start_of{$b} <=> $start_of{$a} } keys %start_of;

            my $i = 1;
            foreach my $cds (@ordered) {
                $gb->{$cds}->{operonN} = $i;
                $i++;
            }
        }
        close($fh);
    }
    else {
        my $url  = 'http://csbl1.bmb.uga.edu/OperonDB/downloadNCoperon.php?NC_id=' . $id;
        my $path = $ENV{HOME} . '/.glang/data/Operon' . $id . '.txt';
        mirror($url, $path);
        die("No Operon data for this species.\n\n") unless (-e $path);

        # Operon name => members, in file order.
        my %members_of;

        open(my $fh, '<', $path) or die($!);
        while (my $line = <$fh>) {
            chomp $line;

            # Whitespace-separated columns: the first is the operon name and
            # the third is used as the feature key; the rest is ignored.
            # NOTE(review): the third column is used directly as a key into
            # $gb -- confirm it matches the feature ids of the genome object.
            my ($operon_name, undef, $gene) = split(/\s+/, $line, 4);

            # Skip blank/short lines instead of recording an undefined
            # member, which would create a bogus '' feature in $gb.
            next unless (defined $gene && length $gene);

            push(@{ $members_of{$operon_name} }, $gene);
        }
        close($fh);

        foreach my $operon_name (keys %members_of) {
            my @list = @{ $members_of{$operon_name} };

            # Operons on the complementary strand are numbered from the
            # other end.  (The original code tested and reversed an
            # undeclared @operons array here, so this never took effect.)
            my $strand = $gb->{ $list[0] }->{direction};
            @list = reverse(@list) if (defined $strand && $strand eq 'complement');

            my $i = 1;
            foreach my $cds (@list) {
                $gb->{$cds}->{operon}  = $operon_name;
                $gb->{$cds}->{operonN} = $i;
                $i++;
            }
        }
    }

    # Every CDS that was not assigned to an operon gets position 0.
    foreach my $cds ($gb->cds()) {
        $gb->{$cds}->{operonN} = 0 unless (length $gb->{$cds}->{operon});
    }

    return $gb;
}