G::Tools Literature
Summary | Included libraries | Package variables | Synopsis | Description | General documentation | Methods
Summary
G::Tools::Literature - PubMed search, automatic PDF acquisition and keyword counting for downloaded papers
Package variables
No package variables defined.
Included modules
G::Messenger
SelfLoader
SubOpt
Inherit
AutoLoader Exporter
Synopsis
  use G::Tools::Literature;
  G::Tools::Literature::KeySearch($dir, $key);
Description
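The documentation for G::Tools::Literature was generated by h2xs and the
stub was never edited. Judging from the code listed below, the module
provides four routines: PubMedSearch queries PubMed and tries to download
the PDF (or, failing that, the abstract) of every hit into a directory;
KeySearch counts a key word in every document of a directory and writes a
ranked report; PDFtoTEXT converts a PDF file with the external pdftotext
command; WordCount counts the matches of a pattern in a file.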
Methods
BEGIN Code
DESTROY
No description
Code
KeySearch
No description
Code
PDFtoTEXT
No description
Code
PubMedSearch
No description
Code
WordCount
No description
Code
Methods description
None available.
Methods code
BEGIN (Top)
# Load the HTTP client modules at compile time.  Each module is pulled in
# through a string eval so that a missing module only produces a warning
# instead of aborting compilation of this package.
BEGIN {
    for my $module (qw(LWP::Simple LWP::UserAgent HTTP::Cookies)) {
        eval "use $module;";

        # Report every load failure.  HTTP::Cookies used to be loaded
        # without this check, so its absence only surfaced much later as a
        # failing method call.
        warn "$@" if $@;
    }
}
DESTROY (description | prev | next | Top)
# Destructor that performs no cleanup: it only takes the invocant off the
# argument list and hands it back.
# NOTE(review): presumably defined explicitly so that object destruction is
# not routed through an AUTOLOAD handler -- confirm against the package
# header, which is not part of this listing.
sub DESTROY {
    my $invocant = shift @_;
    return $invocant;
}
KeySearch (description | prev | next | Top)
# KeySearch($dir, $key)
#
# Counts how often $key occurs in every document found directly inside $dir
# and writes a report, ranked by descending count, to "$dir/$dir.key".
#
# Entries ending in ".pdf" are first converted with PDFtoTEXT() and the
# resulting ".txt" path is counted; "*.log" and "*.key" entries and anything
# that is not a plain file are skipped; every other entry is read as text.
# A ".txt" entry whose ".pdf" sibling is also listed is skipped, because it
# is the conversion output of that PDF (left over from an earlier run) and
# would otherwise be counted twice.
#
# Arguments are taken from opt_get(@_).
# NOTE(review): opt_default/opt_get are not defined in this listing;
# presumably opt_get returns the positional arguments -- confirm.
#   $dir - directory to scan; it is also used as the base name of the
#          report file, so it has to be a single path component
#   $key - pattern handed to WordCount()
#
# Returns the number of documents counted (0 when there are none).  When the
# directory cannot be read or the report cannot be created, a warning is
# issued and nothing is returned.
sub KeySearch {
    &opt_default();
    my @args = opt_get(@_);

    my $dir = shift @args;
    my $key = shift @args;

    my $dh;
    unless (defined $dir && opendir $dh, $dir) {
        my $shown = defined $dir ? $dir : '';
        warn "KeySearch: cannot read directory \"$shown\": $!\n";
        return;
    }
    my @files = readdir $dh;
    closedir $dh;

    my $report = "$dir/$dir" . '.key';
    my $out;
    unless (open $out, '>', $report) {
        warn "KeySearch: cannot write \"$report\": $!\n";
        return;
    }

    # Lookup table of directory entries, used to recognise ".txt" files that
    # were generated from a ".pdf" in the same directory.
    my %listed = map { $_ => 1 } @files;

    my @txts;
    my $pdf = 0;
    my $txt = 0;
    foreach my $name (@files) {
        next if $name eq '.' || $name eq '..';
        next if $name =~ /\.log$/;
        next if $name =~ /\.key$/;
        next unless -f "$dir/$name";

        # Anchored so that the test agrees with the s/\.pdf$/.../ done by
        # PDFtoTEXT(); "x.pdf.bak" is not a PDF.
        if ($name =~ /\.pdf$/) {
            push @txts, PDFtoTEXT("$dir/$name");
            $pdf++;
            next;
        }

        if ($name =~ /\.txt$/) {
            (my $source_pdf = $name) =~ s/\.txt$/.pdf/;
            next if $listed{$source_pdf};
        }

        push @txts, "$dir/$name";
        $txt++;
    }

    my %hash;
    my $i = 0;
    foreach my $path (@txts) {
        my $hits = WordCount($path, $key);

        # Normalise "no result" to 0 so the numeric sort below and the
        # report line are well defined.
        $hash{$path} = defined $hits ? $hits : 0;
        $i++;
    }

    print {$out} '**************************************************************'."\n".'**** Key Search (1.00)  Key Word Count from PDF Documents ****'."\n".'**************************************************************'."\n\n";

    print {$out} "Key: $key\n";
    print {$out} "Directory: $dir\n";
    print {$out} "Paper: $i\( PDF: $pdf files   TXT: $txt files\)\n";
    print {$out} "---------------------------------\n\n";

    foreach my $path (sort { $hash{$b} <=> $hash{$a} } keys %hash) {
        # Drop everything up to and including the first '/' (the directory).
        my $label = substr($path, index($path, '/') + 1);
        print {$out} "$label: $hash{$path}\n";
    }

    # Buffered write errors only surface at close time.
    close $out or warn "KeySearch: error closing \"$report\": $!\n";

    return $i;
}
PDFtoTEXT (description | prev | next | Top)
# PDFtoTEXT($pdf)
#
# Runs the external "pdftotext" command on $pdf and returns the path with a
# trailing ".pdf" replaced by ".txt".
# NOTE(review): this assumes pdftotext, when given a single file name,
# writes its output next to the input under that ".txt" name -- confirm for
# the installed version.
#
# The argument is taken from opt_get(@_).
# NOTE(review): opt_default/opt_get are not defined in this listing;
# presumably opt_get returns the positional arguments -- confirm.
#
# The path is returned even when the conversion fails (a warning is issued
# in that case), so callers must be prepared for a missing text file.
sub PDFtoTEXT {
    &opt_default();
    my @args = opt_get(@_);

    my $pdf = shift @args;
    return $pdf unless defined $pdf && length $pdf;

    # List-form system() bypasses the shell, so file names containing
    # spaces or shell metacharacters are passed through verbatim instead of
    # being split or interpreted.
    my $status = system('pdftotext', $pdf);
    if ($status != 0) {
        warn "PDFtoTEXT: pdftotext failed for \"$pdf\" (status $status)\n";
    }

    $pdf =~ s/\.pdf$/\.txt/;
    return $pdf;
}
PubMedSearch (description | prev | next | Top)
# PubMedSearch($query, options...)
#
# Sends $query to the NCBI Entrez PubMed query URL, then walks the result
# page: for every hit it fetches the abstract page, follows the first
# full-text link found there and tries to download a PDF using
# publisher-specific page scraping.  For hit number N either "$dir/N.pdf"
# (PDF downloaded) or "$dir/N.txt" (journal, title, authors, affiliation,
# abstract and PMID only) is written.  Progress goes to STDOUT, a detailed
# record to "$dir/$dir.log", and totals are printed at the end.  When a key
# word is given, KeySearch($dir, $key) is run on the result directory.
#
# Options (defaults registered through opt_default, read with opt_val):
#   limit - value sent as "dispmax" in the query URL (default 500)
#   dir   - output directory, created here (default 'PUBMED' . time)
#   key   - key word for the final KeySearch run (default '')
# NOTE(review): opt_default/opt_get/opt_val are not defined in this listing;
# how options are spelled by the caller cannot be seen from here -- confirm.
#
# Returns early with a bare return when $dir already exists or when the
# PubMed request fails; otherwise it falls off the end of the sub, so the
# return value is not meaningful.
#
# NOTE(review), observations on the code as listed:
#   - mkdir and open(LOG ...) are not checked, and the early return after a
#     failed PubMed request leaves LOG open.
#   - In the "cannot get document summary" branch "$frag = 1; $frag = 2;"
#     assigns the same variable twice; presumably $frag2 was meant.
#   - The final status checks use the bitwise "&" ("$frag == 0 & $frag2 ==
#     1"); "==" binds tighter than "&", so they behave like a logical AND
#     on 0/1 values.
#   - The InterScience branch is an elsif of the Catchword if, and the
#     Synergy ... Elsevier branches plus the generic else form one chain
#     that starts at the Wiley if.  The generic else is therefore also
#     reached for URLs already handled by the stand-alone Springer,
#     Springer-ny, Catchword and InterScience blocks unless those leave the
#     loop with "last" first -- confirm this is intended.
#   - Several "State: Not permitted. Only abstract is saved." literals are
#     split across physical lines, which embeds a newline in the logged
#     text; this looks like an artifact of how the listing was produced.
#   - $i starts undefined, so the totals interpolate an undefined value
#     when the result page contains no hits.
sub PubMedSearch {
    my $time=time;
    # Register the option defaults; the default directory name embeds the
    # current epoch time.
    &opt_default(limit=>500, dir=>'PUBMED'.$time, key=>'');
    my @args=opt_get(@_);
    
    my $query=shift @args;
    my $limit=opt_val('limit');
    my $dir=opt_val('dir');
    my $key=opt_val('key');

    my $com;
    my @date;
    my $req;
    my $res;
    my $ua;
    my $i;
    my $frag;
    my $frag2;
    my @line;
    my @line2;
    my @line3;
    my $url;
    my $url2;
    my $tmp;
    my $tmp2;
    my $head;
    my $abst;
    my $abstract;
    my $title;
    my $authors;
    my $affiliation;
    my $journal;
    my $pmid;
    # Counters for the final summary.
    my $download=0;
    my $invalid=0;
    my $permit=0;
    my $nopdf=0;
    my $notfound=0;
    
    # Refuse to write into a directory that already exists.
    $tmp=-d "$dir";
    if($tmp == 1){
	print "\"$dir\" : The directory has already existed.\n";
	return;
    }

    # localtime yields years since 1900 and a zero-based month; adjust both
    # for display.
    @date=localtime($time);
    $date[5]=$date[5]+1900;
    $date[4]=$date[4]+1;

    print '**************************************************************'."\n".'** PubMed Search (1.00) Automatic Paper Acquisition System  **'."\n".'**************************************************************'."\n\n";

    print "Date: $date[5]\/$date[4]\/$date[3]  $date[2]\:$date[1]\:$date[0]\n";
    print "Query word is\" $query\".\n";
    print "Search limit is\" $limit\".\n";
    print "Key word is\" $key\".\n" if($key);
    print "---------------------------------\n";

    # Create the output directory and start the log with the same header
    # that was just printed.
    mkdir("$dir",0777);
    open(LOG, ">$dir/$dir".'.log');

    print LOG '**************************************************************'."\n".'** PubMed Search (1.00) Automatic Paper Acquisition System  **'."\n".'**************************************************************'."\n\n";

    print LOG "Date: $date[5]\/$date[4]\/$date[3]  $date[2]\:$date[1]\:$date[0]\n";
    print LOG "Query: $query\n";
    print LOG "Limit: $limit\n";
    print LOG "Key: $key\n" if($key);
    print LOG "Directory: $dir\n";
    print LOG "Log file: $dir\/$dir\.log\n";
    print LOG "Key file: $dir\/$dir\.key\n" if($key);
    print LOG "---------------------------------\n\n";

    # Only spaces are encoded for the URL; other characters in the query are
    # passed through unchanged.
    $query =~ tr/ /+/;

    # One user agent with a cookie jar saved to lwpcookies.txt is used for
    # every request below.
    $ua = LWP::UserAgent->new;
    $ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt", autosave => 1));
    
    $com='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?SUBMIT=y&DB=PubMed&cmd=&term='.$query.'&dispmax='.$limit;
    $req = HTTP::Request->new(GET => $com);
    $res = $ua->request($req);
    
    unless($res->is_success){
	print "Error occured: PubMed isn't available.\n";
	print "---------------------------------\n";
	return;
    }

    # Scan the result page line by line; every line matching the table-cell
    # link pattern below is treated as one hit, numbered by $i.
    @line=split(/\n/,$res->as_string);
    foreach(@line){
	if(/\<td width\=\"100\%\"\>\<font size\=\"\-1\"\>\<a href\=\"(.*)\"\>.*/){
	    $i++;
	    # Per-hit state: $frag is set once a result file has been written
	    # for this hit, $frag2 once a full-text link was found on the
	    # abstract page.
	    $frag = 0;
	    $frag2 = 0;
	    $journal = "";
	    $title = "";
	    $authors = "";
	    $affiliation = "";
	    $abstract = "";
	    $pmid = "";
	    
	    # Strip "amp;" so that "&amp;" in the link becomes "&", then fetch
	    # the abstract page.
	    $url=$1;
	    $url =~ s/amp\;//g;
	    $req = HTTP::Request->new(GET => "$url");
	    $abst = $ua->request($req);

#######################################################
# Abstract #
#######################################################
if($abst->content =~ /\<input name\=\"uid\" type\=\"checkbox\" value\=\"\d+\"\>\<b\>1\: \<\/b\>(.*)\<\/td\>/){ $journal=$1; if($journal =~ /Error occured\: cannot get document summary/){ $journal = 'Error occured! cannot get document summary'; print "$i\.txt: Not found $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n"; close(TXT); print LOG "\[PAPER $i\]\n"; print LOG "Journal: $journal\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not found the abstract.\n"; print LOG "---------------------------------\n\n"; $notfound++; $frag = 1; $frag = 2; next; } } if($abst->content =~ /\<br\>\<font size\=\"\+1\"\>\<b\>(.*)\<\/b\>\<\/font\>\<br\>\<br\>\<b\>(.*)\<\/b\>\<br\>\<br\>(.*)\<br\>\<br\>(.*)\<br\>\<br\>(PMID\: .*)\<\/dd\>\n/){ $title=$1; $authors=$2; $affiliation=$3; $abstract=$4; $pmid=$5; } elsif($abst->content =~ /\<br\>\<font size\=\"\+1\"\>\<b\>(.*)\<\/b\>\<\/font\>\<br\>\<br\>\<b\>(.*)\<\/b\>\<br\>\<br\>(.*)\<br\>\<br\>(.*)(PMID\: .*)\<\/dd\>\n/){ $title=$1; $authors=$2; $affiliation=$3; $abstract=$4; $pmid=$5; } print LOG "\[PAPER $i\]\n"; print LOG "Journal: $journal\n"; print LOG "Title: $title\n"; print LOG "Authors: $authors\n"; print LOG "$pmid\n"; @line2=split(/\n/,$abst->content); foreach(@line2){ if(/\<dd\>\<SPAN\>\<a href\=\"(.*)\" OnClick.*/){ $frag2 = 1; $url=$1; $url='http://www.ncbi.nlm.nih.gov:80'.$url; #######################################################
# Direct Access #
#######################################################
if($url =~ /\.pdf$/){ $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url. Only abstract is saved.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } last; } #######################################################
# Access to the site #
#######################################################
$url =~ s/amp\;//g; $url =~ s/amp\%3[Bb]//g; $req = HTTP::Request->new(HEAD => "$url"); $head = $ua->request($req); $url = $head->{_request}->{_uri}; $url =~ s/amp\;//g; $url =~ s/amp\%3[Bb]//g; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); unless($res->is_success){ print "$i\.txt: Not found $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not found the page. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $notfound++; $url="not found"; $frag = 1; } #######################################################
# Springer #
#######################################################
if($url =~ /\.springer\./){ my $spfrag; $url =~ s/index.html//; @line3=split(/\n/, $res->content); foreach(@line3){ if(/\<frame title\=\"Navigation\" name\=\"nav\" src\=\"(.*)\".*/){ $url2=$url.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); last; } elsif(/Otherwise click \<a href\=\"\.\.\/\.\.(.*)\"\>here\!\<\/a\>\<\/p\>/){ $tmp = $1; $url =~ s/(.*\/journals\/\d+)\/.*/$1$tmp/; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); $url =~ s/index.html//; @line2=split(/\n/, $res->content); foreach(@line2){ if(/\<frame title\=\"Navigation\" name\=\"nav\" src\=\"(.*)\".*/){ $url2=$url.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); last; } } last; } elsif(/\<a href\=\"\.\.\/\.\.(.*\.pdf)\"\>PDF/ || /\<a HREF\=\"\.\.\/\.\.(.*\.pdf)\"\>Article in PDF format/){ $spfrag =1; $tmp=$1; $url =~ s/(.*\/journals\/\d+)\/.*/$1$tmp/; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. 
Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; last; } } } last if($spfrag == 1); if($res->content =~ /.*\"(.*\.pdf)\".*/){ $url2 = $url.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url2.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url2\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url2 was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url2\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# Springer-ny #
#######################################################
if($url =~ /\.springer-ny\./){ $url =~ s/index\.html//; $url2=$url.'paper/index.html'; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url2.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url2\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url2 was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url2\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } #######################################################
# Catchword #
#######################################################
if($url =~ /\.catchword\./){ my $catchword; if($res->content =~ /SRC\=\"(.*)\" NAME\=\"toolbar\"/){ $url='http://www.catchword.com/'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); @line3=split('\n', $res->content); foreach(@line3){ if($res->content =~ /\<a href\=\"(.*)\"alt\=\"full document\"/){ $catchword=1; $url='http://www.catchword.com/'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } if($catchword == 0){ $frag = 1; print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; } } } #######################################################
# InterScience #
#######################################################
elsif($url =~ /\.interscience\./){ if($res->as_string =~ /.*\"(.*\.pdf)\".*/){ $url2 = 'http://www3.interscience.wiley.com'.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url2.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url2\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url2 was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url2\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# Wiley #
#######################################################
if($url =~ /doi\.wiley\.com/){ if($res->content =~ /\<h1\>Error\<\/h1\>/){ print "$i\.txt: Not found $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not found the page. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $notfound++; $url="not found"; $frag = 1; } } #######################################################
# Synergy #
#######################################################
elsif($url =~ /\.blackwell-synergy\./){ if($res->as_string =~ /\<a href\=\"javascript\:newWindow\(\'(.*\.x\/pdf)\'.*/){ $url2 = 'http://www.blackwell-synergy.com'.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->as_string =~ /\<a href\=\"(.*pdf.*)\"\>/){ $req = HTTP::Request->new(GET => "$1"); $tmp = $1; $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } } #######################################################
# EMBO #
#######################################################
elsif($url =~ /\/\/emboj\./){ if($res->as_string =~ /\<A HREF\=\"(.*)\"\>Reprint \(PDF\)/){ $url = 'http://emboj.oupjournals.org'.$1; $url =~ s/content/reprint/; $url = $url.'.pdf'; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# JVMS #
#######################################################
elsif($url =~ /\/\/jvms\./){ if($res->content =~ /\<a href\=\"(.*)\"\>PDF/){ $url = 'http://jvms.jstage.jst.go.jp'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstr\
act is saved.\n"
; print LOG "--------------------------------\
-\n\n"
; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# J Biol Chem, J Clinical Inv and Neurology #
#######################################################
elsif($url =~ /\/\/(www\.jbc\.org)/ || $url =~ /\/\/(www\.jci\.org)/ || $url =~ /\/\/(www\.neurology\.org)/ || $url =~ /\/\/(circ\.ahajournals\.org)/ || $url =~ /\/\/(www\.pnas\.org)/ || $url =~ /\/\/(www\.fasebj\.org)/ || $url =~ /\/\/(www\.jneurosci\.org)/ || $url =~ /\/\/(bioinformatics\.oupjournals\.org)/){ $tmp=0; $tmp2=$1; if($res->content =~ /window\.location \= \"(.*)\"\;/){ $url='http://'.$tmp2.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); $tmp=1; } elsif($res->content =~ /\<A HREF\=\"(.*)\"\>Reprint \(PDF\)/ || $res->content =~ /\<A HREF\=\"(.*)\"\>Screen \(PDF\)/){ $url='http://'.$tmp2.$1.'.pdf'; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); $tmp=1; } if($tmp == 1){ if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# Nature #
#######################################################
elsif($url =~ /\/\/www\.nature\.com/){ if($res->content =~ /Full text.*\"(.*)\"\>PDF/){ $url='http://www.nature.com'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# Portlandpress #
#######################################################
elsif($url =~ /\/\/cs\.portlandpress\.com/){ if($res->content =~ /\<A class\=\"sidelinks\" HREF\=\"(.*\.pdf)\"\>\<img src/){ $url='http://cs.portlandpress.com'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } #######################################################
# Elsevier #
#######################################################
elsif($url =~ /\/\/linkinghub\.elsevier\.com/){ if($res->content =~ /\<a HREF\=\"(.*)\"\>\<img border.*src\=\"http\:\/\/www\.sciencedirect\.com\//){ $tmp = $1; $tmp =~ s/amp\;//g; $req = HTTP::Request->new(GET => "$tmp"); $res = $ua->request($req); } if($res->content =~ /.*\"(.*\.pdf)\".*/){ $req = HTTP::Request->new(GET => "$1"); $tmp = $1; $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } else{ $frag=1; print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } } #######################################################
# ScienceDirect and Others #
#######################################################
else{ if($res->as_string =~ /.*\"(.*\.pdf)\".*/){ $tmp = $1; $tmp =~ s/UADB\/xppview\/// if($url =~ /\.acs\.org\//); $req = HTTP::Request->new(GET => "$tmp"); $res = $ua->request($req); if($res->is_success){ if($res->content =~ /^\%PDF.*/){ $frag = 1; open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } else{ if($url =~ /(http\:\/\/.*?)\/.*/){ $tmp=$1.$tmp; } $req = HTTP::Request->new(GET => "$tmp"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. 
Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } } elsif($res->as_string =~ /.*\"(http\:\/\/.*pdf.*)\".*/){ $req = HTTP::Request->new(GET => "$1"); $tmp = $1; $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } } } if($frag == 0 & $frag2 == 1){ if($url =~ /.*\.sciencedirect\..*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. 
Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; } else{ print "$i\.txt: Invalid format from $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Invalid format. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $invalid++; } } elsif($frag == 0 & $frag2 == 0){ print "$i\.txt: There is no PDF document.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL:\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: There is no PDF document. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $nopdf++; } } } print "Searched from ".$i." papers.","\n"; print "Downloaded: $download\n"; print "Not permitted: $permit\n"; print "Not found: $notfound\n"; print "Invalid format: $invalid\n"; print "No PDF: $nopdf\n"; if($i-$nopdf != 0){ print sprintf("%d",$download/($i-$nopdf)*100)."\% of papers have been downloaded.\n\n";
} else{ print "0% of papers have been downloaded.\n\n"; } print "Directory is\" $dir\".\n"; print "Log file is\" $dir\/$dir\.log\"\n"; print "Key file is\" $dir\/$dir\.key\"\n" if($key); print LOG "Total: $i\n"; print LOG "Downloaded: $download\n"; print LOG "Not permitted: $permit\n"; print LOG "Not found: $notfound\n"; print LOG "Invalid format: $invalid\n"; print LOG "No PDF: $nopdf\n"; if($i-$nopdf != 0){ print LOG "Accuracy: ".sprintf("%d",$download/($i-$nopdf)*100)."\%\n";
} else{ print LOG "Accuracy: 0%\n"; } close(LOG); if($key){ KeySearch($dir, $key); }
}
WordCount (description | prev | next | Top)
# WordCount($file, $query)
#
# Counts the matches of $query in $file, reading it line by line, and
# returns the total.
#
# $query is interpolated into the match as a regular expression (as it
# always has been), so regex metacharacters in it are significant and an
# invalid pattern raises an exception.  Callers that want a literal search
# should pass quotemeta($word).
#
# Arguments are taken from opt_get(@_).
# NOTE(review): opt_default/opt_get are not defined in this listing;
# presumably opt_get returns the positional arguments -- confirm.
#
# Returns 0 when nothing matches, when $query is undefined or empty, or
# (after a warning) when $file cannot be opened.
sub WordCount {
    &opt_default();
    my @args = opt_get(@_);

    my $file  = shift @args;
    my $query = shift @args;
    my $count = 0;

    # An empty pattern would make Perl reuse the last successfully matched
    # pattern, which is never what a key-word count wants.
    return $count unless defined $query && length $query;
    return $count unless defined $file;

    my $fh;
    unless (open $fh, '<', $file) {
        warn "WordCount: cannot open \"$file\": $!\n";
        return $count;
    }

    while (my $line = <$fh>) {
        # Scalar-context //g steps through the matches one at a time; this
        # counts them without rewriting the line the way s///g would.
        $count++ while $line =~ /$query/g;
    }
    close $fh;

    return $count;
}
General documentation
AUTHOR (Top)
A. U. Thor, a.u.thor@a.galaxy.far.far.away
SEE ALSO (Top)
perl(1).