G::Tools

Literature

Summary Included libraries Package variables Synopsis Description General documentation Methods

Summary
G::Tools::Literature - PubMed search, full-text PDF download, and keyword counting for collected papers
Package variables top
Globals (from use vars definitions)
@EXPORT
$VERSION
@EXPORT_OK
Included modulestop
G::Messenger
SubOpt
strict
Inherit top
AutoLoader Exporter
Synopsistop
  use G::Tools::Literature;
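  # Search PubMed; PDFs, abstracts and a log file are saved in a new directory.
  G::Tools::Literature::PubMedSearch($query);
  # Count a keyword in every document of a directory; the report is "$dir/$dir.key".
  G::Tools::Literature::KeySearch($dir, $key);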
Descriptiontop
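G::Tools::Literature collects papers from PubMed and ranks them by keyword.
PubMedSearch submits a query to PubMed, follows each hit to the publisher's
site, and saves the full-text PDF or, when no PDF can be obtained, the
abstract as a text file; every step is recorded in a log file.
KeySearch converts the PDF files of a directory to text with PDFtoTEXT
(which runs the external pdftotext command), counts a keyword in each
document with WordCount, and writes the counts, highest first, to a key file.

Note: the module's own POD is still the unedited stub created by h2xs; the
description above is derived from the method code listed below.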
Methodstop
BEGIN Code
DESTROYNo descriptionCode
KeySearchNo descriptionCode
PDFtoTEXTNo descriptionCode
PubMedSearchNo descriptionCode
WordCountNo descriptionCode
newNo descriptionCode

Methods description


Methods code

BEGINtop
# Load the optional networking modules at compile time.
#
# Each module is loaded inside a string eval so that a missing module only
# produces a warning instead of aborting compilation of this package.
# All three modules are now reported the same way; previously a failure to
# load HTTP::Cookies was silently ignored.
BEGIN {
    foreach my $module (qw(LWP::Simple LWP::UserAgent HTTP::Cookies)) {
        eval "use $module;";
        if($@){ warn "$@" };
    }
}
DESTROYdescriptiontopprevnext
# Destructor: takes the object being destroyed and releases nothing.
# Defined explicitly; the object is simply taken off the argument list.
sub DESTROY {
    my $self = shift(@_);
    return $self;
}
KeySearchdescriptiontopprevnext
# KeySearch($dir, $key)
#
# Counts how often $key occurs in each document stored in directory $dir and
# writes a report, sorted by descending count, to "$dir/$dir.key".
#
# Directory entries are handled as follows:
#   * ".", "..", sub-directories and entries ending in ".log" or ".key" are
#     skipped;
#   * entries ending in ".pdf" are converted with PDFtoTEXT() and the path it
#     returns is counted;
#   * a ".txt" entry is skipped when the listing also holds the ".pdf" of the
#     same base name, because PDFtoTEXT() yields that very ".txt" path and
#     the document would otherwise be counted twice;
#   * every other entry is counted as a text file as it is.
#
# $key is passed to WordCount() unchanged.
#
# Returns the number of documents counted.  When the directory cannot be read
# or the report cannot be created, a warning is issued and 0 is returned.
sub KeySearch {
    &opt_default();
    my @args = opt_get(@_);

    my $dir = shift @args;
    my $key = shift @args;

    # Counters start at 0 so that the report and the return value are well
    # defined even when the directory holds no documents.
    my $total     = 0;
    my $pdf_count = 0;
    my $txt_count = 0;
    my @documents;
    my %count_of;

    my $dh;
    unless (opendir $dh, $dir) {
        warn "KeySearch: cannot read directory \"$dir\": $!\n";
        return 0;
    }
    my @entries = readdir $dh;
    closedir $dh;

    my $report = "$dir/$dir" . '.key';
    my $out;
    unless (open $out, '>', $report) {
        warn "KeySearch: cannot write \"$report\": $!\n";
        return 0;
    }

    my %in_listing = map { $_ => 1 } @entries;

    foreach my $entry (@entries) {
        next if $entry eq '.' || $entry eq '..';
        next if $entry =~ /\.log$/;
        next if $entry =~ /\.key$/;
        next if -d "$dir/$entry";

        if ($entry =~ /\.pdf$/) {
            my $text_file = PDFtoTEXT("$dir/$entry");
            push @documents, $text_file;
            $pdf_count++;
        }
        else {
            # Text already covered by the conversion of its PDF twin.
            if ($entry =~ /^(.*)\.txt$/s) {
                next if $in_listing{"$1.pdf"};
            }
            push @documents, "$dir/$entry";
            $txt_count++;
        }
    }

    foreach my $document (@documents) {
        # WordCount() may return undef for "no match"; store a plain number
        # so the numeric sort below never sees an undefined value.
        $count_of{$document} = WordCount($document, $key) || 0;
        $total++;
    }

    print {$out} '**************************************************************'."\n".'**** Key Search (1.00)  Key Word Count from PDF Documents ****'."\n".'**************************************************************'."\n\n";

    print {$out} "Key: $key\n";
    print {$out} "Directory: $dir\n";
    print {$out} "Paper: $total\( PDF: $pdf_count files   TXT: $txt_count files\)\n";
    print {$out} "---------------------------------\n\n";

    # Highest count first; equal counts are ordered by path so that the
    # report is reproducible.
    foreach my $document (sort { $count_of{$b} <=> $count_of{$a} || $a cmp $b } keys %count_of) {
        # Drop everything up to and including the first "/" for display.
        my $label = substr($document, index($document, '/') + 1);
        print {$out} "$label: $count_of{$document}\n";
    }

    close($out) or warn "KeySearch: error while closing \"$report\": $!\n";

    return $total;
}
PDFtoTEXTdescriptiontopprevnext
# PDFtoTEXT($pdf)
#
# Runs the external "pdftotext" command on the file $pdf and returns the
# name of the expected text file, i.e. $pdf with a trailing ".pdf" replaced
# by ".txt" (a name without that suffix is returned unchanged).
#
# The command is started with the list form of system(), so the file name is
# handed to pdftotext as a single argument and is never interpreted by a
# shell; names containing spaces or shell metacharacters are therefore safe.
# A non-zero status from system() is reported with a warning; the text file
# name is returned in any case, as before.
sub PDFtoTEXT {
    &opt_default();
    my @args = opt_get(@_);

    my $pdf = shift @args;

    my $status = system('pdftotext', $pdf);
    if ($status != 0) {
        warn "PDFtoTEXT: pdftotext failed for \"$pdf\" (status $status)\n";
    }

    $pdf =~ s/\.pdf$/\.txt/;
    return $pdf;
}
PubMedSearchdescriptiontopprevnext
# PubMedSearch - query PubMed and try to fetch the full-text PDF of every hit.
#
# Arguments, as unpacked below:
#   first positional argument  the query string; blanks are turned into "+"
#                              before it is appended to the search URL.
#   limit  value appended to the search URL as "dispmax" (default 500).
#   dir    output directory (default: "PUBMED" followed by the value of
#          time()).  The sub prints a message and returns if it exists.
#   key    optional keyword; when true, KeySearch($dir, $key) runs at the end.
#   NOTE(review): how callers spell these options is decided by
#   opt_default/opt_get/opt_val, which are defined outside this file.
#
# Effects visible in the code:
#   * creates $dir and writes "$dir/$dir.log";
#   * for hit number n writes "$dir/n.pdf" when a fetched body starts with
#     "%PDF", otherwise "$dir/n.txt" holding journal, title, authors,
#     affiliation, abstract and PMID line;
#   * prints progress and a closing summary to the selected output handle;
#   * uses an HTTP::Cookies jar stored in "lwpcookies.txt" with autosave.
#
# Returns: a bare return when the directory exists or the search request
# fails; there is no explicit return value on the normal path.
#
# NOTE(review), all to be confirmed before changing behaviour:
#   * the results of mkdir and of every open are not checked;
#   * the early return after a failed search request leaves LOG open;
#   * $i is only incremented for matching result lines, so it is still
#     undefined in the summary when no line matches;
#   * in the "cannot get document summary" branch "$frag = 1; $frag = 2;"
#     assigns $frag twice - the second one was possibly meant for $frag2;
#   * the final checks use the bitwise "&" between two comparisons;
#   * the Springer branch reassigns @line2 while the enclosing foreach is
#     iterating over it;
#   * $head->{_request}->{_uri} reads the response object's hash directly -
#     presumably to obtain the URL after redirects;
#   * several "State: Not permitted." log strings span two source lines, so
#     the line break inside the literal ends up in the log text.
sub PubMedSearch {
    my $time=time;
    &opt_default(limit=>500, dir=>'PUBMED'.$time, key=>'');
    my @args=opt_get(@_);
    
    my $query=shift @args;
    my $limit=opt_val('limit');
    my $dir=opt_val('dir');
    my $key=opt_val('key');

    my $com;
    my @date;
    my $req;
    my $res;
    my $ua;
    my $i;
    my $frag;
    my $frag2;
    my @line;
    my @line2;
    my @line3;
    my $url;
    my $url2;
    my $tmp;
    my $tmp2;
    my $head;
    my $abst;
    my $abstract;
    my $title;
    my $authors;
    my $affiliation;
    my $journal;
    my $pmid;
    # Counters reported in the closing summary.
    my $download=0;
    my $invalid=0;
    my $permit=0;
    my $nopdf=0;
    my $notfound=0;
    
    # Never write into a directory that already exists.
    $tmp=-d "$dir";
    if($tmp == 1){
	print "\"$dir\" : The directory has already existed.\n";
	return;
    }

    # localtime() yields the year minus 1900 and a zero-based month.
    @date=localtime($time);
    $date[5]=$date[5]+1900;
    $date[4]=$date[4]+1;

    print '**************************************************************'."\n".'** PubMed Search (1.00) Automatic Paper Acquisition System  **'."\n".'**************************************************************'."\n\n";

    print "Date: $date[5]\/$date[4]\/$date[3]  $date[2]\:$date[1]\:$date[0]\n";
    print "Query word is\" $query\".\n";
    print "Search limit is\" $limit\".\n";
    print "Key word is\" $key\".\n" if($key);
    print "---------------------------------\n";

    # The log file carries the directory name: "$dir/$dir.log".
    mkdir("$dir",0777);
    open(LOG, ">$dir/$dir".'.log');

    print LOG '**************************************************************'."\n".'** PubMed Search (1.00) Automatic Paper Acquisition System  **'."\n".'**************************************************************'."\n\n";

    print LOG "Date: $date[5]\/$date[4]\/$date[3]  $date[2]\:$date[1]\:$date[0]\n";
    print LOG "Query: $query\n";
    print LOG "Limit: $limit\n";
    print LOG "Key: $key\n" if($key);
    print LOG "Directory: $dir\n";
    print LOG "Log file: $dir\/$dir\.log\n";
    print LOG "Key file: $dir\/$dir\.key\n" if($key);
    print LOG "---------------------------------\n\n";

    # Blanks in the query become "+" for use in the URL.
    $query =~ tr/ /+/;

    $ua = LWP::UserAgent->new;
    $ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt", autosave => 1));
    
    $com='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?SUBMIT=y&DB=PubMed&cmd=&term='.$query.'&dispmax='.$limit;
    $req = HTTP::Request->new(GET => $com);
    $res = $ua->request($req);
    
    unless($res->is_success){
	print "Error occured: PubMed isn't available.\n";
	print "---------------------------------\n";
	return;
    }

    # Scan the result page line by line; each matching line is one hit and
    # carries the link to its abstract page.
    @line=split(/\n/,$res->as_string);
    foreach(@line){
	if(/\<td width\=\"100\%\"\>\<font size\=\"\-1\"\>\<a href\=\"(.*)\"\>.*/){
	    $i++;
	    # $frag is set once a result file has been written for this hit,
	    # $frag2 once a full-text link was seen on the abstract page.
	    $frag = 0;
	    $frag2 = 0;
	    $journal = "";
	    $title = "";
	    $authors = "";
	    $affiliation = "";
	    $abstract = "";
	    $pmid = "";
	    
	    $url=$1;
	    $url =~ s/amp\;//g;
	    $req = HTTP::Request->new(GET => "$url");
	    $abst = $ua->request($req);

########################################################
## Abstract #
########################################################
# Extract journal, title, authors, affiliation, abstract and PMID from the
# abstract page, then walk its lines looking for a full-text link.
if($abst->content =~ /\<input name\=\"uid\" type\=\"checkbox\" value\=\"\d+\"\>\<b\>1\: \<\/b\>(.*)\<\/td\>/){ $journal=$1; if($journal =~ /Error occured\: cannot get document summary/){ $journal = 'Error occured! cannot get document summary'; print "$i\.txt: Not found $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n"; close(TXT); print LOG "\[PAPER $i\]\n"; print LOG "Journal: $journal\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not found the abstract.\n"; print LOG "---------------------------------\n\n"; $notfound++; $frag = 1; $frag = 2; next; } } if($abst->content =~ /\<br\>\<font size\=\"\+1\"\>\<b\>(.*)\<\/b\>\<\/font\>\<br\>\<br\>\<b\>(.*)\<\/b\>\<br\>\<br\>(.*)\<br\>\<br\>(.*)\<br\>\<br\>(PMID\: .*)\<\/dd\>\n/){ $title=$1; $authors=$2; $affiliation=$3; $abstract=$4; $pmid=$5; } elsif($abst->content =~ /\<br\>\<font size\=\"\+1\"\>\<b\>(.*)\<\/b\>\<\/font\>\<br\>\<br\>\<b\>(.*)\<\/b\>\<br\>\<br\>(.*)\<br\>\<br\>(.*)(PMID\: .*)\<\/dd\>\n/){ $title=$1; $authors=$2; $affiliation=$3; $abstract=$4; $pmid=$5; } print LOG "\[PAPER $i\]\n"; print LOG "Journal: $journal\n"; print LOG "Title: $title\n"; print LOG "Authors: $authors\n"; print LOG "$pmid\n"; @line2=split(/\n/,$abst->content); foreach(@line2){ if(/\<dd\>\<SPAN\>\<a href\=\"(.*)\" OnClick.*/){ $frag2 = 1; $url=$1; $url='http://www.ncbi.nlm.nih.gov:80'.$url; ########################################################
## Direct Access #
########################################################
# The link itself ends in ".pdf": fetch it and stop scanning this page.
if($url =~ /\.pdf$/){ $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url. Only abstract is saved.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } last; } ########################################################
## Access to the site #
########################################################
# Otherwise send a HEAD request, take the URI stored in the request object of
# the response, and GET that page; the branches below match on that URL.
$url =~ s/amp\;//g; $url =~ s/amp\%3[Bb]//g; $req = HTTP::Request->new(HEAD => "$url"); $head = $ua->request($req); $url = $head->{_request}->{_uri}; $url =~ s/amp\;//g; $url =~ s/amp\%3[Bb]//g; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); unless($res->is_success){ print "$i\.txt: Not found $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not found the page. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $notfound++; $url="not found"; $frag = 1; } ########################################################
## Springer #
########################################################
if($url =~ /\.springer\./){ my $spfrag; $url =~ s/index.html//; @line3=split(/\n/, $res->content); foreach(@line3){ if(/\<frame title\=\"Navigation\" name\=\"nav\" src\=\"(.*)\".*/){ $url2=$url.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); last; } elsif(/Otherwise click \<a href\=\"\.\.\/\.\.(.*)\"\>here\!\<\/a\>\<\/p\>/){ $tmp = $1; $url =~ s/(.*\/journals\/\d+)\/.*/$1$tmp/; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); $url =~ s/index.html//; @line2=split(/\n/, $res->content); foreach(@line2){ if(/\<frame title\=\"Navigation\" name\=\"nav\" src\=\"(.*)\".*/){ $url2=$url.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); last; } } last; } elsif(/\<a href\=\"\.\.\/\.\.(.*\.pdf)\"\>PDF/ || /\<a HREF\=\"\.\.\/\.\.(.*\.pdf)\"\>Article in PDF format/){ $spfrag =1; $tmp=$1; $url =~ s/(.*\/journals\/\d+)\/.*/$1$tmp/; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. 
Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; last; } } } last if($spfrag == 1); if($res->content =~ /.*\"(.*\.pdf)\".*/){ $url2 = $url.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url2.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url2\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url2 was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url2\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } ########################################################
## Springer-ny #
########################################################
if($url =~ /\.springer-ny\./){ $url =~ s/index\.html//; $url2=$url.'paper/index.html'; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url2.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url2\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url2 was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url2\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } ########################################################
## Catchword #
########################################################
# This "if" opens a chain whose only other branch is InterScience below.
if($url =~ /\.catchword\./){ my $catchword; if($res->content =~ /SRC\=\"(.*)\" NAME\=\"toolbar\"/){ $url='http://www.catchword.com/'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); @line3=split('\n', $res->content); foreach(@line3){ if($res->content =~ /\<a href\=\"(.*)\"alt\=\"full document\"/){ $catchword=1; $url='http://www.catchword.com/'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } if($catchword == 0){ $frag = 1; print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; } } } ########################################################
## InterScience #
########################################################
elsif($url =~ /\.interscience\./){ if($res->as_string =~ /.*\"(.*\.pdf)\".*/){ $url2 = 'http://www3.interscience.wiley.com'.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url2.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url2\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url2 was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url2\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } ########################################################
## Wiley #
########################################################
# A second, independent if/elsif chain starts here and runs down to the final
# "else" (ScienceDirect and Others).
if($url =~ /doi\.wiley\.com/){ if($res->content =~ /\<h1\>Error\<\/h1\>/){ print "$i\.txt: Not found $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not found the page. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $notfound++; $url="not found"; $frag = 1; } } ########################################################
## Synergy #
########################################################
elsif($url =~ /\.blackwell-synergy\./){ if($res->as_string =~ /\<a href\=\"javascript\:newWindow\(\'(.*\.x\/pdf)\'.*/){ $url2 = 'http://www.blackwell-synergy.com'.$1; $req = HTTP::Request->new(GET => "$url2"); $res = $ua->request($req); if($res->as_string =~ /\<a href\=\"(.*pdf.*)\"\>/){ $req = HTTP::Request->new(GET => "$1"); $tmp = $1; $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } } ########################################################
## EMBO #
########################################################
elsif($url =~ /\/\/emboj\./){ if($res->as_string =~ /\<A HREF\=\"(.*)\"\>Reprint \(PDF\)/){ $url = 'http://emboj.oupjournals.org'.$1; $url =~ s/content/reprint/; $url = $url.'.pdf'; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } ########################################################
## JVMS #
########################################################
elsif($url =~ /\/\/jvms\./){ if($res->content =~ /\<a href\=\"(.*)\"\>PDF/){ $url = 'http://jvms.jstage.jst.go.jp'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstr\
act is saved.\n"
; print LOG "--------------------------------\
-\n\n"
; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } ########################################################
## J Biol Chem, J Clinical Inv and Neurology #
########################################################
# $tmp2 keeps the host name captured by whichever alternative matched.
elsif($url =~ /\/\/(www\.jbc\.org)/ || $url =~ /\/\/(www\.jci\.org)/ || $url =~ /\/\/(www\.neurology\.org)/ || $url =~ /\/\/(circ\.ahajournals\.org)/ || $url =~ /\/\/(www\.pnas\.org)/ || $url =~ /\/\/(www\.fasebj\.org)/ || $url =~ /\/\/(www\.jneurosci\.org)/ || $url =~ /\/\/(bioinformatics\.oupjournals\.org)/){ $tmp=0; $tmp2=$1; if($res->content =~ /window\.location \= \"(.*)\"\;/){ $url='http://'.$tmp2.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); $tmp=1; } elsif($res->content =~ /\<A HREF\=\"(.*)\"\>Reprint \(PDF\)/ || $res->content =~ /\<A HREF\=\"(.*)\"\>Screen \(PDF\)/){ $url='http://'.$tmp2.$1.'.pdf'; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); $tmp=1; } if($tmp == 1){ if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } ########################################################
## Nature #
########################################################
elsif($url =~ /\/\/www\.nature\.com/){ if($res->content =~ /Full text.*\"(.*)\"\>PDF/){ $url='http://www.nature.com'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } ########################################################
## Portlandpress #
########################################################
elsif($url =~ /\/\/cs\.portlandpress\.com/){ if($res->content =~ /\<A class\=\"sidelinks\" HREF\=\"(.*\.pdf)\"\>\<img src/){ $url='http://cs.portlandpress.com'.$1; $req = HTTP::Request->new(GET => "$url"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $url was successful.\n"; print "---------------------------------\n"; print LOG "URL: $url\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } ########################################################
## Elsevier #
########################################################
elsif($url =~ /\/\/linkinghub\.elsevier\.com/){ if($res->content =~ /\<a HREF\=\"(.*)\"\>\<img border.*src\=\"http\:\/\/www\.sciencedirect\.com\//){ $tmp = $1; $tmp =~ s/amp\;//g; $req = HTTP::Request->new(GET => "$tmp"); $res = $ua->request($req); } if($res->content =~ /.*\"(.*\.pdf)\".*/){ $req = HTTP::Request->new(GET => "$1"); $tmp = $1; $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } else{ $frag=1; print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } } ########################################################
## ScienceDirect and Others #
########################################################
# Fallback: try the first quoted ".pdf" reference found in the response; if
# the body is not a PDF, retry with the scheme and host of $url prepended.
else{ if($res->as_string =~ /.*\"(.*\.pdf)\".*/){ $tmp = $1; $tmp =~ s/UADB\/xppview\/// if($url =~ /\.acs\.org\//); $req = HTTP::Request->new(GET => "$tmp"); $res = $ua->request($req); if($res->is_success){ if($res->content =~ /^\%PDF.*/){ $frag = 1; open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } else{ if($url =~ /(http\:\/\/.*?)\/.*/){ $tmp=$1.$tmp; } $req = HTTP::Request->new(GET => "$tmp"); $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. 
Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } } elsif($res->as_string =~ /.*\"(http\:\/\/.*pdf.*)\".*/){ $req = HTTP::Request->new(GET => "$1"); $tmp = $1; $res = $ua->request($req); if($res->is_success){ $frag = 1; unless($res->content =~ /^\%PDF.*/){ print "$i\.txt: Not permitted in $tmp.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $tmp\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; last; } open(PDF, ">$dir/$i".'.pdf'); print PDF $res->content; close(PDF); print "$i\.pdf: Download from $tmp was successful.\n"; print "---------------------------------\n"; print LOG "URL: $tmp\n"; print LOG "FILE: $i\.pdf\n"; print LOG "State: Download successfully.\n"; print LOG "---------------------------------\n\n"; $download++; } } } } } if($frag == 0 & $frag2 == 1){ if($url =~ /.*\.sciencedirect\..*/){ print "$i\.txt: Not permitted in $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Not permitted. 
Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $permit++; } else{ print "$i\.txt: Invalid format from $url.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL: $url\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: Invalid format. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $invalid++; } } elsif($frag == 0 & $frag2 == 0){ print "$i\.txt: There is no PDF document.\n"; print "---------------------------------\n"; open(TXT, ">$dir/$i".'.txt'); print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n"; close(TXT); print LOG "URL:\n"; print LOG "FILE: $i\.txt\n"; print LOG "State: There is no PDF document. Only abstract is saved.\n"; print LOG "---------------------------------\n\n"; $nopdf++; } } } print "Searched from ".$i." papers.","\n"; print "Downloaded: $download\n"; print "Not permitted: $permit\n"; print "Not found: $notfound\n"; print "Invalid format: $invalid\n"; print "No PDF: $nopdf\n"; if($i-$nopdf != 0){ print sprintf("%d",$download/($i-$nopdf)*100)."\% of papers have been downloaded.\n\n";
} else{ print "0% of papers have been downloaded.\n\n"; } print "Directory is\" $dir\".\n"; print "Log file is\" $dir\/$dir\.log\"\n"; print "Key file is\" $dir\/$dir\.key\"\n" if($key); print LOG "Total: $i\n"; print LOG "Downloaded: $download\n"; print LOG "Not permitted: $permit\n"; print LOG "Not found: $notfound\n"; print LOG "Invalid format: $invalid\n"; print LOG "No PDF: $nopdf\n"; if($i-$nopdf != 0){ print LOG "Accuracy: ".sprintf("%d",$download/($i-$nopdf)*100)."\%\n";
} else{ print LOG "Accuracy: 0%\n"; } close(LOG); if($key){ KeySearch($dir, $key); }
}
WordCountdescriptiontopprevnext
# WordCount($file, $query)
#
# Returns the number of non-overlapping matches of $query in the file $file.
# $query is used as a regular expression, exactly as before, so regex
# metacharacters keep their special meaning and an invalid pattern raises a
# run-time error.
#
# Changes compared with the earlier implementation:
#   * the result is always a number: 0 is returned when nothing matches,
#     when $query is undefined or empty (an empty pattern has a special
#     meaning in Perl and is never a useful keyword), or when the file
#     cannot be opened (a warning is issued in that case);
#   * the file is opened read-only with a lexical handle and closed again;
#   * lines are read into a lexical variable, so the caller's $_ is no
#     longer overwritten.
sub WordCount {
    &opt_default();
    my @args = opt_get(@_);

    my $file  = shift @args;
    my $query = shift @args;
    my $count = 0;

    return $count unless defined $query && length $query;

    my $fh;
    unless (open $fh, '<', $file) {
        warn "WordCount: cannot open \"$file\": $!\n";
        return $count;
    }

    while (defined(my $line = <$fh>)) {
        # A list assignment evaluated in scalar context yields the number
        # of elements on its right-hand side, i.e. the number of matches.
        my $hits = () = $line =~ /${query}/g;
        $count += $hits;
    }
    close $fh;

    return $count;
}
newdescriptiontopprevnext
# Constructor stub.
#
# Accepts the package name, a file name and an option value, none of which
# is used, and returns an undefined value: no object is created or blessed.
sub new {
    my ($pkg, $filename, $option) = @_;
    my $this = undef;

    return $this;
}

General documentation

AUTHOR top
A. U. Thor, a.u.thor@a.galaxy.far.far.away
SEE ALSO top
perl(1).