Literature documentation.

G::Tools

Literature

Summary

Included libraries

Package variables

Synopsis

Description

General documentation

Methods

Summary

G::Tools::Literature - Perl extension for PubMed searching, paper retrieval and key word counting

Package variables

top

Globals (from use vars definitions)
@EXPORT
$VERSION
@EXPORT_OK

Included modules

top

G::Messenger
SubOpt
strict

Inherit

top

AutoLoader Exporter

Synopsis

top

  use G::Tools::Literature;
  blah blah blah

Description

top

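G::Tools::Literature collects literature-mining helpers. PubMedSearch queries
PubMed and, for each hit, tries to download the full-text PDF from the
publisher's site, saving only the abstract as a text file when the PDF cannot
be obtained; every step is recorded in a log file inside the output directory.

PDFtoTEXT converts a PDF file to text with the external pdftotext command.
KeySearch runs WordCount for a key word on every document in a directory and
writes the per-document counts, highest first, to a ".key" report file.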

Methods

top

BEGIN		Code
DESTROY	No description	Code
KeySearch	No description	Code
PDFtoTEXT	No description	Code
PubMedSearch	No description	Code
WordCount	No description	Code
new	No description	Code

Methods description

Methods code

BEGIN

top

BEGIN {

    # Load the HTTP client modules at compile time.  Each one is pulled in
    # through a string eval so that a missing module only produces a warning
    # instead of aborting compilation of this package.
    #
    # All three modules are now treated the same way: a failure to load
    # HTTP::Cookies used to be silently ignored, while the two LWP modules
    # were reported.
    for my $module (qw(LWP::Simple LWP::UserAgent HTTP::Cookies)) {
        eval "use $module;";
        if ($@) { warn "$@" }
    }

}

DESTROY

description

top

sub DESTROY {

    # Explicit no-op destructor: the invocant is taken off the argument
    # list and nothing else is done with it.
    my $self = shift(@_);

}

KeySearch

description

top

sub KeySearch {

    # Run WordCount for a key word on every document found in a directory
    # and write a ranked report to "<dir>/<dir>.key".
    #
    # Arguments (positional, as returned by opt_get):
    #   $dir - directory that holds the documents
    #   $key - key word passed to WordCount for each document
    #
    # Entries ".", "..", "*.log" and "*.key" are skipped.  Files whose name
    # ends in ".pdf" are first converted with PDFtoTEXT; every other entry
    # is counted as it is.
    #
    # Returns the number of documents processed (0 for an empty directory).
    # Returns nothing, after a warning, when the directory cannot be read or
    # the report file cannot be created.
    #
    # NOTE(review): opt_default/opt_get come from SubOpt; their exact option
    # handling is not visible here.

    &opt_default();
    my @args=opt_get(@_);

    my $dir=shift @args;
    my $key=shift @args;
    my @txts;
    my %hash;

    # Counters start at 0 so the report shows numbers even when nothing
    # was found.
    my $i=0;
    my $pdf=0;
    my $txt=0;

    opendir(my $dir_handle, $dir) or do {
	warn "KeySearch: cannot open directory \"$dir\": $!\n";
	return;
    };
    my @files=readdir $dir_handle;
    closedir $dir_handle;

    my $key_file="$dir/$dir".'.key';
    open(my $key_handle, '>', $key_file) or do {
	warn "KeySearch: cannot write \"$key_file\": $!\n";
	return;
    };

    foreach my $entry (@files){
	next if($entry eq '.' || $entry eq '..');
	next if($entry =~ /\.log$/);
	next if($entry =~ /\.key$/);

	# Anchored on purpose: only names that really end in ".pdf" are
	# converted, which matches the suffix PDFtoTEXT rewrites.
	if($entry =~ /\.pdf$/){
	    push(@txts, PDFtoTEXT("$dir/$entry"));
	    $pdf++;
	}
	else{
	    push(@txts, "$dir/$entry");
	    $txt++;
	}
    }

    foreach my $document (@txts){
	$hash{$document}=WordCount($document, $key);
	$i++;
    }

    print {$key_handle} '**************************************************************'."\n".'**** Key Search (1.00)  Key Word Count from PDF Documents ****'."\n".'**************************************************************'."\n\n";

    print {$key_handle} "Key: $key\n";
    print {$key_handle} "Directory: $dir\n";
    print {$key_handle} "Paper: $i\( PDF: $pdf files   TXT: $txt files\)\n";
    print {$key_handle} "---------------------------------\n\n";

    # Highest count first; the leading "<dir>/" is stripped from each name.
    foreach my $document (sort{$hash{$b} <=> $hash{$a}}keys(%hash)){
	my $name=substr($document, index($document,'/')+1);
	print {$key_handle} "$name: $hash{$document}\n";
    }

    # Buffered write errors only surface at close time.
    close($key_handle) or warn "KeySearch: error closing \"$key_file\": $!\n";

    return $i;

}

PDFtoTEXT

description

top

sub PDFtoTEXT {

    # Convert a PDF file to text with the external "pdftotext" command.
    #
    # Argument (positional, as returned by opt_get):
    #   $pdf - path of the PDF file
    #
    # Returns the path with a trailing ".pdf" replaced by ".txt"; a path
    # without that suffix is returned unchanged.  The return value is the
    # same whether or not the conversion succeeded; a failure is only
    # reported with a warning.

    &opt_default();
    my @args=opt_get(@_);

    my $pdf=shift @args;

    # List form bypasses the shell, so file names containing spaces or
    # shell metacharacters are passed to pdftotext intact.
    if(system('pdftotext', $pdf) != 0){
	warn "PDFtoTEXT: pdftotext failed for \"$pdf\" (status $?)\n";
    }
    $pdf=~s/\.pdf$/\.txt/;

    return $pdf;

}

PubMedSearch

description

top

sub PubMedSearch {

    my $time=time;
    &opt_default(limit=>500, dir=>'PUBMED'.$time, key=>'');
    my @args=opt_get(@_);
    
    my $query=shift @args;
    my $limit=opt_val('limit');
    my $dir=opt_val('dir');
    my $key=opt_val('key');

    my $com;
    my @date;
    my $req;
    my $res;
    my $ua;
    my $i;
    my $frag;
    my $frag2;
    my @line;
    my @line2;
    my @line3;
    my $url;
    my $url2;
    my $tmp;
    my $tmp2;
    my $head;
    my $abst;
    my $abstract;
    my $title;
    my $authors;
    my $affiliation;
    my $journal;
    my $pmid;
    my $download=0;
    my $invalid=0;
    my $permit=0;
    my $nopdf=0;
    my $notfound=0;
    
    $tmp=-d "$dir";
    if($tmp == 1){
	print "\"$dir\" : The directory has already existed.\n";
	return;
    }

    @date=localtime($time);
    $date[5]=$date[5]+1900;
    $date[4]=$date[4]+1;

    print '**************************************************************'."\n".'** PubMed Search (1.00) Automatic Paper Acquisition System  **'."\n".'**************************************************************'."\n\n";

    print "Date: $date[5]\/$date[4]\/$date[3]  $date[2]\:$date[1]\:$date[0]\n";
    print "Query word is\" $query\".\n";
    print "Search limit is\" $limit\".\n";
    print "Key word is\" $key\".\n" if($key);
    print "---------------------------------\n";

    mkdir("$dir",0777);
    open(LOG, ">$dir/$dir".'.log');

    print LOG '**************************************************************'."\n".'** PubMed Search (1.00) Automatic Paper Acquisition System  **'."\n".'**************************************************************'."\n\n";

    print LOG "Date: $date[5]\/$date[4]\/$date[3]  $date[2]\:$date[1]\:$date[0]\n";
    print LOG "Query: $query\n";
    print LOG "Limit: $limit\n";
    print LOG "Key: $key\n" if($key);
    print LOG "Directory: $dir\n";
    print LOG "Log file: $dir\/$dir\.log\n";
    print LOG "Key file: $dir\/$dir\.key\n" if($key);
    print LOG "---------------------------------\n\n";

    $query =~ tr/ /+/;

    $ua = LWP::UserAgent->new;
    $ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt", autosave => 1));
    
    $com='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?SUBMIT=y&DB=PubMed&cmd=&term='.$query.'&dispmax='.$limit;
    $req = HTTP::Request->new(GET => $com);
    $res = $ua->request($req);
    
    unless($res->is_success){
	print "Error occured: PubMed isn't available.\n";
	print "---------------------------------\n";
	return;
    }

    @line=split(/\n/,$res->as_string);
    foreach(@line){
	if(/\<td width\=\"100\%\"\>\<font size\=\"\-1\"\>\<a href\=\"(.*)\"\>.*/){
	    $i++;
	    $frag = 0;
	    $frag2 = 0;
	    $journal = "";
	    $title = "";
	    $authors = "";
	    $affiliation = "";
	    $abstract = "";
	    $pmid = "";
	    
	    $url=$1;
	    $url =~ s/amp\;//g;
	    $req = HTTP::Request->new(GET => "$url");
	    $abst = $ua->request($req);

########################################################
##  Abstract                                           #
########################################################
	    
	    if($abst->content =~ /\<input name\=\"uid\" type\=\"checkbox\" value\=\"\d+\"\>\<b\>1\: \<\/b\>(.*)\<\/td\>/){
		$journal=$1;
		if($journal =~ /Error occured\: cannot get document summary/){
		    $journal = 'Error occured! cannot get document summary';
		    print "$i\.txt: Not found $url.\n";
		    print "---------------------------------\n";
		    
		    open(TXT, ">$dir/$i".'.txt');
		    print TXT "$journal\n";
		    close(TXT);

		    print LOG "\[PAPER $i\]\n";
		    print LOG "Journal: $journal\n";
		    print LOG "URL: $url\n";
		    print LOG "FILE: $i\.txt\n";
		    print LOG "State: Not found the abstract.\n";
		    print LOG "---------------------------------\n\n";
		    $notfound++;
		    
		    $frag = 1;
		    $frag = 2;
		    next;
		}
	    }
	    if($abst->content =~ /\<br\>\<font size\=\"\+1\"\>\<b\>(.*)\<\/b\>\<\/font\>\<br\>\<br\>\<b\>(.*)\<\/b\>\<br\>\<br\>(.*)\<br\>\<br\>(.*)\<br\>\<br\>(PMID\: .*)\<\/dd\>\n/){
		$title=$1;
		$authors=$2;
		$affiliation=$3;
		$abstract=$4;
		$pmid=$5;
	    }
	    elsif($abst->content =~ /\<br\>\<font size\=\"\+1\"\>\<b\>(.*)\<\/b\>\<\/font\>\<br\>\<br\>\<b\>(.*)\<\/b\>\<br\>\<br\>(.*)\<br\>\<br\>(.*)(PMID\: .*)\<\/dd\>\n/){
                $title=$1;
                $authors=$2;
                $affiliation=$3;
                $abstract=$4;
                $pmid=$5;
            }
	    
	    print LOG "\[PAPER $i\]\n";
	    print LOG "Journal: $journal\n";
	    print LOG "Title: $title\n";
	    print LOG "Authors: $authors\n";
	    print LOG "$pmid\n";

	    @line2=split(/\n/,$abst->content);
	    foreach(@line2){
		if(/\<dd\>\<SPAN\>\<a href\=\"(.*)\" OnClick.*/){
		    $frag2 = 1;
		    $url=$1;
		    $url='http://www.ncbi.nlm.nih.gov:80'.$url;
		    
########################################################
##  Direct Access                                      #
########################################################
		    
		    if($url =~ /\.pdf$/){
			$req = HTTP::Request->new(GET => "$url");
			$res = $ua->request($req);
			
			if($res->is_success){
			    $frag = 1;
			    unless($res->content =~ /^\%PDF.*/){
				print "$i\.txt: Not permitted in $url. Only abstract is saved.\n";
				print "---------------------------------\n";

				open(TXT, ">$dir/$i".'.txt');
				print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				close(TXT);
				print LOG "URL: $url\n";
				print LOG "FILE: $i\.txt\n";
				print LOG "State: Not permitted. Only abstract is saved.\n";
				print LOG "---------------------------------\n\n";
				$permit++;
				last;
			    }
			    
			    open(PDF, ">$dir/$i".'.pdf');
			    print PDF $res->content;
			    close(PDF);
			    
			    print "$i\.pdf: Download from $url was successful.\n";
			    print "---------------------------------\n";

			    print LOG "URL: $url\n";
			    print LOG "FILE: $i\.pdf\n";
			    print LOG "State: Download successfully.\n";
			    print LOG "---------------------------------\n\n";

			    $download++;
			}
			last;
		    }

########################################################
##  Access to the site                                 #
########################################################
		    
		    $url =~ s/amp\;//g;
		    $url =~ s/amp\%3[Bb]//g;
		    $req = HTTP::Request->new(HEAD => "$url");
		    $head = $ua->request($req);

		    $url =  $head->{_request}->{_uri};
		    $url =~ s/amp\;//g;
		    $url =~ s/amp\%3[Bb]//g;

		    $req = HTTP::Request->new(GET => "$url");
		    $res = $ua->request($req);

		    unless($res->is_success){
			print "$i\.txt: Not found $url.\n";
			print "---------------------------------\n";
			
			open(TXT, ">$dir/$i".'.txt');
			print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
			close(TXT);

			print LOG "URL: $url\n";
			print LOG "FILE: $i\.txt\n";
			print LOG "State: Not found the page. Only abstract is saved.\n";
			print LOG "---------------------------------\n\n";
			$notfound++;
			
			$url="not found";
			$frag = 1;
		    }
		    
########################################################
##  Springer                                           #
########################################################
		    
		    if($url =~ /\.springer\./){
			my $spfrag;

			$url =~ s/index.html//;
			@line3=split(/\n/, $res->content);
			foreach(@line3){
			    if(/\<frame title\=\"Navigation\" name\=\"nav\" src\=\"(.*)\".*/){
				$url2=$url.$1;
				$req = HTTP::Request->new(GET => "$url2");
				$res = $ua->request($req);
				last;
			    }
			    elsif(/Otherwise click \<a href\=\"\.\.\/\.\.(.*)\"\>here\!\<\/a\>\<\/p\>/){
				$tmp = $1;
				$url =~ s/(.*\/journals\/\d+)\/.*/$1$tmp/;

				$req = HTTP::Request->new(GET => "$url");
				$res = $ua->request($req);

				$url =~ s/index.html//;
				@line2=split(/\n/, $res->content);
				foreach(@line2){
				    if(/\<frame title\=\"Navigation\" name\=\"nav\" src\=\"(.*)\".*/){
					$url2=$url.$1;
					$req = HTTP::Request->new(GET => "$url2");
					$res = $ua->request($req);
					last;
				    }
				}
				last;
			    }
			    elsif(/\<a href\=\"\.\.\/\.\.(.*\.pdf)\"\>PDF/ || /\<a HREF\=\"\.\.\/\.\.(.*\.pdf)\"\>Article in PDF format/){
				$spfrag =1;
				$tmp=$1;
				$url =~ s/(.*\/journals\/\d+)\/.*/$1$tmp/;

				$req = HTTP::Request->new(GET => "$url");
				$res = $ua->request($req);
				
				if($res->is_success){
				    $frag = 1;
				    unless($res->content =~ /^\%PDF.*/){
					print "$i\.txt: Not permitted in $url.\n";
					print "---------------------------------\n";
					
					open(TXT, ">$dir/$i".'.txt');
					print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
					close(TXT);
					print LOG "URL: $url\n";
					print LOG "FILE: $i\.txt\n";
					print LOG "State: Not permitted. Only abstract is saved.\n";
					print LOG "---------------------------------\n\n";
					$permit++;
					last;
				    }
				    
				    open(PDF, ">$dir/$i".'.pdf');
				    print PDF $res->content;
				    close(PDF);
				    
				    print "$i\.pdf: Download from $url was successful.\n";
				    print "---------------------------------\n";
				    
				    print LOG "URL: $url\n";
				    print LOG "FILE: $i\.pdf\n";
				    print LOG "State: Download successfully.\n";
				    print LOG "---------------------------------\n\n";
				    $download++;
				    last;
				}
			    }
			}
			
			last if($spfrag == 1);

			if($res->content =~ /.*\"(.*\.pdf)\".*/){
			    $url2 = $url.$1;
			    $req = HTTP::Request->new(GET => "$url2");
			    $res = $ua->request($req);
			    
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url2.\n";
				    print "---------------------------------\n";
				    
				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url2\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url2 was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url2\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

########################################################
##  Springer-ny                                        #
########################################################
		    
		    if($url =~ /\.springer-ny\./){
			$url =~ s/index\.html//;
			$url2=$url.'paper/index.html';
			
			$req = HTTP::Request->new(GET => "$url2");
			$res = $ua->request($req);
			
			if($res->is_success){
			    $frag = 1;
			    unless($res->content =~ /^\%PDF.*/){
				print "$i\.txt: Not permitted in $url2.\n";
				print "---------------------------------\n";

				open(TXT, ">$dir/$i".'.txt');
				print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				close(TXT);
				print LOG "URL: $url2\n";
				print LOG "FILE: $i\.txt\n";
				print LOG "State: Not permitted. Only abstract is saved.\n";
				print LOG "---------------------------------\n\n";
				$permit++;
				last;
			    }
			    
			    open(PDF, ">$dir/$i".'.pdf');
			    print PDF $res->content;
			    close(PDF);
			    
			    print "$i\.pdf: Download from $url2 was successful.\n";
			    print "---------------------------------\n";

			    print LOG "URL: $url2\n";
			    print LOG "FILE: $i\.pdf\n";
			    print LOG "State: Download successfully.\n";
			    print LOG "---------------------------------\n\n";
			    $download++;
			}
		    }

########################################################
##  Catchword                                          #
########################################################
		    
		    if($url =~ /\.catchword\./){
			my $catchword;
			if($res->content =~ /SRC\=\"(.*)\" NAME\=\"toolbar\"/){
			    $url='http://www.catchword.com/'.$1;
			
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);
			    
			    @line3=split('\n', $res->content);
			    foreach(@line3){
				if($res->content =~ /\<a href\=\"(.*)\"alt\=\"full document\"/){
				    $catchword=1;
				    $url='http://www.catchword.com/'.$1;
				    
				    $req = HTTP::Request->new(GET => "$url");
				    $res = $ua->request($req);
			    	    
				    if($res->is_success){
					$frag = 1;
					unless($res->content =~ /^\%PDF.*/){
					    print "$i\.txt: Not permitted in $url.\n";
					    print "---------------------------------\n";
					    
					    open(TXT, ">$dir/$i".'.txt');
					    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
					    close(TXT);
					    print LOG "URL: $url\n";
					    print LOG "FILE: $i\.txt\n";
					    print LOG "State: Not permitted. Only abstract is saved.\n";
					    print LOG "---------------------------------\n\n";
					    $permit++;
					    last;
					}
			    
					open(PDF, ">$dir/$i".'.pdf');
					print PDF $res->content;
					close(PDF);
					
					print "$i\.pdf: Download from $url was successful.\n";
					print "---------------------------------\n";
					
					print LOG "URL: $url\n";
					print LOG "FILE: $i\.pdf\n";
					print LOG "State: Download successfully.\n";
					print LOG "---------------------------------\n\n";
					$download++;
				    }
				}
			    }
			    if($catchword == 0){
				$frag = 1;
				print "$i\.txt: Not permitted in $url.\n";
				print "---------------------------------\n";
				
				open(TXT, ">$dir/$i".'.txt');
				print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				close(TXT);
				print LOG "URL: $url\n";
				print LOG "FILE: $i\.txt\n";
				print LOG "State: Not permitted. Only abstract is saved.\n";
				print LOG "---------------------------------\n\n";
				$permit++;
			    }
			}
		    }
		    
########################################################
##  InterScience                                       #
########################################################
		    
		    elsif($url =~ /\.interscience\./){
			if($res->as_string =~ /.*\"(.*\.pdf)\".*/){
			    $url2 = 'http://www3.interscience.wiley.com'.$1;
			    
			    $req = HTTP::Request->new(GET => "$url2");
			    $res = $ua->request($req);
			    
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url2.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url2\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url2 was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url2\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

########################################################
##  Wiley                                              #
########################################################
		    
		    if($url =~ /doi\.wiley\.com/){
			if($res->content =~ /\<h1\>Error\<\/h1\>/){
			    print "$i\.txt: Not found $url.\n";
			    print "---------------------------------\n";
			    
			    open(TXT, ">$dir/$i".'.txt');
			    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
			    close(TXT);
			    
			    print LOG "URL: $url\n";
			    print LOG "FILE: $i\.txt\n";
			    print LOG "State: Not found the page. Only abstract is saved.\n";
			    print LOG "---------------------------------\n\n";
			    $notfound++;
			    
			    $url="not found";
			    $frag = 1;
			}	    
		    }
		    
########################################################
##  Synergy                                            #
########################################################
		    
		    elsif($url =~ /\.blackwell-synergy\./){
			if($res->as_string =~ /\<a href\=\"javascript\:newWindow\(\'(.*\.x\/pdf)\'.*/){
			    $url2 = 'http://www.blackwell-synergy.com'.$1;
			    
			    $req = HTTP::Request->new(GET => "$url2");
			    $res = $ua->request($req);
			    
			    if($res->as_string =~ /\<a href\=\"(.*pdf.*)\"\>/){
				$req = HTTP::Request->new(GET => "$1");
				$tmp = $1;
				$res = $ua->request($req);
				
				if($res->is_success){
				    $frag = 1;
				    unless($res->content =~ /^\%PDF.*/){
					print "$i\.txt: Not permitted in $tmp.\n";
					print "---------------------------------\n";

					open(TXT, ">$dir/$i".'.txt');
					print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
					close(TXT);
					print LOG "URL: $tmp\n";
					print LOG "FILE: $i\.txt\n";
					print LOG "State: Not permitted. Only abstract is saved.\n";
					print LOG "---------------------------------\n\n";
					$permit++;
					last;
				    }
				    
				    open(PDF, ">$dir/$i".'.pdf');
				    print PDF $res->content;
				    close(PDF);
				    
				    print "$i\.pdf: Download from $tmp was successful.\n";
				    print "---------------------------------\n";

				    print LOG "URL: $tmp\n";
				    print LOG "FILE: $i\.pdf\n";
				    print LOG "State: Download successfully.\n";
				    print LOG "---------------------------------\n\n";
				    $download++;
				}
			    }
			}
		    }
		    
########################################################
##  EMBO                                               #
########################################################
		    
		    elsif($url =~ /\/\/emboj\./){
			if($res->as_string =~ /\<A HREF\=\"(.*)\"\>Reprint \(PDF\)/){
			    $url = 'http://emboj.oupjournals.org'.$1;
			    $url =~ s/content/reprint/;
			    $url = $url.'.pdf';
			    
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);
			    
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

########################################################
##  JVMS                                               #
########################################################

                    elsif($url =~ /\/\/jvms\./){
                        if($res->content =~ /\<a href\=\"(.*)\"\>PDF/){
                            $url = 'http://jvms.jstage.jst.go.jp'.$1;
	
                            $req = HTTP::Request->new(GET => "$url");
                            $res = $ua->request($req);

                            if($res->is_success){
                                $frag = 1;
                                unless($res->content =~ /^\%PDF.*/){
                                    print "$i\.txt: Not permitted in $url.\n";
				    print "---------------------------------\n";
				    
                                    open(TXT, ">$dir/$i".'.txt');
                                    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
                                    close(TXT);
                                    print LOG "URL: $url\n";
                                    print LOG "FILE: $i\.txt\n";
                                    print LOG "State: Not permitted. Only abstr\
act is saved.\n";
                                    print LOG "--------------------------------\
-\n\n";
                                    $permit++;
                                    last;
                                }

                                open(PDF, ">$dir/$i".'.pdf');
                                print PDF $res->content;
                                close(PDF);

                                print "$i\.pdf: Download from $url was successful.\n";
                                print "---------------------------------\n";

                                print LOG "URL: $url\n";
                                print LOG "FILE: $i\.pdf\n";
                                print LOG "State: Download successfully.\n";
                                print LOG "---------------------------------\n\n";
				$download++;
                            }
                        }
                    }

########################################################
##  J Biol Chem, J Clinical Inv and Neurology          #
########################################################

		    elsif($url =~ /\/\/(www\.jbc\.org)/ || $url =~ /\/\/(www\.jci\.org)/ || $url =~ /\/\/(www\.neurology\.org)/ || $url =~ /\/\/(circ\.ahajournals\.org)/ || $url =~ /\/\/(www\.pnas\.org)/ || $url =~ /\/\/(www\.fasebj\.org)/ || $url =~ /\/\/(www\.jneurosci\.org)/ || $url =~ /\/\/(bioinformatics\.oupjournals\.org)/){
			$tmp=0;
			$tmp2=$1;
			if($res->content =~ /window\.location \= \"(.*)\"\;/){
			    $url='http://'.$tmp2.$1;
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);
			    $tmp=1;
			}
			elsif($res->content =~ /\<A HREF\=\"(.*)\"\>Reprint \(PDF\)/ || $res->content =~ /\<A HREF\=\"(.*)\"\>Screen \(PDF\)/){
			    $url='http://'.$tmp2.$1.'.pdf';
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);
			    $tmp=1;
			}

			if($tmp == 1){
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

########################################################
##  Nature                                             #
########################################################

		    elsif($url =~ /\/\/www\.nature\.com/){
			if($res->content =~ /Full text.*\"(.*)\"\>PDF/){
			    $url='http://www.nature.com'.$1;
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);

			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

########################################################
##  Portlandpress                                      #
########################################################

		    elsif($url =~ /\/\/cs\.portlandpress\.com/){
			if($res->content =~ /\<A class\=\"sidelinks\" HREF\=\"(.*\.pdf)\"\>\<img src/){
			    $url='http://cs.portlandpress.com'.$1;
			    $req = HTTP::Request->new(GET => "$url");
			    $res = $ua->request($req);

			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $url.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $url\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $url was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $url\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }

########################################################
##  Elsevier                                           #
########################################################
		    
		    elsif($url =~ /\/\/linkinghub\.elsevier\.com/){
			if($res->content =~ /\<a HREF\=\"(.*)\"\>\<img border.*src\=\"http\:\/\/www\.sciencedirect\.com\//){
			    $tmp = $1;
			    $tmp =~ s/amp\;//g;
			    $req = HTTP::Request->new(GET => "$tmp");
			    $res = $ua->request($req);
			}

			if($res->content =~ /.*\"(.*\.pdf)\".*/){
			    $req = HTTP::Request->new(GET => "$1");
			    $tmp = $1;
			    $res = $ua->request($req);
			    
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $tmp.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $tmp\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $tmp was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $tmp\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
			else{
			    $frag=1;
			    
			    print "$i\.txt: Not permitted in $tmp.\n";
			    print "---------------------------------\n";
			    
			    open(TXT, ">$dir/$i".'.txt');
			    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
			    close(TXT);
			    print LOG "URL: $tmp\n";
			    print LOG "FILE: $i\.txt\n";
			    print LOG "State: Not permitted. Only abstract is saved.\n";
			    print LOG "---------------------------------\n\n";
			    $permit++;
			    last;
			} 
		    }

########################################################
##  ScienceDirect and Others                           #
########################################################
		    
		    else{
			if($res->as_string =~ /.*\"(.*\.pdf)\".*/){
			    $tmp = $1;
			    $tmp =~ s/UADB\/xppview\/// if($url =~ /\.acs\.org\//);
			    $req = HTTP::Request->new(GET => "$tmp");
			    $res = $ua->request($req);

			    if($res->is_success){
				if($res->content =~ /^\%PDF.*/){
				    $frag = 1;
				    open(PDF, ">$dir/$i".'.pdf');
				    print PDF $res->content;
				    close(PDF);
				    
				    print "$i\.pdf: Download from $tmp was successful.\n";
				    print "---------------------------------\n";

				    print LOG "URL: $tmp\n";
				    print LOG "FILE: $i\.pdf\n";
				    print LOG "State: Download successfully.\n";
				    print LOG "---------------------------------\n\n";
				    $download++;
				}
				else{
				    if($url =~ /(http\:\/\/.*?)\/.*/){
					$tmp=$1.$tmp;
				    }
				    $req = HTTP::Request->new(GET => "$tmp");
				    $res = $ua->request($req);
				    
				    if($res->is_success){
					$frag = 1;
					unless($res->content =~ /^\%PDF.*/){
					    print "$i\.txt: Not permitted in $tmp.\n";
					    print "---------------------------------\n";

					    open(TXT, ">$dir/$i".'.txt');
					    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
					    close(TXT);
					    print LOG "URL: $tmp\n";
					    print LOG "FILE: $i\.txt\n";
					    print LOG "State: Not permitted. Only abstract is saved.\n";
					    print LOG "---------------------------------\n\n";
					    $permit++;
					    last;
					}
					
					open(PDF, ">$dir/$i".'.pdf');
					print PDF $res->content;
					close(PDF);
					
					print "$i\.pdf: Download from $tmp was successful.\n";
					print "---------------------------------\n";

					print LOG "URL: $tmp\n";
					print LOG "FILE: $i\.pdf\n";
					print LOG "State: Download successfully.\n";
					print LOG "---------------------------------\n\n";
					$download++;
				    }
				}
			    }
			}
			elsif($res->as_string =~ /.*\"(http\:\/\/.*pdf.*)\".*/){
			    $req = HTTP::Request->new(GET => "$1");
			    $tmp = $1;
			    $res = $ua->request($req);
			    
			    if($res->is_success){
				$frag = 1;
				unless($res->content =~ /^\%PDF.*/){
				    print "$i\.txt: Not permitted in $tmp.\n";
				    print "---------------------------------\n";

				    open(TXT, ">$dir/$i".'.txt');
				    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
				    close(TXT);
				    print LOG "URL: $tmp\n";
				    print LOG "FILE: $i\.txt\n";
				    print LOG "State: Not permitted. Only abstract is saved.\n";
				    print LOG "---------------------------------\n\n";
				    $permit++;
				    last;
				}
				
				open(PDF, ">$dir/$i".'.pdf');
				print PDF $res->content;
				close(PDF);
				
				print "$i\.pdf: Download from $tmp was successful.\n";
				print "---------------------------------\n";

				print LOG "URL: $tmp\n";
				print LOG "FILE: $i\.pdf\n";
				print LOG "State: Download successfully.\n";
				print LOG "---------------------------------\n\n";
				$download++;
			    }
			}
		    }
		}
	    }
	    if($frag == 0 & $frag2 == 1){
		if($url =~ /.*\.sciencedirect\..*/){
		    print "$i\.txt: Not permitted in $url.\n";
		    print "---------------------------------\n";
		    
		    open(TXT, ">$dir/$i".'.txt');
		    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
		    close(TXT);
		    print LOG "URL: $url\n";
		    print LOG "FILE: $i\.txt\n";
		    print LOG "State: Not permitted. Only abstract is saved.\n";
		    print LOG "---------------------------------\n\n";
		    $permit++;
		}
		else{
		    print "$i\.txt: Invalid format from $url.\n";
		    print "---------------------------------\n";

		    open(TXT, ">$dir/$i".'.txt');
		    print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
		    close(TXT);
		    print LOG "URL: $url\n";
		    print LOG "FILE: $i\.txt\n";
		    print LOG "State: Invalid format. Only abstract is saved.\n";
		    print LOG "---------------------------------\n\n";
		    $invalid++;
		}
	    }
	    elsif($frag == 0 & $frag2 == 0){
		print "$i\.txt: There is no PDF document.\n";
		print "---------------------------------\n";

		open(TXT, ">$dir/$i".'.txt');
		print TXT "$journal\n$title\n$authors\n$affiliation\n$abstract\n$pmid\n";
		close(TXT);
		print LOG "URL:\n";
		print LOG "FILE: $i\.txt\n";
		print LOG "State: There is no PDF document. Only abstract is saved.\n";
		print LOG "---------------------------------\n\n";
		$nopdf++;
	    }
	}
    }
    print "Searched from ".$i." papers.","\n";
    print "Downloaded: $download\n";
    print "Not permitted: $permit\n";
    print "Not found: $notfound\n";
    print "Invalid format: $invalid\n";
    print "No PDF: $nopdf\n";
    if($i-$nopdf != 0){
	print sprintf("%d",$download/($i-$nopdf)*100)."\% of papers have been downloaded.\n\n";
    }
    else{
	print "0% of papers have been downloaded.\n\n";
    }

    print "Directory is\" $dir\".\n";
    print "Log file is\" $dir\/$dir\.log\"\n";
    print "Key file is\" $dir\/$dir\.key\"\n" if($key);

    print LOG "Total: $i\n";
    print LOG "Downloaded: $download\n";
    print LOG "Not permitted: $permit\n";
    print LOG "Not found: $notfound\n";
    print LOG "Invalid format: $invalid\n";
    print LOG "No PDF: $nopdf\n";
    if($i-$nopdf != 0){
	print LOG "Accuracy: ".sprintf("%d",$download/($i-$nopdf)*100)."\%\n";
    }
    else{
	print LOG "Accuracy: 0%\n";
    }
    close(LOG);

    if($key){
	KeySearch($dir, $key);
    }

}

WordCount

description

top

sub WordCount {

    # Count how many times a pattern occurs in a text file.
    #
    # Arguments (taken from the list returned by opt_get):
    #   $file  - path of the file to scan
    #   $query - pattern to count; it is interpolated into a regular
    #            expression, so regex metacharacters keep their meaning
    #
    # Returns the total number of matches summed over every line of the
    # file. Returns 0 (never undef) when nothing matches, when either
    # argument is missing or empty, or when the file cannot be opened
    # (a warning is emitted in that last case).

    &opt_default();
    my @args=opt_get(@_);

    my $file=shift @args;
    my $query=shift @args;
    my $count=0;

    # An empty pattern would make Perl silently reuse the last successful
    # regex, so there is nothing meaningful to count in that case.
    return $count unless(defined $query && length $query);
    return $count unless(defined $file && length $file);

    # Three-argument open with a lexical handle: the file name can no
    # longer be interpreted as a mode or a pipe, and the handle does not
    # leak into the package namespace.
    my $fh;
    unless(open($fh, '<', $file)){
        warn "WordCount: cannot open '$file': $!\n";
        return $count;
    }

    # Compile once; the pattern is the same for every line.
    my $pattern=qr/$query/;

    # A named line variable keeps the caller's $_ untouched.
    while(my $line=<$fh>){
        # Scalar-context //g walks the line one match at a time, which
        # yields the same count the former s///g substitution returned.
        $count++ while($line =~ /$pattern/g);
    }

    close($fh);

    return $count;

}

new

description

top

sub new {

    # Constructor stub.
    #
    # Accepts the package name, a file name and an option value, but uses
    # none of them: no object is created or blessed, and the value handed
    # back to the caller is always undefined.

    my ($pkg, $filename, $option) = @_;

    # Placeholder for the instance; intentionally left unassigned.
    my $this;

    return $this;

}

General documentation

AUTHOR	top
A. U. Thor, a.u.thor@a.galaxy.far.far.away
SEE ALSO	top
perl(1).