#!/usr/bin/perl
#
#Written by Paul Stothard, Canadian Bioinformatics Help Desk
#based on a script by Oleg Khovayko http://olegh.spedia.net
#
#Modified by S.R. Santos, Sept. 2004
#
#Modified by S.R. Santos, Aug. 2007
#
#This script uses NCBI's Entrez Programming Utilities to perform
#batch requests to NCBI Entrez.
#
#See 'Entrez Programming Utilities' for more info at
#http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
#
#Program flow:
#  1. Ask the user for a database, a query, a report type and an output file.
#  2. Run ESearch with usehistory=y so the result set is kept on NCBI's side,
#     and read the hit count, query key and WebEnv out of the XML reply.
#  3. After the user confirms, page through the result set with EFetch and
#     append every batch to the output file.

use warnings;
use strict;
use LWP::Simple;

# Base address of the E-utilities. NCBI serves these over HTTPS; LWP needs
# the LWP::Protocol::https module installed to talk to it.
my $BASE_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils';

my $BATCH_SIZE    = 500;    # records requested per EFetch call (retmax)
my $PAUSE_SECONDS = 3;      # delay between requests, to be polite to NCBI
my $MAX_ATTEMPTS  = 3;      # tries per request before giving up

# Print the given lines followed by a "> " prompt and return one line read
# from STDIN without its trailing newline. Returns '' when STDIN is at
# end-of-file, so callers never have to deal with undef.
sub prompt {
    my (@lines) = @_;

    print @lines, "> ";

    my $answer = <STDIN>;
    return '' unless defined $answer;

    chomp $answer;
    return $answer;
}

# Percent-encode everything except RFC 3986 "unreserved" characters so that
# user input (spaces, brackets, '&', '#', ...) cannot corrupt the request URL.
# NOTE(review): assumes input characters are single bytes, which holds when
# STDIN has no encoding layer -- confirm if the script is ever run with one.
sub url_encode {
    my ($text) = @_;

    $text =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord $1)/ge;
    return $text;
}

# GET $request_url and return the response body. LWP::Simple::get returns
# undef on failure; retry up to $MAX_ATTEMPTS times and then die, using
# $what to describe the request in the messages.
sub fetch {
    my ($request_url, $what) = @_;

    for my $attempt (1 .. $MAX_ATTEMPTS) {
        my $content = get($request_url);
        return $content if defined $content;

        last if $attempt == $MAX_ATTEMPTS;
        warn "Warning: $what failed (attempt $attempt of $MAX_ATTEMPTS), retrying\n";
        sleep $PAUSE_SECONDS;
    }

    die "Error: $what failed after $MAX_ATTEMPTS attempts\n";
}

# ---------------------------------------------------------------------------
# Collect the search parameters.
# ---------------------------------------------------------------------------

my $db = prompt(
    "\nWhich database will be searched? \n",
    "The following values are supported: pubmed protein nucleotide\n",
    "nuccore nucgss nucest structure genome books\n",
    "cancerchromosomes cdd domains gene genomeprj gensat geo gds\n",
    "homologene journals mesh ncbisearch nlmcatalog omia omim pmc\n",
    "popset probe pcassay pccompound pcsubstance snp swissprot\n",
    "taxonomy unigene unists.\n",
);

my $query = prompt(
    "\nWhat is the query to search for? \n",
    "Ex. genus and/or species name[for seq data], author[for pubmed], ect.\n",
    "Qualifiers can be used to restrict a search to a certain field (enclosed by [ ])\n",
    "See NCBI for details.\n",
);

my $report = prompt(
    "\nWhat type of report should be generated? \n",
    "Ex. summary [all databases], fasta/genbank [for seq data], abstract [for pubmed].\n",
);

my $output_file = prompt(
    "\nWhat will be the name of the output file? \n",
    "Hint: use a descriptive extension; e.g., .summary, .fasta, .abstract\n",
);

if ($db eq '' or $query eq '' or $report eq '' or $output_file eq '') {
    print "\nThere were missing input fields. Program terminating due to error(s)!\n\n";
    exit 1;    # this is an error exit, so report a non-zero status
}

# ---------------------------------------------------------------------------
# ESearch: run the query and keep the result set on the history server.
# ---------------------------------------------------------------------------

my $esearch_url = "$BASE_URL/esearch.fcgi?db=" . url_encode($db)
    . '&retmax=1&usehistory=y&term=' . url_encode($query);

my $esearch_xml = fetch($esearch_url, 'ESearch request');

# Each value is matched together with its opening tag so the whole value is
# captured; the first <Count> in the reply is the one that gets matched.
my ($count)     = $esearch_xml =~ m{<Count>(\d+)</Count>};
my ($query_key) = $esearch_xml =~ m{<QueryKey>(\d+)</QueryKey>};
my ($web_env)   = $esearch_xml =~ m{<WebEnv>([^<]+)</WebEnv>};

defined $count
    or die "Error: no <Count> found in the ESearch reply; check the database name and query\n";

# The history identifiers are only needed when there is something to fetch.
if ($count > 0 and not (defined $query_key and defined $web_env)) {
    die "Error: ESearch reply is missing <QueryKey> or <WebEnv>\n";
}

print "\n$count entries to retrieve\n\n";

my $response = prompt(
    "Would you like to continue and save these entries? (y will continue, all others will terminate)\n",
);

if ($response ne 'y') {
    print "\nNCBI search/download has been terminated.\n\n";
    exit;
}

# ---------------------------------------------------------------------------
# EFetch: download the records batch by batch.
# ---------------------------------------------------------------------------

# Opened only after confirmation so that declining never truncates a file.
open(my $out, '>', $output_file)
    or die("Error: Cannot open $output_file : $!");

for (my $retstart = 0; $retstart < $count; $retstart += $BATCH_SIZE) {
    # Human-readable, 1-based range for this batch, capped at the total.
    my $first = $retstart + 1;
    my $last  = $retstart + $BATCH_SIZE;
    $last = $count if $last > $count;

    print "\nRequesting download of entries $first to $last\n\n";

    # query_key and WebEnv are passed through exactly as NCBI returned them.
    my $efetch_url = "$BASE_URL/efetch.fcgi?rettype=" . url_encode($report)
        . "&retmode=text&retstart=$retstart&retmax=$BATCH_SIZE"
        . '&db=' . url_encode($db)
        . "&query_key=$query_key&WebEnv=$web_env";

    my $records = fetch($efetch_url, "EFetch request for entries $first to $last");

    print {$out} $records
        or die("Error: Cannot write to $output_file : $!");

    # Pause between requests; no need to wait after the final batch.
    sleep $PAUSE_SECONDS if $last < $count;
}

close($out)
    or die("Error: Cannot close $output_file file: $!");

print "Download has completed. Goodbye!\n\n";