#!/bin/bash
# Script by S.R. Santos; 02 Dec 2007
# Script for downloading sequences from NCBI in FASTA format.
# You will need a file of accession numbers (one accession number per line) for this script

# Ask for the name of the accession list file and store the answer in
# INPUT_FILE for the later steps of the script.

printf "\nWhat is the name of the input file?  "
# -r keeps any backslash in the typed name literal; without it, read would
# treat backslashes as escape characters and silently drop them.
read -r INPUT_FILE

# Sanity check for input file: stop with exit status 192 unless INPUT_FILE
# names an existing regular file (an empty answer is rejected as well).
#
# The name is quoted and tested with [[ ]] so that a name containing spaces
# is checked as a single path; unquoted, such a name makes `test` fail with a
# usage error, which the `if` would treat the same as "file exists".
# The path tested is exactly the one the download step reads, so absolute
# paths are accepted too.

if [[ ! -f "$INPUT_FILE" ]]; then
  # Diagnostics go to stderr.
  printf "\nNo matching input file name!!!  Die!!!\n\n" >&2
  exit 192
fi

# Ask for the base name of the output file and store it in OUTPUT_FILE.
# The download step appends ".fasta" to this name.

printf "\nWhat would you like to name the output file?  "
# -r keeps any backslash in the typed name literal; without it, read would
# treat backslashes as escape characters and silently drop them.
read -r OUTPUT_FILE

# Now the hard work: the accession list is fed to xargs, which substitutes
# each line for the % placeholder in the query URL and runs one curl per
# accession. The combined output of all curl runs is written to
# "${OUTPUT_FILE}.fasta".
# NOTE: if you run this script a second time using the same output file name
# in the same location, the previous output will be clobbered!!!
#
# curl options:
#   --fail      report HTTP errors through the exit status instead of saving
#               the server's error page into the FASTA file
#   --location  follow redirects (for example an http -> https upgrade)
#
# Both file names are quoted so that names containing spaces work, and the
# list is read by redirection rather than through an extra `cat` process.
# xargs returns non-zero when any curl run fails; in that case the script
# stops here instead of going on to report success.

if ! xargs -I % curl --fail --location \
  "http://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?db=protein&list_uids=%&dopt=fasta&sendto=t" \
  < "$INPUT_FILE" > "${OUTPUT_FILE}.fasta"; then
  printf "\nOne or more downloads failed; %s may be incomplete.\n\n" "${OUTPUT_FILE}.fasta" >&2
  exit 1
fi

# Closing message, printed when the script reaches its last line.
# The text is passed as an argument to a fixed format string; the bytes
# written to stdout are the same as with a single literal format.

printf '\n\n%s\n\n' "Downloading of sequences completed.  Goodbye!!"