\$::DESC_LEN, # maximum description length
'format=i' => \$::FORMAT, # output format
help => \$::HELP, # print full help to STDOUT
html => \$::HTML, # output HTML hdr & trlr in some formats
long => \$::LONG_QUERY, # output full query name, not 1st word
'expect=f' => \$::MAX_EXPECT, # only keep hits & HSPs with E-value <= $::MAX_EXPECT
'score=f' => \$::MIN_SCORE, # only keep hits & HSPs with score >= $::MIN_SCORE
bits => \$::BITS_SCORE, # output bits value, instead of score
'numhsps=i' => \$::NUM_HSPS, # only examine top $::NUM_HSPS hits
'sort=s' => \$::SORT_TYPE, # is output sorted and how?
timelogic => \$::TIMELOGIC, # input file(s) are TimeLogic
# tab-delimited
tophsp => \$::TOP_HSP, # use only top HSP for each hit?
make_match => \$::MAKE_MATCH, # make dummy HSPs for non-matching queries
verbose => \$::VERBOSE, # print progress msgs to STDERR
);
die $::USAGE unless( GetOptions(@opts) );
&display_more_help if ($::HELP);
if ($::FORMAT !~ /^\d+$/ || $::FORMAT < 0 || $::FORMAT > $::MAX_FORMATS)
{
die "\nInvalid value for -format '$::FORMAT'\n$::USAGE";
}
#elsif ($::FORMAT == 1) # select format for write, if needed
# {
# select((select(STDOUT), $~ = "BLAST2TABLE")[0]);
# }
#elsif ($::FORMAT == 2)
# {
# select((select(STDOUT), $~ = "BLAST2TABLE_BLASTN_HTML")[0]);
# }
#elsif ($::FORMAT == 3)
# {
# select((select(STDOUT), $~ = "BLAST2TABLE_BLASTX_HTML")[0]);
# }
elsif ($::FORMAT == 4)
{
select((select(STDOUT), $~ = "BLAST2TABLE_EST")[0]);
}
if ($::HTML && $::FORMAT > 4)
{
die "\nCannot use -html, except with -format 1, 2, 3, or 4\n$::USAGE";
}
$::SORT_SUB = '';
if ($::SORT_TYPE =~ /^e/i)
{
$::SORT_SUB = \&by_expect;
}
elsif ($::SORT_TYPE =~ /^p/i)
{
$::SORT_SUB = \&by_pos;
}
elsif ($::SORT_TYPE =~ /^s/i)
{
$::SORT_SUB = \&by_score;
}
elsif ($::SORT_TYPE ne '')
{
die "\nInvalid -sort type '$::SORT_TYPE'\n$::USAGE";
}
if ($::DESC_LEN !~ /^\d+$/)
{
die "\nInvalid value for -desc_len '$::DESC_LEN'\n$::USAGE";
}
$::total_hsps = $::total_hits = $::total_queries = $::total_files =
$::html_header_printed = $::dummy_hsps= 0;
my @hsp_list = ();
# Allow reading extra HSPss before sorting and -numhsps limiting, if -sort
# specified
$::NUM_HSPS_EXTRAS = $::NUM_HSPS + ($::SORT_SUB ? $::extra_HSPs : 0);
push @ARGV, 'Standard Input' unless @ARGV; # Handle STDIN if no files
# process list of file names from command line
foreach my $file (@ARGV)
{
my ($searchin, $query, $infile);
$::file_queries = 0;
$::total_files++;
if ($::TIMELOGIC)
{
if ($file eq 'Standard Input')
{
$infile = 'STDIN';
}
else
{
# open the tab-delimited input file
unless (defined(open(INFILE, "<$file")))
{
print STDERR "cannot open input tab-delimited file '$searchin',\n" .
" skipping this file, $!\n";
next;
}
$infile = 'INFILE';
}
process_file_tab($infile);
close(INFILE) unless $infile eq 'STDIN';
}
else # (!$::TIMELOGIC) # use BioPerl to parse Blast output file
{
# create the blast object and get the parsed list
if ($file eq 'Standard Input')
{
$searchin = new Bio::SearchIO( -format => 'blast', -fh => \*STDIN );
}
else
{
$searchin = new Bio::SearchIO( -format => 'blast', -file => $file );
}
# $searchin should be a Bio::SearchIO object
unless ($searchin)
{
print STDERR "Call to new Bio::SearchIO failed for file: '$file'\n";
next;
}
while ( $query = $searchin->next_result() ) # next query within output file
{
# $query is a Bio::Search::Result::ResultI object
$::file_queries++;
process_blast($query);
}
}
print STDERR " $::file_queries blast queries parsed from file: $file\n"
if $::VERBOSE;
} # end foreach $file (@ARGV)
if ($::total_queries)
{
if ($::HTML && ($::FORMAT == 2 || $::FORMAT == 3))
{
print "\n";
print "\n";
$::html_header_printed = 1;
}
if ($::SORT_SUB)
{
my @temp_hsps = @hsp_list;
@hsp_list = sort $::SORT_SUB @temp_hsps;
}
# print the table
foreach my $hsp (@hsp_list)
{
&output_hsp($hsp)
}
if ($::html_header_printed)
{
print "\n";
print "\n";
}
}
if ($::VERBOSE)
{
print STDERR "\n$::total_files files containing $::total_queries queries were found with $::total_hsps HSPs for $::total_hits hits\n";
print STDERR "$::dummy_hsps additional dummy HSPs were created for non-matching queries\n"
if ($::MAKE_MATCH);
}
###########################################################################
# process blast object from query
###########################################################################
sub process_blast
{
my ($bl) = @_; # $bl is a Bio::Search::Result::ResultI object
my $hit;
my (@temp_hsp_list, @sorted_hsps);
$::total_queries++;
my $hsps = 0;
my $hits = 0;
# my $date = $bl->date(); # from Bio::Tools::SeqAnal.pm
# $date ||= '';
# my $program = $bl->algorithm(); # BLASTN, BLASTP, ...
# my $DB = $bl->database_name();
# Short or long query name? query_accession is first word of query_name
my $seq1name = $::LONG_QUERY ? $bl->query_name() : $bl->query_accession();
my $seq1len = $bl->query_length(); # length of query sequence
my $seq2len = 0; # we don't know seq2len yet
my $hsplen = 0; # or hsplen
my $fake_hsp = 0;
while ($hit = $bl->next_hit()) # returns Bio::Search::Hit::HitI objects
{
$hits++;
# max num of hits/query before sorting and limiting
last if $::NUM_HSPS && $hsps >= $::NUM_HSPS_EXTRAS;
my $seq2name = $hit->name();
$seq2len = $hit->length(); # length of hit sequence
$hsplen = 0; # we don't hsplen know yet
my $acc = $hit->accession();
my $description = $hit->description() || $acc;
# truncate descriptions that are too long
substr($description, $::DESC_LEN) = ' ...'
if ($::DESC_LEN && length($description) > $::DESC_LEN);
my $score = $hit->raw_score();
# my $bits = $hit->bits(); # There is no bits() for a Hit
my $evalue = $hit->significance();
$evalue =~ s/^([eE][-+]?\d+)$/1$1/; # 'E-123' => '1E-123'
# $evalue =~ s{(\d+)\.[5-9]\d*([eE])}
# {sprintf"%d$3", $2 + 1}e; # '6.78e-90' => '7e-90'
# $evalue =~ s/\.\d*([eE])/$1/e; # '1.23E-45' => '1E-45'
next if ($evalue > $::MAX_EXPECT ||
($score < $::MIN_SCORE && ! $::BITS_SCORE));
my $hsp;
while ($hsp = $hit->next_hsp()) # $hsp is a Bio::Search::HSP::HSPI object
{
last if $::NUM_HSPS && $hsps >= $::NUM_HSPS_EXTRAS;
my $score = $hsp->score();
my $bits = $hsp->bits();
$score = $bits if $::BITS_SCORE; # return bits, not score?
my $evalue = $hsp->evalue();
$evalue =~ s/^([eE][-+]?\d+)$/1$1/; # 'E-123' => '1E-123'
# $evalue =~ s{(\d+)\.[5-9]\d*([eE])}
# {sprintf"%d$3", $2 + 1}e; # '6.78e-90' => '7e-90'
# $evalue =~ s/\.\d*([eE])/$1/e; # '1.23E-45' => '1E-45'
# filter out poor hsps
next if ($score < $::MIN_SCORE || $evalue > $::MAX_EXPECT);
$hsps++;
my $query_object = $hsp->query(); # a Bio::SeqFeature::Similarity object,
my $hit_object = $hsp->hit(); # is also a Bio::SeqFeature::Generic obj
$hsplen = $hsp->length(); # length of alignment
my $seq1beg = $query_object->start();
my $seq1end = $query_object->end();
my $qstrand = $query_object->strand();
my $qframe = $query_object->frame();
my $seq2beg = $hit_object->start();
my $seq2end = $hit_object->end();
my $strand = $hit_object->strand();
my $frame = $hit_object->frame();
$query_object = $hit_object = undef;
my $identity = sprintf "%.0f", $hsp->frac_identical * 100;
my $similarity = sprintf "%.0f", $hsp->frac_conserved * 100;
push @temp_hsp_list,
[$evalue, $score,
$seq1beg, $seq1end, $seq1name, $qstrand, $qframe,
$seq2beg, $seq2end, $seq2name, $strand, $frame,
$description, $identity, $similarity,
$seq1len, $seq2len, $hsplen];
last if $::TOP_HSP; # skip rest of HSPs for this hit?
} # end while ($hsp = ... )
$hsp = undef;
} # end while ($hit = ... )
if ($::MAKE_MATCH && ! @temp_hsp_list)
{
# if $::MAKE_MATCH and we found no HSPs, fake up a dummy, very
# bad, HSP to record the fact that $seq1name was searched for in
# the database, but no match was found.
# push @temp_hsp_list, [$evalue, $score,
# $seq1beg, $seq1end, $seq1name, $qstrand, $qframe,
# $seq2beg, $seq2end, $seq2name, $strand, $frame,
# $description, $identity, $similarity,
# $seq1len, $seq2len, $hsplen];
push @temp_hsp_list, [99, 0,
0, 0, $seq1name, 0, '.',
0, 0, '-No-Hit-', 0, '.',
'No Matching Hit found', 0, 0,
$seq1len, 0, 0];
$fake_hsp = 1;
}
if ($::SORT_SUB)
{
@sorted_hsps = sort $::SORT_SUB @temp_hsp_list;
if ($::NUM_HSPS && (scalar @sorted_hsps) > $::NUM_HSPS)
{
@temp_hsp_list = splice @sorted_hsps, 0, $::NUM_HSPS;
}
else
{
@temp_hsp_list = @sorted_hsps;
}
}
$hsps = scalar @temp_hsp_list;
push @hsp_list, @temp_hsp_list; # add new HSPs to global list
$::total_hits += $hits;
if ($fake_hsp)
{
print STDERR " For query=$seq1name, 1 fake HSP was created for $hits hit\n"
if ($::VERBOSE);
$::dummy_hsps++;
}
else
{
print STDERR " For query=$seq1name, $hsps HSPs were found for $hits hits\n"
if ($::VERBOSE);
$::total_hsps += $hsps;
}
$bl = $hit = undef;
} # end process_blast
###########################################################################
# process blast info from tab-delimited input file
###########################################################################
sub process_file_tab
{
my ($infile) = @_; # $infile is the file handle to read
my (@temp_hsp_list, $hsp);
my ($hsps, $hits, $skipping_hsps, $fake_hsp) = (0) x 4;
my ($seq1beg, $seq1end, $seq1name, $qstrand, $qframe,
$seq2beg, $seq2end, $seq2name, $strand, $frame,
$acc, $description, $identity, $similarity,
$seq1len, $seq2len, $hsplen);
my ($old_query, $old_hit, $query, $hit);
$old_query = ' force new query ';
$seq1len = 0; # we don't have this information for this type of file
$seq2len = 0; # *
$hsplen = 0; # *
while (defined($hsp = <$infile>)) # read next input line
{
chomp($hsp);
# split out TimeLogic fields from input line
my($status, $domainsignificance, $domainscore,
$querystart, $queryend, $queryaccession, $querytext,
$queryframe, $targetstart, $targetend, $targetlocus,
$targetframe, $targetdescription, $targetaccession,
$percentalignment, $simpercentalignment) = split ("\t", $hsp);
# check for valid input line
# if ($status ne 'OK')
# {
# print STDERR "unusual tab-delimited input line, status='$status'\n '$hsp'\n";
# }
# convert TimeLogic fields to old Blast2table fields
$queryaccession ||= $querytext; # make sure we have a query accession
$query = $queryaccession . $querytext;
$hit = $targetlocus . $targetdescription . $targetaccession;
# check for new query
if ($query ne $old_query)
{
$::file_queries++;
$::total_queries++;
process_query_tab($hsps, $hits, $seq1name, \@temp_hsp_list, $seq1len)
if ($old_query ne ' force new query ');
$skipping_hsps = $hsps = $hits = 0;
$seq1name = '';
@temp_hsp_list = ();
$old_query = $query;
$old_hit = ' force new hit ';
}
$seq1name = $::LONG_QUERY ? $querytext : $queryaccession;
# skip to next input line if not OK, probably no hits for query
next if ($status ne 'OK');
# check for new hit
if ($hit ne $old_hit)
{
$hits++;
$old_hit = $hit;
}
# max num of hits/query before sorting and limiting
$skipping_hsps = 1 if ($::NUM_HSPS && $hsps >= $::NUM_HSPS_EXTRAS);
next if $skipping_hsps;
$skipping_hsps = 1 if $::TOP_HSP; # skip rest of HSPs for this hit?
# continue converting TimeLogic fields to old Blast2table fields
my $evalue = $domainsignificance;
$evalue =~ s/^([eE][-+]?\d+)$/1$1/; # 'E-123' => '1E-123'
my $bits = $domainscore; # this is always a bit score, so score
my $score = $bits; # is bits, if -bits is used or not
$seq1beg = $querystart;
$seq1end = $queryend;
$qframe = $queryframe;
$qframe = +1 if $qframe eq 'D';
$qframe = -1 if $qframe eq 'C';
$qstrand = $qframe > 0 ? +1 : $qframe < 0 ? -1 : 0;
$seq2beg = $targetstart;
$seq2end = $targetend;
$seq2name = $targetlocus;
$frame = $targetframe;
$frame = +1 if $frame eq 'D';
$frame = -1 if $frame eq 'C';
$strand = $frame > 0 ? +1 : $frame < 0 ? -1 : 0;
$acc = $targetaccession;
$description = $targetdescription || $acc;
# truncate descriptions that are too long
substr($description, $::DESC_LEN) = ' ...'
if ($::DESC_LEN && length($description) > $::DESC_LEN);
$identity = $percentalignment;
$similarity = $simpercentalignment;
# filter out poor hsps
next if ($score < $::MIN_SCORE || $evalue > $::MAX_EXPECT);
$hsps++;
# save this hsp
push @temp_hsp_list,
[$evalue, $score,
$seq1beg, $seq1end, $seq1name, $qstrand, $qframe,
$seq2beg, $seq2end, $seq2name, $strand, $frame,
$description, $identity, $similarity,
$seq1len, $seq2len, $hsplen];
} # while (defined($hsp = <$infile>)) # read next input line
process_query_tab($hsps, $hits, $seq1name, \@temp_hsp_list, $seq1len);
$skipping_hsps = $hsps = $hits = 0;
$seq1name = '';
@temp_hsp_list = ();
$old_query = ' force new query ';
$old_hit = ' force new hit ';
} # end process_file_tab
###########################################################################
# process query info from tab-delimited input file
###########################################################################
sub process_query_tab
{
my ($hsps, $hits, $seq1name, $temp_hsp_list_ref, $seq1len) = @_;
my (@temp_hsp_list, @sorted_hsps);
my $fake_hsp = 0;
if ($::MAKE_MATCH && ! scalar @{$temp_hsp_list_ref})
{
# if $::MAKE_MATCH and we found no HSPs, fake up a dummy, very
# bad, HSP to record the fact that $seq1name was searched for in
# the database, but no match was found.
# push @temp_hsp_list, [$evalue, $score,
# $seq1beg, $seq1end, $seq1name, $qstrand, $qframe,
# $seq2beg, $seq2end, $seq2name, $strand, $frame,
# $description, $identity, $similarity,
# $seq1len, $seq2len, $hsplen];
push @{$temp_hsp_list_ref}, [99, 0,
0, 0, $seq1name, 0, '.',
0, 0, '-No-Hit-', 0, '.',
'No Matching Hit found', 0, 0,
, $seq1len, 0, 0];
$fake_hsp = 1;
}
if ($::SORT_SUB)
{
@sorted_hsps = sort $::SORT_SUB @{$temp_hsp_list_ref};
if ($::NUM_HSPS && (scalar @sorted_hsps) > $::NUM_HSPS)
{
@temp_hsp_list = splice @sorted_hsps, 0, $::NUM_HSPS;
}
else
{
@temp_hsp_list = @sorted_hsps;
}
}
else
{
@temp_hsp_list = @{$temp_hsp_list_ref};
}
push @hsp_list, @temp_hsp_list; # add new HSPs to global list
$::total_hits += $hits;
if ($fake_hsp)
{
print STDERR " For query=$seq1name, 1 fake HSP was created for $hits hits\n"
if ($::VERBOSE);
$::dummy_hsps++;
}
else
{
print STDERR " For query=$seq1name, $hsps HSPs were found for $hits hits\n"
if ($::VERBOSE);
$::total_hsps += scalar @temp_hsp_list;
$hsps = 0;
}
@temp_hsp_list = ();
} # end process_query_tab
###########################################################################
# output_hsp - Output an HSP in a variety of formats
###########################################################################
sub output_hsp
{
my ($hsp) = @_;
my( $evalue, $score,
$seq1beg, $seq1end, $seq1name, $qstrand, $qframe,
$seq2beg, $seq2end, $seq2name, $strand, $frame,
$description, $identity, $similarity,
$seq1len, $seq2len, $hsplen ) = @{ $hsp };
my($id, $giNum);
($seq1beg, $seq1end) = ($seq1end, $seq1beg)
if ($qstrand < 0 && $seq1beg < $seq1end);
($seq2beg, $seq2end) = ($seq2end, $seq2beg)
if ($strand < 0 && $seq2beg < $seq2end);
if ($::FORMAT == 1)
{
$seq2name = &HTML_link($seq2name, 's') if $::HTML;
# write STDOUT;
# format BLAST2TABLE =
# @#### @<<<<<<<< @<<<<<<<<<< @###### @###### @<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
printf STDOUT "%5d %-9s %-11s %7d %7d %-20s %s\n",
$score, $evalue, $seq1name, $seq1beg, $seq1end, $seq2name,
$description;
# .
}
elsif ($::FORMAT == 2) # Blast2table_blastn_html
{
$seq2name =~ s/^gi\|g/gi\|/;
$seq2name = &HTML_link($seq2name, 'n') if $::HTML;
# write STDOUT;
# format BLAST2TABLE_BLASTN_HTML =
# @#### @<<<<<<<< @<<<<<<<<<< @###### @###### @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
printf STDOUT "%5d %-9s %-11s %7d %7d %-93s %s\n",
$score, $evalue, $seq1name, $seq1beg, $seq1end, $seq2name,
$description;
# .
}
elsif ($::FORMAT == 3) # Blast2table_blastx_html
{
($id, $giNum) = split (/\|/, $seq2name);
($id, $id, $giNum) = split (/\|/, $seq2name) if (! $giNum);
$seq2name = &HTML_link($giNum, 'p_r') if ($::HTML && $giNum);
# write STDOUT;
# format BLAST2TABLE_BLASTX_HTML =
# @#### @<<<<<<<< @<<<<<<<<<< @###### @###### @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
printf STDOUT "%5d %-9s %-11s %7d %7d %-93s %s\n",
$score, $evalue, $seq1name, $seq1beg, $seq1end, $seq2name,
$description;
# .
}
elsif ($::FORMAT == 4) # Blast2table_est
{
$seq2name = &HTML_link($seq2name, 's') if ($::HTML);
my $hit = 1;
write STDOUT;
format BLAST2TABLE_EST =
@<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<< @### @<<<<<<<< @<
$seq2name, $description, $frame, $score, $evalue, $hit
.
}
elsif ($::FORMAT == 5) # exbldb output, like "MSPcrunch -d"
{
printf STDOUT "%4d %.2f %5d %5d %10s %5d %5d %s %s\n",
$score, $identity, $seq1beg, $seq1end, $seq1name,
$seq2beg, $seq2end, $seq2name, $description;
}
elsif ($::FORMAT == 6) # Blast2table_jdw format with hit
{ # positions/direction
printf "%4d %-9s %-30s %7d %7d %-30s %7d %7d %s\n",
$score, $evalue, substr($seq1name, 0, 30),
$seq1beg, $seq1end, substr($seq2name, 0, 30),
$seq2beg, $seq2end, substr($description, 0, 90);
}
elsif ($::FORMAT == 7) # tab-delimited table
{
print join("\t", $score, $evalue, "$identity%", "$similarity%",
$seq1name, $seq1beg, $seq1end,
$seq2name, $seq2beg, $seq2end, $description),
"\n";
}
elsif ($::FORMAT == 8) # tab-delimited table
{
print join("\t", $score, $evalue,
$hsplen, "$identity%", "$similarity%",
$seq1name, $seq1len, $seq1beg, $seq1end,
$seq2name, $seq2len, $seq2beg, $seq2end, $description),
"\n";
}
elsif ($::FORMAT eq '0') # tab-delimited table
{
print join("\t", "qs=$qstrand", "qf=$qframe", "ss=$strand",
"sf=$frame", $score, $evalue, $seq1name, $seq1beg,
$seq1end, $seq2name, $seq2beg, $seq2end), "\n";
}
else
{
die "\nInvalid -format '$::FORMAT'\n$::USAGE";
}
} # end output_hsp
###########################################################################
# convert blast hit id into HTML hot link
###########################################################################
sub HTML_link
{
my($seq2name, $db) = @_;
"$seq2name";
} # end HTML_link
###########################################################################
# by_expect - sort comparison routine to sort by increasing expect value
# or by decreasing score if expect values match
###########################################################################
sub by_expect
{
my($evaluea, $scorea) = @{ $a };
my($evalueb, $scoreb) = @{ $b };
$evaluea <=> $evalueb or $scoreb <=> $scorea;
} # end by_expect
###########################################################################
# by_pos - sort comparison routine to sort by hit position within query
###########################################################################
sub by_pos
{
my($evaluea, $scorea, $seq1bega, $seq1enda, $seq1namea) = @{ $a };
my($evalueb, $scoreb, $seq1begb, $seq1endb, $seq1nameb) = @{ $b };
$seq1namea cmp $seq1nameb or # query name
$seq1bega <=> $seq1begb or # then beginning base position
$seq1enda <=> $seq1endb; # then ending base position
} # end by_pos
###########################################################################
# by_score - sort comparison routine to sort by decreasing score
# or by increasing expect value if scores match
###########################################################################
sub by_score
{
my($evaluea, $scorea) = @{ $a };
my($evalueb, $scoreb) = @{ $b };
$scoreb <=> $scorea or $evaluea <=> $evalueb;
} # end by_score
###########################################################################
# display full help info
###########################################################################
sub display_more_help
{
print STDOUT <= 'min_score', if '-score min_score' is
used.
-desc_len Specify the maximum length of
hit description to be output. Descriptions longer than
are truncated and have " ..." appended
to indicate the truncation. (The actual output length is then
+ 4.) The default value for
is $::DESC_LEN. If is
set to zero, then no truncation occurs.
-expect Specify the maximum allowed value for the expect
value. HSPs with larger expect values are discarded. For example,
'-expect 1E-5' would discard all HSP with an expect value greater
than 0.00001. The default value of 'max_expect' is $::MAX_EXPECT.
-format Select output format. Default is '-format 1'.
'-format 1' - Create a table with descriptions and hit names.
May be used with '-html' to output the hit names as HTML links.
This is the same format used by the previous Blast2table and is
the default, which may be omitted. '-format 1' is useful for
finding the locations of matching genes in long query contigs.
Fields are written in a fixed column format, but long field values
are extended to avoid truncation. The fields are: Score,
E-value, Query Name, Starting Position of Match in Query, Ending
Position of Match in Query, Hit Name, Hit Description. '-format 1'
allows a short Hit Name and a very long Hit Description.
'-format 2' - Create a table with descriptions and hit names.
The hit name is modified to be a valid query key for the Genbank
nucleotide database and would normally be used with '-html' to
output the hit names as HTML links and to encapsulate the entire
file in HTML tags. This is the same format used by
Blast2table_blastn_html. '-format 2' is useful for finding the
locations of matching genes in long query contigs.
Fields are written in a fixed column format, but long field values
are extended to avoid truncation. The fields are: Score,
E-value, Query Name, Starting Position of Match in Query, Ending
Position of Match in Query, Hit Name, Hit Description. The
format is similar to '-format 1', but it allows a longer Hit Name
while preserving column formatting.
'-format 3' - Create a table with descriptions and hit names.
The hit name is modified to be a valid query key for the Genbank
protein database and would normally be used with '-html' to output
the hit names as HTML links and to encapsulate the entire file in
HTML tags. This is the same format used by Blast2table_blastx_html.
'-format 3' is useful for finding the locations of matching genes
in long query contigs.
Fields are written in a fixed column format, but long field values
are extended to avoid truncation. The fields are: Score,
E-value, Query Name, Starting Position of Match in Query, Ending
Position of Match in Query, Hit Name, Hit Description. The format
is very similar to '-format 2'.
'-format 4' - Create a table of hits in a format used to include in
sequenced EST data submitted to Genbank. This is the format
produced by Blast2table_est.
Fields are written in a fixed column format: The fields are: Hit
Name, Hit Description, Frame, Score, E-value, and a '1'.
'-format 5' - Produces exbldb output, similar to \"MSPcrunch -d\",
also like Blast2table_exbldb. Percent identical bases is reported
with two digits after the decimal point to match the format of
MSPcrunch.
Fields are printed in a fixed column format: The fields are:
Score, %-Identical, Starting Position of Match in Query, Ending
Position of Match in Query, Query Name, Starting Position of
Match in Hit, Ending Position of Match in Hit, Hit Name, Hit
Description.
'-format 6' - Create a table with longer descriptions and hit
names. This format includes positions of matching sequences from
both the query and each HSP, indicating the direction of each
match. This is the format produced by Blast2table_jdw and is
useful for matching overlapping sequences for gap closure.
Fields are printed in a fixed column format: The fields are:
Score, E-Value, Query Name (up to 30 chars), Starting Position
of Match in Query, Ending Position of Match in Query, Hit Name
(up to 30 chars), Starting Position of Match in Hit, Ending
Position of Match in Hit, Hit Description (up to 90 chars).
'-format 7' - Create a tab-delimited table with full-length
descriptions and hit names. Uses tabs as field separators,
instead of using spaces to separate into fixed-length fields.
This format is also useful for matching overlapping sequences for
gap closure.
Fields are printed in a tab-delimited format: The fields are:
Score, E-Value, Percentage of Identical Bases in Match, Percentage
of Similar Bases in Match, Query Name, Starting Position of Match
in Query, Ending Position of Match in Query, Hit Name, Starting
Position of Match in Hit, Ending Position of Match in Hit, Hit
Description.
'-format 8' - Create a tab-delimited table with full-length
descriptions and hit names. Uses tabs as field separators,
instead of using spaces to separate into fixed-length fields.
This format is also useful for matching overlapping sequences for
gap closure.
Fields are printed in a tab-delimited format: The fields are:
Score, E-Value, Length of Match, Percentage of Identical Bases in
Match, Percentage of Similar Bases in Match, Query Name, Total
Length of Query Sequence, Starting Position of Match in Query,
Ending Position of Match in Query, Hit Name, Total Length of Hit
Sequence, Starting Position of Match in Hit, Ending Position of
Match in Hit, Hit Description.
-help Print a help message.
-html Output hit name as an HTML tag. May be used with
'-format 1', '-format 2', '-format 3', or '-format 4'.
-long Output full long version of query name. If -long is omitted,
then only the first word of the query name (the accession) is used.
-make_match If there is no match for a query, then make up a dummy
HSP to record the fact that no match was made. The dummy HSPs will
have a Score value of 0 and an E-value of 99.
-numhsps Specify the maximum number of hsps
returned for each query string. Note that there may be multiple
HSPs per hit, so fewer than different hits may
be returned for a query, unless '-tophsp' was specified. If
is zero, then all hsps are returned. If
-numhsps is not specified, then the default value is $::NUM_HSPS.
-score Specify the minimum allowed value for the score
value. HSPs with smaller score values are discarded. For example,
'-score 100' would discard all HSP with an score value smaller
than 100. Note that if '-bits' was specified, then 'min_score'
refers to the minimum allowed bits value, not the score. The
default value of 'min_score' is $::MIN_SCORE.
-sort Specify the type of sorting of output HSPs. If '-sort'
is omitted, then no sorting is performed. The output lines are
in the original order from Blast -- query by query, decreasing
score value for each hit within a query, then decreasing score
value for each HSP within each hit that has more than one HSP. If
'-numhsps max_hsps_per_query' is specifed, then extra HSPs are
read before sorting the results of each query. After sorting
the HSPs for each query, then only the top 'max_hsps_per_query'
HSPs for each query are returned. The output of all queries is
sorted together before the HSPs are output.
'-sort expect' - Sort the output HSPs by increasing value of
'expect'. If multiple queries are processed, HSPs from all
queries are sorted together.
'-sort pos' - Sort the output HSPs by the query name and then
by the location of the match in the query string from beginning
to end. If multiple queries are processed, the HSPs from each
query are grouped together and sorted by location within the
query.
'-sort score' - Sort the output HSPs by decreasing value of
'score'. If multiple queries are processed, HSPs from all
queries are sorted together.
-timelogic Input file(s) are in TimeLogic tab-delimited format,
instead of NCBI or WU Blast output format.
The following TimeLogic fields should be written into
the file(s): status, domainsignificance, domainscore,
querystart, queryend, queryaccession, querytext,
queryframe, targetstart, targetend, targetlocus,
targetframe, targetdescription, targetaccession,
percentalignment, and simpercentalignment.
-tophsp Use only the first HSP for each hit. Ignore the rest.
-verbose Verbose mode - Print progress messages to STDERR.
DATE LAST MODIFIED: $::Date_Last_Modified
HELP
exit 1;
} # end display_more_help
##################################################################
# _format_hash(\%hash) - returns formatted string for printing
# contents of a hash
##################################################################
sub _format_hash
{
my($hashref, $string, $key);
if (@_ == 1)
{
$hashref = shift;
}
else
{
%$hashref = @_;
}
if (! defined $hashref)
{
$string = "";
}
elsif (ref $hashref)# eq 'ARRAY')
{
$string = "{";
foreach $key (sort keys %$hashref)
{
$string .= ", " if $string ne "{";
$string .= "$key=";
$string .= (defined $$hashref{$key}) ? "'" . $$hashref{$key} . "'" :
"";
$string .= "=" . _format_hash($$hashref{$key}) if $string =~ /HASH\(0x/;
}
$string .= "}";
}
else
{
$string = $hashref;
}
return $string;
} # end _format_hash