#!/usr/bin/perl -w
#
# Parse BLAST reports with BioPerl's Bio::SearchIO module and print one
# tab-delimited line per HSP (every HSP of every hit of every query).
#
# Usage:  ./blast_parsing_all.pl BLASTREPORT
#
# If no report is given on the command line, the script prompts for one.
# NOTE: the BLAST file must be in text (not HTML) format.
#
# See the "Optional filters" section inside the HSP loop for filtering.

use strict;
use warnings;

use Bio::SearchIO;

# Take the report path from the first argument; otherwise ask for it.
# Test with defined/length rather than truthiness so that a file that is
# literally named "0" is still accepted.
my $in_file;
if ( defined $ARGV[0] && length $ARGV[0] ) {
    $in_file = $ARGV[0];
}
else {
    my $usage = "./blast_parsing_all.pl BLASTREPORT";
    print "Usage is '$usage' or.....\n";
    print "What is the BLAST file to parse? ";

    # Read the answer and strip the trailing newline.  <STDIN> returns
    # undef at end-of-input (e.g. Ctrl-D or a closed pipe).
    $in_file = <STDIN>;
    die "No BLAST file name given\n" unless defined $in_file;
    chomp $in_file;
    die "No BLAST file name given\n" unless length $in_file;
}

# Opening the report is left to BioPerl.
# NOTE(review): Bio::SearchIO is expected to throw if the file cannot be
# opened -- confirm against the installed BioPerl version.
my $report = Bio::SearchIO->new(
    -file   => $in_file,
    -format => 'blast',
);

# Column header.
print "Query_Name\tQuery_Acc\tQuery_Len\tHit_Acc\tHit_Desc\tHSP_rank\t\%_ID\te_Value\tHSP_length\n";

# One result per query in the report.
while ( my $result = $report->next_result ) {

    # One hit per matching database sequence.
    while ( my $hit = $result->next_hit ) {

        # One HSP per aligned segment of that hit.
        while ( my $hsp = $hit->next_hsp ) {

            # Optional filters.  Uncomment any of the lines below to skip
            # HSPs that fail the criterion, and adjust the thresholds to
            # the values you are interested in.  Each line is independent.
            #
            # next unless $hsp->length('total') > 100;
            # next unless $hsp->percent_identity >= 75;
            # next unless $hsp->evalue >= 0;    # placeholder: accepts every
            #                                   # HSP; use e.g. "<= 1e-5" to
            #                                   # keep only significant ones

            # scalar() forces exactly one value per column even if a getter
            # would return an empty list in list context.
            my @fields = (
                scalar( $result->query_name ),
                scalar( $result->query_accession ),
                scalar( $result->query_length ),
                scalar( $hit->accession ),
                scalar( $hit->description ),
                scalar( $hsp->rank ),
                scalar( $hsp->percent_identity ),
                scalar( $hsp->evalue ),
                scalar( $hsp->hsp_length ),
            );

            # Print missing (undef) values as empty columns instead of
            # emitting "uninitialized value" warnings.
            print join( "\t", map { defined $_ ? $_ : '' } @fields ), "\n";
        }
    }
}