#!/usr/bin/perl -w
#blastn2qrnadepth.pl

use strict;

use vars qw ($opt_c $opt_d $opt_e $opt_g $opt_h $opt_i $opt_j $opt_l $opt_o $opt_q $opt_r $opt_s $opt_t $opt_v $opt_w $opt_x);
use Getopt::Std;
use constant GNUPLOT => '/usr/bin/gnuplot';
#use constant GNUPLOT => '/usr/local/bin/gnuplot';  

getopts ('cd:e:g:hi:j:l:o:qrs:tvw:x:');      

if (!@ARGV) { 
    print "usage: blastn2qrnadepth.pl [options] <blastfile>\n";
    print "options:\n";
    print "-c             :  DONOT resolve conflicts of dept  [ default: do by truncating alignments  ]\n";
    print "-d <depth>     :  max number of alignment coverage per position  [ default depth = 1       ]\n";
    print "-e <max_eval>  :  maximum evalue   of blast hits allowed         [ default max_eval = 0.01 ] \n";
    print "-g <org_name>  :  name of the blasting organism                  [ default 'org'           ]\n";
    print "-h             :  make histogram of depth per position           [ ]\n";
    print "-i <min_id>    :  minimum identity of blast hits allowed         [ default min_id = 0      ]\n";
    print "-j <max_id>    :  maximum identity of blast hits allowed         [ default max_id = 100    ]\n";
    print "-l <min_len>   :  minimum length   of blast hits allowed         [ default min_len = 1     ]\n";
    print "-o <outfile>   :  output qfile                                   [ default = 'blastfile.q' ]\n";
    print "-r             :  calculate depth respect to the database instead of the query \n";
    print "-s <shift>     :  position shift when calculating depth          [ default shift = 1       ] \n";
    print "-t             :  keep the intermediate files, for debugging     [ default removes all intermediate files ] \n";
    print "-v             :  verbose, for debugging purposes \n";
    print "-w <which>     :  criteria to pick alignments                    [ default which = 'SC'    ] \n";
    print "                                    ID -- best \% identity\n";
    print "                                    SC -- best score\n";
    print "-x <name>      : ignore given name, use this one for gff outputs\n\n";
    print "-q             : gff file write the database entrie instead of the query\n\n";
    exit;
}
my $filename  = shift;
my $dir;
my $file;

if ($filename =~ /^(\S+)\/([^\/]+)$/) {
    $dir  = $1."/";
    $file = $2;
}
else {
    $dir  = "";
    $file = $filename;}
print "$dir\n $file\n";

my $seeplots = 0;
#
#options
my $depth;
if ($opt_d) { $depth = $opt_d; }
else        { $depth = 1;      }

my $max_eval;
if ($opt_e) { $max_eval = $opt_e; }
else        { $max_eval = 0.01;   }

my $org;
if ($opt_g) { $org = $opt_g; }
else        { $org = "org";  }
my $min_id;
if ($opt_i) { $min_id = $opt_i; }
else        { $min_id = 0;      }

my $max_id;
if ($opt_j) { $max_id = $opt_j; }
else        { $max_id = 100;    }

my $min_len;
if ($opt_l) { $min_len = $opt_l; }
else        { $min_len = 1;      }

my $outfile;
if ($opt_o) { $outfile = $opt_o;    }
else        { $outfile = "$file.E$max_eval.D$depth.q"; }

my $reverse = $opt_r;

my $shift;
if ($opt_s) { $shift = $opt_s; }
else        { $shift = 1;      }

my $verbose = $opt_v;

my $which;
if ($opt_w) { $which = $opt_w; }
else        { $which = "SC";   }
if ($which =~ /^SC$/ || $which =~ /^ID$/) { ;}
else { print "wrong option: 'SC' or 'ID'"; die; }

my $usename;
if ($opt_x) { $usename = $opt_x; }

my $filea   = "$file.a";      # here I dump the results of using len, evalue and identity cutoffs
my $fileb   = "$file.b";      # here I order the aligments for any query according to %id or score
my $filec   = "$file.c";      # final list of alignments in pseudoblastn format
my $fileq   = "$outfile";     # qrna-ready file
my $gfffile = "$outfile.gff";
my $report  = "$outfile.rep";
my $filerev = "$file.rev";    # reverse the blast output from query to sbjct

my $max_len = 300000000;

#init for depth histograms
my $max_depth = 5000;
my $min_depth = 0;
my $Nd = $max_depth;
my $kd = 1;
my $depthhistobef = $file."_histo_depth_bef";
my $depthhistoaft = $file."_histo_depth_aft";
my $depthhistofinal = $file."_histo_depth_final";
my @his_depth_before;
my @his_depth_after;
my @his_depth_final;
init_histo_array ($Nd, $kd, \@his_depth_before);
init_histo_array ($Nd, $kd, \@his_depth_after);
init_histo_array ($Nd, $kd, \@his_depth_final);

my $total_pos = 0;
my $conserved_pos = 0;
my %len_query;
my $n_query = 0;
my $n_ali_total = 0;
my $n_ali_one = 0;
my $n_ali_two = 0;
my $n_ali_two_unchanged = 0;
my $n_ali_two_fragments = 0;

my %n_ali_per_query_total;
my %n_ali_per_query_one;
my %n_ali_per_query_two;
my %n_ali_per_query_two_unchanged;
my %n_ali_per_query_two_fragments;
my %worse_evalue;

my $ave_len_ali_total = 0;
my $ave_len_ali_one   = 0;
my $ave_len_ali_two   = 0;

my $blastfile = "$dir$file"."_copy";
    
system ("cp $dir$file $blastfile\n");


if ($reverse) { print " Rervese blast\n"; reverse_blast ("$blastfile", "$filerev"); system ("mv $filerev $blastfile\n"); }

prune_blastn  ("$blastfile", "$filea", \%len_query, \$total_pos); 

if ($n_ali_one > 0) {

    order_blastn  ("$filea", "$fileb");  
    if (!$opt_t) { system ("rm $filea\n"); }

    depile_blastn ($shift, \%len_query, \$total_pos, \$conserved_pos, $Nd, $kd, 
		   "$fileb", "$depthhistobef", \@his_depth_before, 
		   "$filec", "$depthhistoaft", \@his_depth_after, "$depthhistofinal", \@his_depth_final);  
    if (!$opt_t) { system ("rm $fileb\n"); }

    blastn_2_qrna ("$filec", "$fileq");  
    if (!$opt_t) { system ("rm $filec\n"); }
}
else { 
    foreach my $r (keys(%n_ali_per_query_total)) { 
	$n_ali_per_query_two{$r} = 0;
	$n_ali_per_query_two_unchanged{$r} = 0;
	$n_ali_per_query_two_fragments{$r} = 0;
    }
    if (!$opt_t) { system ("rm $filea\n"); }
    system ("touch $fileq\n");
}

my $total_nali;
qrnafile_validate($fileq, \$total_nali);

write_report  ("$report", $total_nali);

system("rm $blastfile\n");


##
## SUBROUTINES
##

# blastn_2_qrna ()
#
#
sub blastn_2_qrna {

    my ($file, $fileq) = @_;

    my $nseq1 = 0;
    my $nseq2 = 0;
    
    my $line1 = 0;
    my $line2 = 0;
    
    my $name1;
    my $name2;

    my $annotation1;
    my $annotation2;

    my $more;
    my $info;

    my $start1;
    my $start2;

    my $end1;
    my $end2;

    my $coor1;
    my $coor2;

    my @more_parts;

    my $seq = 0;

    my @seq1;
    my @seq2;
    
    my @numl1;
    my @numl2;

    my @numr1;
    my @numr2;

    my $strand1;
    my $strand2;

    my $pmstrand1;
    my $pmstrand2;


    my $abs;

    open (GFF, ">$gfffile") || die;
    open (OUT, ">$fileq") || die;

    open (SBLAST, "$file") || die;

    while (<SBLAST>) {
	if    (/^>(.+)/ && $seq == 0) { 
	    my $key = $1;

	    if ($nseq1 > 0) {
		if ($numl1[0] < $numr1[$line1-1]) { $start1 = $numl1[0];        $end1 = $numr1[$line1-1]; $strand1 = ">"; $pmstrand1 = "+"; }
		else                              { $start1 = $numr1[$line1-1]; $end1 = $numl1[0];        $strand1 = "<"; $pmstrand1 = "-"; }
		
		$coor1 = $start1.$strand1.$end1;
		
		print OUT ">", $name1, "-", $coor1, "-", $annotation1, "\n";
		for (my $i = 0; $i < $line1; $i++) {
		    print OUT $seq1[$i], "\n"; }

		$abs = 0;

		if ($name1 =~ /^(\S+)\/frag\d+(.+)$/) {
		    $name1 = $1; 
		    my $rest = $2;
		    $name1 =~ s/\\//g;

		    if ($rest =~ /(\S+)\-\S+/) {
			$rest = $1;
			$rest =~ s/\///g; $rest =~ s/\\//g; 
			$abs = $rest-1;
		    
		    }
		}
		$name1 =~ s/\\//g;
		if ($name1 =~ /(\S+)[\/\-\:](\d+)\-(\d+)/) { 
		    $name1 = $1;
		    if ($2 < $3) { $abs += $2-1; }
		    else         { $abs += $3-1; }
		}
		$start1 += $abs;
		$end1   += $abs;

		if (defined($opt_x)) {
		    print GFF "$usename\tBLASTN\tsimilarity\t$start1\t$end1\t.\t$pmstrand1\t.\n"; 
		}
		else                 { 
		    print GFF "$name1\tBLASTN\tsimilarity\t$start1\t$end1\t.\t$pmstrand1\t.\n";   
		}


	    }
	    $line1 = 0;

	    $key =~ /^\d+>(.+)$/; $name1 = $1;
	    $name1 = quotemeta $name1;

	    $annotation1 = "";  
	    if ($name1 =~ /^(\S+)\s+(\S+)/) { $name1 = $1; $annotation1 = $2; }
	    
	    $seq = 1;
	    $nseq1++;
	}
	
	elsif (/^>(.+)/ && $seq == 1) {
	    if ($nseq2 > 0) {
		if ($numl2[0] < $numr2[$line2-1]) { $start2 = $numl2[0];        $end2 = $numr2[$line2-1]; $strand2 = ">"; $pmstrand2 = "+"; }
		else                              { $start2 = $numr2[$line2-1]; $end2 = $numl2[0];        $strand2 = "<"; $pmstrand2 = "-"; }
		
		$coor2 = $start2.$strand2.$end2;

		print OUT ">", $name2, "-", $coor2, "-", $annotation2, "\n";
		for (my $i = 0; $i < $line2; $i++) {
		    print OUT $seq2[$i], "\n"; }

		if ($opt_q) { 
		    print GFF "$name2\tBLASTN\tsimilarity\t$start2\t$end2\t.\t$pmstrand2\t.\n";   
		}
	    }
	    $line2 = 0;
	    $name2 = quotemeta $1;
	    
	    $annotation2 = ""; 
	    if ($name2 =~ /^(\S+)\s+(\S+)/) { $name2 = $1; $annotation2 = $2; }
	    
	    $seq = 0;
	    $nseq2++;
	}
	elsif (/^Query:\s+(\S+)\s+(\S+)\s(\S+)/) { 
	    $numl1[$line1] = $1; $seq1[$line1] = $2; $numr1[$line1] = $3; $line1++; }
	elsif (/^Sbjct:\s+(\S+)\s+(\S+)\s(\S+)/) { 
	    $numl2[$line2] = $1; $seq2[$line2] = $2; $numr2[$line2] = $3; $line2++; }
	else { next; }
    }
    
    close (SBLAST);
    
    if ($numl1[0] < $numr1[$line1-1]) { $start1 = $numl1[0];        $end1 = $numr1[$line1-1]; $strand1 = ">"; $pmstrand1 = "+"; }
    else                              { $start1 = $numr1[$line1-1]; $end1 = $numl1[0];        $strand1 = "<"; $pmstrand1 = "-"; }
    
    $coor1 = $start1.$strand1.$end1;

    print OUT ">", $name1, "-", $coor1, "-", $annotation1, "\n";
    for (my $i = 0; $i < $line1; $i++) {
	print OUT $seq1[$i], "\n"; }
    
    $abs = 0;
    if ($name1 =~ /^(\S+)\/frag\d+(.+)$/) {
	$name1 = $1; 
	my $rest = $2;
	$name1 =~ s/\\//g;
	
	if ($rest =~ /(\S+)\-\S+/) {
	    $rest = $1;
	    $rest =~ s/\///g; $rest =~ s/\\//g; 
	    $abs = $rest-1;
	    
	}
    }
    $name1 =~ s/\\//g;
    if ($name1 =~ /(\S+)[\/\-\:](\d+)\-(\d+)/) { 
	$name1 = $1;
	if ($2 < $3) { $abs += $2-1; }
	else         { $abs += $3-1; }
    }
    $start1 += $abs;
    $end1   += $abs;

    if (defined($opt_x)) { print GFF "$usename\tBLASTN\tsimilarity\t$start1\t$end1\t.\t$pmstrand1\t.\n"; }
    else                 { print GFF "$name1\tBLASTN\tsimilarity\t$start1\t$end1\t.\t$pmstrand1\t.\n";   }
    
    if ($numl2[0] < $numr2[$line2-1]) { $start2 = $numl2[0];        $end2 = $numr2[$line2-1]; $strand2 = ">"; $pmstrand2 = "+"; }
    else                              { $start2 = $numr2[$line2-1]; $end2 = $numl2[0];        $strand2 = "<"; $pmstrand2 = "-"; }

    $coor2 = $start2.$strand2.$end2;
    print OUT ">", $name2, "-", $coor2, "-", $annotation2, "\n";
    for (my $i = 0; $i < $line2; $i++) {
	print OUT $seq2[$i], "\n"; }

    if ($opt_q) { 
	print GFF "$name2\tBLASTN\tsimilarity\t$start2\t$end2\t.\t$pmstrand2\t.\n";   
    }

    $nseq1++;
    $nseq2++;

    close (OUT);
    close (GFF);   

}

# depile_blastn ()
#
#
sub depile_blastn {
    
    my ($shift, $len_query_ref, $total_pos_ref, $conserved_pos_ref, 
	$N, $k, $fileb, $hisfileb, $hisb_ref, $filec, $hisfilec, $hisc_ref, $hisfinal, $his_final_ref) = @_;
    
    my %bit;

    my $verbose = 0;

    my $conserved_pos = 0;
    my $total_pos_afterprunning;

    discard_alignments ("$fileb", $len_query_ref, $total_pos_ref, \%bit,  $N, $k, $hisfileb, $hisb_ref, $shift, \$conserved_pos);
    
    print_selected_alignments ("$fileb", "$filec", $len_query_ref, $total_pos_ref, \$total_pos_afterprunning, \$conserved_pos, \%bit, 
			       $N, $k, $hisfilec, $hisc_ref);
    
    if ($opt_h) {
	my $max;
	final_histogram ("$filec", $len_query_ref, \$total_pos_afterprunning, $conserved_pos, $N, $k, $hisfinal, $his_final_ref, \$max);

	if (!$opt_c) { if ($max > $depth) { print "you did not do it righ: depth = $depth, you have $max\n"; die; } }

    }
    
    $$conserved_pos_ref = $conserved_pos;
    
    if ($verbose) {
	print "Total positions:     $$total_pos_ref\n";
	print "conserved positions: $$conserved_pos_ref\n";
    }
}


sub discard_alignments {

    my ($fileb, $len_query_ref, $total_pos_ref, $bit_ref, $N, $k, $hisfileb, $hisb_ref, $shift, $conserved_pos_ref) = @_;

    my $keyquery;
    my $keyquery_new;

    my $numl;
    my $numr;

    my $start;
    my $end;

    my @howmany;
    my @which;

    my @array;
    my $pile;

    my $count_query = 0;
    my $count_ali_per_query = 0;

    my $queryplot;

    my $num;
    my $pos;

    my $x;

    my $conserved_pos = 0;

    open (SBLAST, "$fileb") || die;
    while (<SBLAST>) {
	if    (/^\s*Query=\s+(.+)/ || ($reverse && /^\s*Sbjct=\s+(.+)/) ) 
	{ 
	    $keyquery_new = $1; 
	    if ($count_query > 0) {

		$bit_ref->{$keyquery} = "";
		for (my $n = 0; $n < $n_ali_per_query_one{$keyquery}; $n++) { $bit_ref->{$keyquery} .= "0"; }

		if ($verbose) { 
		    $keyquery =~ /^(\d+)>(\S+)/;
		    $queryplot = "$fileb.$1_$2.plot"; 
		    system ("touch $queryplot\n"); 
		    open (PLOT, ">$queryplot") || die; 
		}

		$x = 0;
		while ($x < $len_query_ref->{$keyquery}) { 
		    if ($howmany[$x] > 0) { $conserved_pos ++; }
		    if ($opt_h) { fill_histo_array ($howmany[$x], $N, $k, $hisb_ref); } 
		    $x ++;
		}

		$pos = 0;
		while ($pos < $len_query_ref->{$keyquery}) {
			
		    $pile = ($howmany[$pos] > $depth)? $depth : $howmany[$pos];

		    if ($verbose) { print PLOT "$pos $howmany[$pos] $pile\n"; }

		    my $string;
		    if ($which[$pos] =~ /^\-(.+\-)/) { $string = $1;           }
		    else                             { $string = $which[$pos]; } 

		    @array = split(/-/, $string);
		    
		    #paranoia
		    if (($#array+1) != $howmany[$pos]) { 
			print "how many alignments at position $pos? ", $#array+1, " or $howmany[$pos]?\n"; die; 
		    }
		    
		    for (my $num = 0; $num < $pile; $num++) {
			substr($bit_ref->{$keyquery}, $array[$num], 1) = "1";
		    }

		    $pos += $shift;
		}

		if ($verbose) { close (PLOT); plot ("$queryplot"); }
 	    }

	    $count_query ++; 
	    $count_ali_per_query = 0;
	    $keyquery = $keyquery_new;

	    for (my $l = 0; $l < $len_query_ref->{$keyquery}; $l++) { $howmany[$l] = 0; $which[$l] = "-";}
	    
	}    
	elsif (/^\s+Identities/)       
	{ 
	    $count_ali_per_query ++;
	}
	elsif (/^Query:\s+(\S+)\s+\S+\s+(\S+)/ || ($reverse && /^Sbjct:\s+(\S+)\s+\S+\s+(\S+)/) ) 
	{ 
	    
	    $numl = $1; 
	    $numr = $2;
	    
	    if ($numl < $numr) { $start = $numl-1; $end = $numr-1; }
	    else               { $start = $numr-1; $end = $numl-1; }
	    
	    if ($start < 0) { die "wrong blast limits: start= $start\n"; }

	    if ($end >= $len_query_ref->{$keyquery}) { die "wrong blast limits: end=$end len = $len_query_ref->{$keyquery}\n"; }	    
	    
	    for (my $l = $start; $l <= $end; $l++) {
		$howmany[$l] ++;
		$num = $count_ali_per_query - 1;
		$which[$l] .= $num."-";
	    }
	}
    }
    
    close (SBLAST);

    #last one again
    for (my $n = 0; $n < $n_ali_per_query_one{$keyquery}; $n++) { $bit_ref->{$keyquery} .= "0"; }

    if ($verbose) { 
	$keyquery =~ /^(\d+)>(\S+)$/;
	$queryplot = "$fileb.$1_$2.plot"; 
 	system ("touch $queryplot\n"); 
	open (PLOT, ">$queryplot") || die; 
    }

    $x = 0;
    while ($x < $len_query_ref->{$keyquery}) { 
	if ($howmany[$x] > 0) { $conserved_pos ++; }
	if ($opt_h) { fill_histo_array ($howmany[$x], $N, $k, $hisb_ref); } 
	$x ++;
    }


    $pos = 0;
    while ($pos < $len_query_ref->{$keyquery}) {


	$pile = ($howmany[$pos] > $depth)? $depth : $howmany[$pos];
	
	if ($verbose) { print PLOT "$pos $howmany[$pos] $pile\n"; } 
	

	my $string;
	if ($which[$pos] =~ /^\-(.+\-)/) { $string = $1;           }
	else                             { $string = $which[$pos]; } 
	
	@array = split(/-/, $string);
	
	#paranoia
	if (($#array+1) != $howmany[$pos]) { print "how many alignments at position $pos? ", $#array+1, " or $howmany[$pos]?\n"; die; }
	
	for (my $num = 0; $num < $pile; $num++) {
	    substr($bit_ref->{$keyquery}, $array[$num], 1) = "1";
	}
	
	$pos += $shift;
    }
 
    if ($verbose) { close (PLOT); plot ("$queryplot"); }
    
    $$conserved_pos_ref = $conserved_pos;
    
    #now print the histogram
    my $ylabel = "Percentage of positions";
    my $xlabel = "Number of alignments per position";
    my $max;
    if ($opt_h) {
	write_histo_file ("$hisfileb", $N, $k, $conserved_pos, "Depth Histogram ** Before prunning", 
			  $xlabel, $ylabel, $file, $hisb_ref, $min_depth, $max_depth);
    }
}


sub fill_histo_array {
    my ($len, $N, $k, $his_ref) = @_;
    
    my $dim = $N * $k;
    
    if ($len >=  $N) { $his_ref->[$dim] += 1; return; }
    
    for (my $i=0; $i<$dim; $i++) { 

	if ( $i/$k <= $len && $len < ($i+1)/$k) { 
	    #print "$len ", $i/$k, " ", ($i+1)/$k, " \n"; 
	    $his_ref->[$i] += 1; last; 
	} 
    }
    
}

sub final_histogram {
    my ($filec, $len_query_ref, $total_pos_afterprunning_ref, $conserved_pos, $N, $k, $hisfinal, $his_final_ref, $max_ref) = @_;

    my $verbose = 0;
    my $keyquery;
    my $keyquery_new;
    my $num;
    my $num_new;

    my $numl;
    my $numr;

    my $start;
    my $end;

    my $query;
    my $sbjct;

    my $count = 0;
    my $count_query = 0;
    my $final_depth_conserved_pos = 0;
    
    my $total_number_pos = 0;

    my @howmany;

    my $x;
    my $seq = 0;

    my $isfrag = 0;
    open (SBLAST, "$filec") || die;
    while (<SBLAST>) {
	if    (/^>(.+)/ && $seq == 0) 
	{ 
	    $keyquery_new = $1; 
	    if ($keyquery_new =~ /^(\d+)\>/) { $num_new = $1; }
	    
	    if ($count > 0) {
		if ($num_new =~ /^$num$/) {	 }
		else {
		    
		    $x = 0;
		    my $per_query = 0;
		    $total_number_pos += $len_query_ref->{$keyquery};
		    while ($x < $len_query_ref->{$keyquery}) { 

			if ($howmany[$x] > $depth) { 
			    print "bad truncation at pos $x [$howmany[$x]] of $count_query> ($count) $keyquery $len_query_ref->{$keyquery} $sbjct\n";
			    die;
			}

			if ($howmany[$x] > 0) { $final_depth_conserved_pos ++; $per_query ++;}
			fill_histo_array ($howmany[$x], $N, $k, $his_final_ref); 
			$x ++;
		    }
		    if ($verbose) {
			print "$count_query> ($count) $keyquery $len_query_ref->{$keyquery} $total_number_pos $per_query $final_depth_conserved_pos\n";
		    }
		    if ($count != $n_ali_per_query_two_unchanged{$keyquery} + $n_ali_per_query_two_fragments{$keyquery}) {
			print "bad counting of aligments at query $keyquery: here: $count there: 
			unchenged:$n_ali_per_query_two_unchanged{$keyquery} fragmented: $n_ali_per_query_two_fragments{$keyquery} \n";
			#die;
		    }

		    $count_query ++; 
		    $keyquery = $keyquery_new; 
		    $num      = $num_new; 
		    
		    $count = 0;
		    
		    undef(@howmany);
		    for (my $l = 0; $l < $len_query_ref->{$keyquery}; $l++) { $howmany[$l] = 0; }
		}
	    }
	    
	    $seq = 1;
	    if ($count == 0) { 
		$keyquery = $keyquery_new; 
		$num = $num_new;  
		undef(@howmany);
		for (my $l = 0; $l < $len_query_ref->{$keyquery}; $l++) { $howmany[$l] = 0; }
	    }

	}
	elsif (/^>(.+)/ && $seq == 1)       
	{ 
	    $sbjct = $1;
	    $count ++;

	    $isfrag = 0;
	    $seq = 0;

	}
	if (/^\#depth_fragment/) {
	    $isfrag = 1;
	}
	elsif ((/^Query:\s+(\S+)\s+\S+\s+(\S+)/ || ($reverse && /^Sbjct:\s+(\S+)\s+\S+\s+(\S+)/)) ) 
	{ 
	    $numl = $1; 
	    $numr = $2;
	    
	    if ($numl < $numr) { $start = $numl-1; $end = $numr-1; }
	    else               { $start = $numr-1; $end = $numl-1; }
	    

	    if ($start < 0) { die "wrong blast limits: start= $start\n"; }

	    if ($end >= $len_query_ref->{$keyquery}) { die "wrong blast limits: end=$end len = $len_query_ref->{$keyquery}\n"; }	    
	    
	    for (my $l = $start; $l <= $end; $l++) {
		$howmany[$l] ++;
	    }
	}

    }
    
    close (SBLAST);

    #last case
    $x = 0;
    my $per_query = 0;
    $total_number_pos += $len_query_ref->{$keyquery};
    while ($x < $len_query_ref->{$keyquery}) { 
	if ($howmany[$x] > $depth) { 
	    print "bad truncation at pos $x/$len_query_ref->{$keyquery} [$howmany[$x]] of $count_query> ($count) $keyquery $len_query_ref->{$keyquery}  $sbjct\n";
	    die;
	}

	if ($howmany[$x] > 0) { $final_depth_conserved_pos ++; $per_query ++; }
	fill_histo_array ($howmany[$x], $N, $k, $his_final_ref); 
	$x ++;
    }
    if ($verbose) {
	print "$count_query> ($count) $keyquery $len_query_ref->{$keyquery} $total_number_pos $per_query $final_depth_conserved_pos\n";
    }
    close (OUT);

    if ($verbose) { print "N queries filec = $count_query\n"; }
    
    if ($$total_pos_afterprunning_ref != $total_number_pos) { 
	print "error total_number_pos_afterprunning = $$total_pos_afterprunning_ref  total_number_pos_here = $total_number_pos\n"; 
	die; 
    }
    if ($conserved_pos != $final_depth_conserved_pos) { 
	print "error conserved_pos = $conserved_pos final_depth_conserved_pos = $final_depth_conserved_pos\n"; 
	die; 
    }
    
    #now print the histogram
    my $ylabel = "Percentage of positions";
    my $xlabel = "Number of alignments per position";
    if ($opt_h) {
	write_histo_file ("$hisfinal", $N, $k, $conserved_pos, "Depth Histogram ** Final Histogram", 
			  $xlabel, $ylabel, $file, $his_final_ref, $min_depth, $max_depth, $max_ref);
    }
}

sub find_ali_pos {

    my ($seq, $ali_pos_ref, $len) = @_;

    my @seq =split(//, $seq);
    my $len_ali = $#seq + 1;
  
    my $pos = 0;
    for (my $x = 0; $x < $len_ali; $x ++) {

	$ali_pos_ref->[$pos] = $x;

	if ($seq[$x] =~ /^\-$/ || $seq[$x] =~ /^\.$/) { next;    }
	else                                          { $pos ++; }
    }

    #paranois
    if ($len != $pos) { print "bad find_ali_pos len = $len pos = $pos\n"; die; }
}



sub gnuplot_histo {

    my ($filehisto, $title, $xlabel, $ylabel, $key, $total, $norm) = @_;
      
    my $outplot = "$filehisto.ps";
    open(GP,'|'.GNUPLOT) || die "Gnuplot: $!";
    
    print GP "set terminal postscript  default color solid 14\n";
    print GP "set linestyle 1 lt 1 lw 4\n";
    print GP "set linestyle 2 lt 2 lw 4\n";
    print GP "set linestyle 3  lt 3 lw 4\n";
    print GP "set linestyle 4 lt 4 lw 4\n";
    print GP "set linestyle 5 lt 5 lw 4\n";
    print GP "set linestyle 6 lt 6 lw 4\n";
    print GP "set linestyle 7 lt 7 lw 4\n";
    print GP "set linestyle 8 lt 8 lw 4\n";
    print GP "set linestyle 9 lt 9 lw 4\n";
    print GP "set linestyle 11 lt 1 lw 2 pt 1 ps 1.2\n";
    print GP "set linestyle 22 lt 2 lw 2 pt 2 ps 1.2\n";
    print GP "set linestyle 33 lt 3 lw 2 pt 3 ps 1.2\n";
    print GP "set linestyle 44 lt 4 lw 2 pt 4 ps 1.2\n";
    print GP "set linestyle 55 lt 5 lw 2 pt 5 ps 1.2\n";
    print GP "set linestyle 66 lt 6 lw 2 pt 6 ps 1.2\n";
    print GP "set linestyle 77 lt 7 lw 2 pt 7 ps 1.2\n";
    print GP "set linestyle 88 lt 8 lw 2 pt 8 ps 1.2\n";
    print GP "set linestyle 99 lt 9 lw 2 pt 9 ps 1.2\n";

    print GP "set output '$outplot'\n";
    #print GP "set nokey\n";
    print GP "set ylabel '$ylabel'\n";
    print GP "set xlabel '$xlabel'\n";

    print GP "set title '$title [$norm # conserved positions]'\n"; 
    print GP "plot '$filehisto' using 1:6 title '$key' with histeps ls 3\n";

    print GP "set title '$title [$total # positions]'\n"; 
    print GP "plot '$filehisto' using 1:4 title '$key' with histeps ls 3\n";

    close (GP);
    
    if ($seeplots) { system ("gv -landscape -magstep -2 $outplot&\n"); }
}



sub init_histo_array {

    my ($N, $k, $his_ref) = @_;

    my $dim = $N * $k;
    
    for (my $i=0; $i<=$dim; $i++) { $his_ref->[$i] = 0;  }
    
}

sub order_alignments {
    my ($which_ref, $blast_ref, $lend_ref, $rend_ref, $score_ref, $order_ref) = @_;

    my $verbose = 0;

    my $string;
    if ($$which_ref =~ /^\-(.+\-)/) { $string = $1;          }
    else                            { $string = $$which_ref; } 
    
    my @conf = split(/\-/, $string);

    my $n = 0;
    my @p;
    foreach my $conf (@conf) {
	$p[$n] = $conf;
	$n ++;
    }
    
    my $exp;
    for (my $x = 0; $x < $n; $x ++) {
	for (my $y = $x+1; $y < $n; $y ++) {
	    if ($score_ref->{$p[$y]} > $score_ref->{$p[$x]}) {
		
		$exp   = $p[$x]; 
		$p[$x] = $p[$y];
		$p[$y] = $exp;
	    }	    
	}
    }
    
    for (my $x = 0; $x < $n; $x ++) { $order_ref->{$p[$x]} = $x; }
   
    @conf = sort{ $order_ref->{$a} <=> $order_ref->{$b} } @conf; 
    
    my $which = "-";
    foreach my $conf (@conf) {
	$which .= "$conf\-";
    }

    $$which_ref = $which;

    if ($verbose) {
	foreach my $conf (@conf) {
	    print "score $score_ref->{$conf} order $order_ref->{$conf}\n";
	}
    }
    
}


# order_blastn ()
#
#
sub order_blastn {

    my ($filea, $fileb) = @_;

    my $keyquery;
    my $keyquery_new;

    my $sbjct;

    my $count_qr= 0;

    my $n_ali= 0;
    my @ali;
    my @sc;

    my $sc;
    my $ali;

    open (OUT, ">$fileb") || die;

    open (SBLAST, "$filea") || die;
    while (<SBLAST>) {
	if    (/^\s*Query=\s+(.+)/ || ($reverse && /^\s*Sbjct=\s+(.+)/)) 
	{ 
	    $keyquery_new = $1; 

	    if ($count_qr > 0) {
		for (my $i = 0; $i <= $n_ali; $i++) {
		    for (my $j = $i+1; $j <= $n_ali; $j++) {
			if ($sc[$i] < $sc[$j]) {
			    $sc  = $sc[$i];
			    $ali = $ali[$i];

			    $sc[$i]  = $sc[$j];
			    $ali[$i] = $ali[$j];

			    $sc[$j]  = $sc;
			    $ali[$j] = $ali;

			}
		    }
		}

		print OUT "Query=  $keyquery\n";
		for (my $i = 0; $i <= $n_ali; $i++) {
		    print OUT $ali[$i] ;
		}
	    }
	    
	    $n_ali = -1;
	    undef @ali;
	    $keyquery = $keyquery_new;
	    $count_qr ++;
	}	
	elsif (/^>(.+)/) 
	{
	    $n_ali ++;
	    $ali[$n_ali] .= $_; 
	}
	elsif ($which =~ /^ID$/ && /^\s+Identities\s+=\s+\S+\/\S+\s\((\S+)%\)/) 
	{
     	    $sc[$n_ali] = $1; 
	    $ali[$n_ali] .= $_; 
	}
	elsif ($which =~ /^SC$/ && /^\s+Score\s+=\s+(\S+)/) 
	{
     	    $sc[$n_ali] = $1; 
	    $ali[$n_ali] .= $_; 
	}
	elsif ($n_ali > -1) { $ali[$n_ali] .= $_; }

    
    }
    
    #last case
    for (my $i = 0; $i <= $n_ali; $i++) {
	for (my $j = $i+1; $j <= $n_ali; $j++) {
	    if ($sc[$i] < $sc[$j]) {
		$sc  = $sc[$i];
		$ali = $ali[$i];
		
		$sc[$i]  = $sc[$j];
		$ali[$i] = $ali[$j];
		
		$sc[$j]  = $sc;
		$ali[$j] = $ali;
		
	    }
	}
    }

    if ($reverse) { print OUT "Sbjct=  $keyquery\n"; }
    else          { print OUT "Query=  $keyquery\n"; }

    for (my $i = 0; $i <= $n_ali; $i++) {
	print OUT $ali[$i] ;
    }
    
    close (SBLAST);
    close (OUT);
    
}

sub parse_alignment {

    my ($ali_num, $blast, $lend_ref, $rend_ref, $lend_sbjct_ref, $rend_sbjct_ref, $score_ref) = @_;

    my $numl_query;
    my $numl_sbjct;
    my $numr_query;
    my $numr_sbjct;

    my $start_query;
    my $start_sbjct;
    my $end_query;
    my $end_sbjct;

    my $line = 0;
    my $rev_query;
    my $rev_sbjct;

    my $pedantic = 0;

    my $blastfile = "$file.$ali_num";

    open (FILE, ">$blastfile") || die;
    print FILE $blast;
    close (FILE);

    open (FILE, "$blastfile") || die;
    while (<FILE>) {
	if (/^\s+Score\s+=\s+(\S+)/)       
	{ 
	    $score_ref->{$ali_num} = $1;
	}
	elsif (/^\s+Identities.+\s+Strand\s+=\s+(\S+)\s+\/\s+(\S+)/)       
	{ 
	    my $q = $1;
	    my $s = $2;

	    if ($q =~ /^Plus$/) { $rev_query = 0; }
	    else                { $rev_query = 1; }

	    if ($s =~ /^Plus$/) { $rev_sbjct = 0; }
	    else                { $rev_sbjct = 1; }

	}
	elsif (/^Query:\s+(\S+)\s+\S+\s+(\S+)/) 
	{ 
	    
	    $line ++;
	    $numl_query = $1; 
	    $numr_query = $2;
	    
	    if ($line == 1) {
		if    ($numl_query < $numr_query || ($rev_query == 0 && $numl_query == $numr_query) ) { $start_query = $numl_query; $end_query   = $numr_query; } 
		elsif ($numl_query > $numr_query || ($rev_query == 1 && $numl_query == $numr_query) ) { $end_query   = $numl_query; $start_query = $numr_query; }
	    }
	    else { 
		if    ($numl_query < $numr_query || ($rev_query == 0 && $numl_query == $numr_query) ) { $end_query   = $numr_query; } 
	       	elsif ($numl_query > $numr_query || ($rev_query == 1 && $numl_query == $numr_query) ) { $start_query = $numr_query; }
	    }

	    if ($pedantic) {
		if    ($numl_query < $numr_query || ($rev_query == 0 && $numl_query == $numr_query) ) { print $numl_query, " ", $numr_query, "\n"; }
	      	elsif ($numl_query > $numr_query || ($rev_query == 1 && $numl_query == $numr_query) ) { print $numr_query, " ", $numl_query, "\n"; }
	    }

	    if ($start_query < 0) { die "wrong blast limits: start_query = $start_query\n"; }
	    	
	}
 	elsif (/^Sbjct:\s+(\S+)\s+\S+\s+(\S+)/) 
	{ 
	    
	    $numl_sbjct = $1; 
	    $numr_sbjct = $2;
	    
	    if ($line == 1) {
		if    ($numl_sbjct < $numr_sbjct || ($rev_sbjct == 0 && $numl_sbjct == $numr_sbjct) ) { $start_sbjct = $numl_sbjct; $end_sbjct   = $numr_sbjct; } 
		elsif ($numl_sbjct > $numr_sbjct || ($rev_sbjct == 1 && $numl_sbjct == $numr_sbjct) ) { $end_sbjct   = $numl_sbjct; $start_sbjct = $numr_sbjct; }
	    }
	    else { 
		if    ($numl_sbjct < $numr_sbjct || ($rev_sbjct == 0 && $numl_sbjct == $numr_sbjct) ) { $end_sbjct   = $numr_sbjct; } 
	       	elsif ($numl_sbjct > $numr_sbjct || ($rev_sbjct == 1 && $numl_sbjct == $numr_sbjct) ) { $start_sbjct = $numr_sbjct; }

	    }

	    if ($pedantic) {
		if    ($numl_sbjct < $numr_sbjct || ($rev_sbjct == 0 && $numl_sbjct == $numr_sbjct) ) { print $numl_sbjct, " ", $numr_sbjct, "\n"; }
	      	elsif ($numl_sbjct > $numr_sbjct || ($rev_sbjct == 1 && $numl_sbjct == $numr_sbjct) ) { print $numr_sbjct, " ", $numl_sbjct, "\n"; }
	    }

	    if ($start_sbjct < 0) { die "wrong blast limits: start_sbjct = $start_sbjct\n"; }
	    	
	}
    }
    
    close (FILE);

    $lend_ref->{$ali_num} = $start_query;
    $rend_ref->{$ali_num} = $end_query;

    $lend_sbjct_ref->{$ali_num} = $start_sbjct;
    $rend_sbjct_ref->{$ali_num} = $end_sbjct;

    if ($pedantic) {
	print "Aligment:$ali_num \n";
	print "SCORE $score_ref->{$ali_num}\n";
	print "QUERY ENDS $lend_ref->{$ali_num} $rend_ref->{$ali_num}\n";
	print "SBJCT ENDS $lend_sbjct_ref->{$ali_num} $rend_sbjct_ref->{$ali_num}\n";
    }

    #paranoia
    if ($rend_ref->{$ali_num}       < $lend_ref->{$ali_num} ||
	$rend_sbjct_ref->{$ali_num} < $lend_sbjct_ref->{$ali_num}) {
	print "bad parsing of the alignment\n";
	print "Aligment:$ali_num \n";
	print "SCORE $score_ref->{$ali_num}\n";
	print "query ENDS $lend_ref->{$ali_num} $rend_ref->{$ali_num} [rev $rev_query]\n";
	print "sbjct ENDS $lend_sbjct_ref->{$ali_num} $rend_sbjct_ref->{$ali_num} [rev $rev_sbjct]\n";
	system("more $blastfile\n");
	die;
    }

    system("rm $blastfile\n");
}


# plot ()
#
#
sub plot {

    my ($plotfile) = @_;

    open(GP,'|'.GNUPLOT) || die "Gnuplot: $!";
    
    print GP "set terminal postscript color\n";
    print GP "set output '$plotfile.ps'\n";
    
    
    print GP "set title 'Density of BLASTN alignments'\n"; 
    print GP "set xlabel 'Position'\n";
    print GP "set ylabel 'Number of blastn hits'\n";
    
    print GP "plot '$plotfile' using 1:2 with lines,  '$plotfile' using 1:3 with lines\n";
    close GP;
    
    system ("ghostview -landscape -magstep -2 $plotfile.ps&\n");
}


sub print_selected_alignments {
    
    my ($fileb, $filec, $len_query_ref, $total_pos_ref, $total_pos_afterprunning_ref, $conserved_pos_ref, $bit_ref, $N, $k, $hisfilec, $hisc_ref) = @_;
    
    my $verbose = 0;
    my $keyquery;
    my $keyquery_new;

    my $numl;
    my $numr;

    my $start;
    my $end;

    my @howmany;
    my @which;
    my @blast;
    
    my $count_ali_per_query = 0;

    my $flag = 0;
    my $count = 0;
    my $count_two = 0;
    my $count_query = 0;

    my @qbit;
    my $query;
    my $sbjct;

    my $covered;
    my $uncovered;
    my $total;

    my $x;
    my $num;

    my $total_pos_afterprunning = 0;

    my $depth_conserved_pos = 0;

    my $n_conflicts = 0;
    my $n_conflicts_total = 0;
    my @conflicts;

    #print to the filec file
    #
    open (OUT, ">$filec") || die;
 
    open (SBLAST, "$fileb") || die;
    while (<SBLAST>) {
	if    (/^\s*Query=\s+(.+)/ || ($reverse && /^\s*Sbjct=\s+(.+)/)) 
	{ 
	    $keyquery_new = $1; 
	    
	    $query = $keyquery_new;

	    if ($count_query > 0) {
		$n_ali_per_query_two_unchanged{$keyquery} = $n_ali_per_query_two{$keyquery};
		$n_ali_per_query_two_fragments{$keyquery} = 0;

		if (!$opt_c) { 
		    resolve_conflicts_per_query ($keyquery, $len_query_ref, \@which, \@howmany, \@blast, \$n_conflicts, \$n_conflicts_total, \@conflicts);
		}
		for (my $c = 1; $c <= $count; $c ++)  { if ($blast[$c-1]) { print OUT $blast[$c-1]; }}
		undef @blast;

		if ($count > 0) {
		    $x = 0;
		    my $per_query = 0;
		    $total_pos_afterprunning += $len_query_ref->{$keyquery};

		    while ($x < $len_query_ref->{$keyquery}) { 
			if ($howmany[$x] > 0) { $depth_conserved_pos ++; $per_query ++; }
			if ($opt_h) { fill_histo_array ($howmany[$x], $N, $k, $hisc_ref); }
			$x ++;
		    }
		    $n_ali_two_unchanged += $n_ali_per_query_two_unchanged{$keyquery};
		    $n_ali_two_fragments += $n_ali_per_query_two_fragments{$keyquery};
		    if ($verbose) { 
			print "++$count_query> ($count) ($count_two) ($n_ali_two) $keyquery $len_query_ref->{$keyquery} $total_pos_afterprunning $per_query $depth_conserved_pos\n";
		    }

		}
	    }
	    
	    $count_query ++; 
	    $count_ali_per_query = 0;
	    $keyquery = $keyquery_new;

	    if ($bit_ref->{$keyquery}) {
		
		$covered   = ($bit_ref->{$keyquery} =~ s/1/1/g);
		$uncovered = ($bit_ref->{$keyquery} =~ s/0/0/g);
		$total = $covered + $uncovered;  
		
		$n_ali_per_query_two{$keyquery}  = $covered;
		$n_ali_two                      += $covered;

		@qbit = split(//,$bit_ref->{$keyquery});
		
		#paranoia
		if ($total   != $n_ali_per_query_one{$keyquery}) { 
		    print "how many at first at $keyquery? $total or $n_ali_per_query_one{$keyquery}?\n";  
		    die;
		}
		if ($covered != $n_ali_per_query_two{$keyquery}) { 
		    print "how many at last at $keyquery? $covered or $n_ali_per_query_two{$keyquery}?\n";  
		    die; 
		}
		if ($#qbit+1 != $n_ali_per_query_one{$keyquery}) { 
		    print "how many in qbit at $keyquery? ", $#qbit+1, " or $n_ali_per_query_one{$keyquery}?\n"; 
		    die; 
		}
	    }
		
	    $count = 0;
	    $count_two = 0;


	    for (my $l = 0; $l < $len_query_ref->{$keyquery}; $l++) { $howmany[$l] = 0; $which[$l] = "-";}
	    
	}
	elsif (/^>(.+)/)       
	{ 
	    $sbjct = $1;
	    $flag = $qbit[$count];
	    $count ++;

	    if ($flag) {
		$count_two ++;
		
		$blast[$count-1] = ">$query\n>$sbjct\n";

	    }

	}
	elsif ($flag && /^\s+Identities\s+=\s+\S+\/(\S+)/)       
	{ 
	    $count_ali_per_query ++;
	    $ave_len_ali_two += $1;
	    $blast[$count-1] .= $_;

	}
	elsif ($flag && (/^Query:\s+(\S+)\s+\S+\s+(\S+)/ || ($reverse && /^Sbjct:\s+(\S+)\s+\S+\s+(\S+)/)) ) 
	{ 
	    $blast[$count-1] .= $_;
	    
	    $numl = $1; 
	    $numr = $2;
	    
	    if ($numl < $numr) { $start = $numl-1; $end = $numr-1; }
	    else               { $start = $numr-1; $end = $numl-1; }
	    
	    if ($start < 0) { die "wrong blast limits: start= $start\n"; }

	    if ($end >= $len_query_ref->{$keyquery}) { die "wrong blast limits: end=$end len = $len_query_ref->{$keyquery}\n"; }	    
	    
	    for (my $l = $start; $l <= $end; $l++) {
		$howmany[$l] ++;
		$num = $count - 1;
		$which[$l] .= $num."-";
	    }
	}
	elsif ($flag) { $blast[$count-1] .= $_; }

    }
    
    close (SBLAST);

    #last case
    
    $n_ali_per_query_two_unchanged{$keyquery} = $n_ali_per_query_two{$keyquery};
    $n_ali_per_query_two_fragments{$keyquery} = 0;
    
    if (!$opt_c) {
	resolve_conflicts_per_query ($keyquery, $len_query_ref, \@which, \@howmany, \@blast, \$n_conflicts, \$n_conflicts_total, \@conflicts);
    }
    for (my $c = 1; $c <= $count; $c ++)  { if ($blast[$c-1]) { print OUT $blast[$c-1]; } }
    close (OUT);

    if ($count > 0) {
	$x = 0;
	my $per_query = 0;
	$total_pos_afterprunning += $len_query_ref->{$keyquery};
	while ($x < $len_query_ref->{$keyquery}) { 
	    if ($howmany[$x] > 0) { $depth_conserved_pos ++; $per_query ++;}
	    if ($opt_h) { fill_histo_array ($howmany[$x], $N, $k, $hisc_ref); } 
	    $x ++;
	}
	$n_ali_two_unchanged += $n_ali_per_query_two_unchanged{$keyquery};
	$n_ali_two_fragments += $n_ali_per_query_two_fragments{$keyquery};
	if ($verbose) {
	    print "++$count_query> ($count) ($count_two) ($n_ali_two) $keyquery $len_query_ref->{$keyquery} $total_pos_afterprunning $per_query $depth_conserved_pos\n";
	}
    }

    $$total_pos_afterprunning_ref = $total_pos_afterprunning;
    if ($$total_pos_afterprunning_ref > $$total_pos_ref) {
	print "error:  total_pos = $$total_pos_ref total_pos_afterprunning = $$total_pos_afterprunning_ref\n"; 
	die;
    }
    if ($verbose) { print "\ntotal_pos = $$total_pos_ref total_pos_afterprunning = $$total_pos_afterprunning_ref\n"; }

    if ($$conserved_pos_ref != $depth_conserved_pos) {
	print "error conserved_pos = $$conserved_pos_ref depth_conserved_pos = $depth_conserved_pos\n"; 
	die;
    }

    
    if ($verbose) { print "N queries fileb = $count_query\n"; }

    #now print the histogram
    my $ylabel = "Percentage of positions";
    my $xlabel = "Number of alignments per position";
    if ($opt_h) {
	write_histo_file ("$hisfilec", $N, $k, $$conserved_pos_ref, "Depth Histogram ** After prunning", 
			  $xlabel, $ylabel, $file, $hisc_ref, $min_depth, $max_depth);
    }
 
}

# prune_blastn ()
#
#
sub prune_blastn {

    my ($file, $filea, $len_query_ref, $total_pos_ref) = @_;

    my $query;
    my $keyquery;
    my $sbjct;

    my $length;

    my $evalue;
    my $scoreline;

    my $identity;
    my $identityline;

    my $flag = 0;

    my $total_pos = 0;

    open (OUT, ">$filea") || die;

    open (BLAST, "$file") || die;
    while (<BLAST>) {

	if    (/^\s*Query=(.+)/ || ($reverse && /^\s*Sbjct=(.+)/)) 
	{ 
	    $n_query ++;
	    $query = $1; 

	    if (length($query) == 0) { $query = "Query"; }

	    if ($query =~ /^\s+(\S+)/) { $query = $1; }
	    if ($query =~ /^(\S+)\s+/) { $query = $1; }

	    $keyquery = $n_query.">".$query; 
	    
	    if (!$reverse) { print OUT "Query=  $keyquery\n"; }
	    else           { print OUT "Sbjct=  $keyquery\n"; }

	    #set counters
	    $n_ali_per_query_total{$keyquery} = 0;
	    $n_ali_per_query_one{$keyquery}   = 0;
	    $n_ali_per_query_two{$keyquery}   = 0;

	    $worse_evalue{$keyquery} = exp(-300.0*log(10.0));
	}
	elsif (/^\s+\((\S+) letters/) 
	{ 
	    $len_query_ref->{$keyquery} = $1;  $len_query_ref->{$keyquery} =~ s/,//g;
	    $total_pos += $len_query_ref->{$keyquery};
	}
	elsif (/^>(.+)/)       
	{ 
	    $flag = 0;
	    $sbjct = $1; $sbjct =~ s/ //g; $sbjct =~ s/[\(\)\|\+\_\/]/-/g;  
	}
	elsif (/Expect\s+=\s+(\S+)/) 
	{
	    $evalue = $1; 

	    # dealing with different representation of evalues
	    if ($evalue =~ /^(\S+),/)       { $evalue = $1;          }
	    if ($evalue =~ /^(\S+)e-(\d+)/) { $evalue = $1*exp(-$2*log(10.0)); }
	    if ($evalue =~ /^e-(\d+)/)      { $evalue = exp(-$1*log(10.0));    }

	    if ($evalue > $worse_evalue{$keyquery}) { $worse_evalue{$keyquery} = $evalue; }

	    $scoreline = $_; 
	}
	elsif (/^\s+Identities\s+=\s+\S+\/(\S+)\s\((\S+)%\)/) 
	{
	    $flag = 0;

	    $length   = $1; 
	    $identity = $2; 
	    
	    $identityline = $_; 

	    $n_ali_total  ++;
	    $n_ali_per_query_total{$keyquery}  ++;

	    $ave_len_ali_total += $length;

	    if ($length   >= $min_len && $length   <= $max_len &&
		$identity >= $min_id  && $identity <= $max_id  &&  
		$evalue   <= $max_eval) { 
		$n_ali_per_query_one{$keyquery} ++; $n_ali_one ++; 
		$flag = 1;

		$ave_len_ali_one += $length;
		
		print OUT ">", $sbjct, "\n";
		print OUT $scoreline;
		print OUT $identityline;
		
	    }
	
	}
	elsif (/Parameters:/) { $flag = 0; }
	elsif ($flag == 1) { print OUT $_; }

    }
    close (BLAST);
    close (OUT);

    $$total_pos_ref = $total_pos;
}


sub qrnafile_validate {
    my ($qfile, $tnali_ref) = @_;

    my $n_ali = 0;
    my $seq = 0;

    my $seq1;
    my $seq2;
    
    my $name1;
    my $name1_new;
    my $name2;

    my $line;

    open (QFILE, "$qfile") || die;
    while (<QFILE>) {
	
	if (/^\>(.+)/ && $seq == 0) { 
	    $name1_new = $1;
	    if ($n_ali > 0) {
		if (length($seq1) != length($seq2)) { 
		    print "qrnafile_validate(): #$n_ali: bad alignment $name1 len_seq1=", 
		    length($seq1), " $name2 len_seq2=", length($seq2), "\n"; 
		    die;
		}
	    }
	    
	    $name1 = $name1_new;
	    $seq = 1; 
	    $seq1 = "";
	    $n_ali ++;
	}
	elsif (/^[^\>]/ && $seq == 1) { 
	    $line = $_; $line =~ s/ //g; $line =~ s/\n//g; $seq1 .= $line; 
	}
	elsif (/^\>(.+)/ && $seq == 1) {
	    $name2 = $1;
	    $seq   = 0; 
	    $seq2  = "";
	}	
	elsif (/^[^\>]/ && $seq == 0) { 
	    $line = $_; $line =~ s/ //g; $line =~ s/\n//g; $seq2 .= $line; 
	}
	
    }
    close (QFILE);

    #last case
    if (length($seq1) != length($seq2)) { 
	print "qrnafile_validate(): #$n_ali: bad alignment $name1 len_seq1=", 
	length($seq1), " $name2 len_seq2=", length($seq2), "\n"; 
	die;
    }
  
    $$tnali_ref = $n_ali;
}

sub resolve_conflicts_per_query {

    my ($keyquery, $len_query_ref, $which_ref, $howmany_ref, $blast_ref, $n_conflicts_ref, $n_conflicts_total_ref, $conflicts_ref) = @_;

    my $verbose = 0;

    my	$x = 0;
    my $n_conflicts_prv = $$n_conflicts_ref;
    my $n_conflicts_per_query = 0;

    my $start_pos;
    my $end_pos;
    my $start_pos_new;

    my $which;
    my $which_new;
    my $howmany;
    my $howmany_new;

    my $curr;

    $which = "";
    $start_pos = -1;
    $end_pos   = -1;
    
    my $is_conflict = 0;
    my $is_conflict_new = 0;

    while ($x < $len_query_ref->{$keyquery}) { 

	#identify conflicts
	if ($howmany_ref->[$x] > $depth) { 
	    $which_new = $which_ref->[$x];
	    
	    $is_conflict_new = 1;

	    if ($which_new =~ /^$which$/) { $end_pos = $x; }
	    else {
		
		$start_pos_new = $x;
		$howmany_new = $howmany_ref->[$x];
		
		if ($n_conflicts_per_query > 0) {
		    
		    my $string;
		    if ($which =~ /^\-(.+\-)/) { $string = $1;     }
		    else                       { $string = $which; } 

		    my @alis = split(/\-/,$string);
		    
		    if ($verbose) {
			print "\nCONFLICT $$n_conflicts_ref>\n";
			print "query: $keyquery\n";
			print "pos: $start_pos $end_pos\n";
			print "howmany: $howmany \n";
			print "which: $which \n";	
			print "alis: @alis \n";
			
			foreach my $ali_num (@alis) {
			    #print "$blast_ref->[$ali_num]\n";
			}
		    }
		    
		    my $flag = 0;
		    for (my $c = 0; $c < $n_conflicts_per_query; $c ++) {
			$curr = $c+$n_conflicts_prv;
			#print "c $c $conflicts_ref->[$curr]\n";
			foreach my $ali_num (@alis) {
			    if ($conflicts_ref->[$curr] =~ /\-$ali_num\-/) { 
				$flag = 1; 					    
			    }
			}
			if ($flag) {
			    foreach my $ali_num (@alis) {

				if ($conflicts_ref->[$curr] =~ /\-$ali_num\-/) { next; }
				else                                           { $conflicts_ref->[$curr] .= "$ali_num-"; }
			    }

			    if ($verbose) { print "curr $curr which $which conflicts $conflicts_ref->[$curr]\n"; }

			}
			
		    }

		    if ($flag == 0) { 
			my $c = $n_conflicts_per_query+$n_conflicts_prv;
			$conflicts_ref->[$c] = $which; $n_conflicts_per_query ++; 
			if ($verbose) { print "curr $c which $which conflicts $conflicts_ref->[$c]\n"; }
		    }
		    
		    $is_conflict = $is_conflict_new;
		}
		
		$start_pos = $start_pos_new;
		$end_pos = $start_pos;
		$which = $which_new;
		$howmany = $howmany_new;

		$$n_conflicts_ref ++;
	    }

	    if ($n_conflicts_per_query == 0 || $$n_conflicts_ref == $n_conflicts_prv+1) { 
		$n_conflicts_per_query = 1;
		$conflicts_ref->[$n_conflicts_prv] = $which;

		$$n_conflicts_ref ++;

		$start_pos = $start_pos_new;
		$which = $which_new;
		$howmany = $howmany_new;
	    }
	    
	}
	#end identify_conflict

	$x ++;
    }

    #last conflict
    if ($is_conflict) {
	my $string;
	if ($which =~ /^\-(.+\-)/) { $string = $1;     }
	else                       { $string = $which; } 
	
	my @alis = split(/\-/,$string);
	
	if ($verbose) {
	    print "\nCONFLICT $$n_conflicts_ref>\n";
	    print "query: $keyquery\n";
	    print "pos: $start_pos $end_pos\n";
	    print "howmany: $howmany \n";
	    print "which: $which \n";
	    
	    print "alis: @alis \n";
	    
	    foreach my $ali_num (@alis) {
		#print "$blast_ref->[$ali_num]\n";
	    }
	}
	
	my $flag = 0;
	for (my $c = 0; $c < $n_conflicts_per_query; $c ++) {
	    $curr = $c+$n_conflicts_prv;
	    #print "c $c $conflicts_ref->[$curr]\n"; 
	    foreach my $ali_num (@alis) {
		if ($conflicts_ref->[$curr] =~ /\-$ali_num\-/) { 
		    $flag = 1; 					    
		}
	    }
	    if ($flag) {
		foreach my $ali_num (@alis) {
		    
		    if ($conflicts_ref->[$curr] =~ /\-$ali_num\-/) { next; }
		    else                                           { $conflicts_ref->[$curr] .= "$ali_num-"; }
		}
		
		#print "which $which curr $curr conflicts $conflicts_ref->[$curr]\n"; 
		
	    }
	    
	}
	if ($flag == 0) { 
	    my $c = $n_conflicts_per_query+$n_conflicts_prv;
	    $conflicts_ref->[$c] = $which; $n_conflicts_per_query ++; 
	    if ($verbose) { print "curr $c which $which conflicts $conflicts_ref->[$c]\n";  }
	}
    }
    
    if ($n_conflicts_per_query > 0) { 
	$$n_conflicts_ref --;
	if ($verbose) { print "N_conflicts_per query $keyquery = ", $n_conflicts_per_query, "\n"; }
    }
    
    $$n_conflicts_total_ref += $n_conflicts_per_query - 1;
    
    # now resolve the conflicts
    for (my $c = 0; $c < $n_conflicts_per_query; $c ++) {
	resolve_cluster_alignments ($keyquery, $conflicts_ref->[$c+$n_conflicts_prv], $blast_ref);	
    }

}

sub resolve_cluster_alignments {

    my ($keyquery, $which, $blast_ref) = @_;
    
    my $verbose = 0;
    my $string;
    if ($which =~ /^\-(.+\-)/) { $string = $1;     }
    else                       { $string = $which; } 
    
    my @conf = split(/\-/,$string);
    
    my %lend;
    my %rend;
    my %lend_sbjct;
    my %rend_sbjct;
    my %scores;

    my %order;

    my $extreme_left;
    my $extreme_right;
    my @limit_left;
    my @limit_right;
    my $edge = 0;
    my $flag;

    foreach my $conf (@conf) {
	parse_alignment ($conf, $blast_ref->[$conf], \%lend, \%rend, \%lend_sbjct, \%rend_sbjct, \%scores);
	if ($verbose) { 
	    print "$conf\n$blast_ref->[$conf]\n";
	    print "lend $lend{$conf} rend $rend{$conf} $scores{$conf} \n";  
	}
    }
    
    order_alignments (\$which, $blast_ref, \%lend, \%rend, \%scores, \%order);
    
    if ($which =~ /^\-(.+\-)/) { $string = $1;     }
    else                       { $string = $which; } 
    
    @conf = split(/\-/,$string);
 
   if ($verbose) {
       foreach my $conf (@conf) {
	   print "\nconf=$conf\n";
	   #print "$blast_ref->[$conf]\n";
	   print "$conf> lend $lend{$conf} rend $rend{$conf} lend_sbjct $lend_sbjct{$conf} rend_sbjct $rend_sbjct{$conf} $scores{$conf} $order{$conf}\n";   
       }
   }
    
    # keep these alignments, set the absolute boundaries
    foreach my $conf (@conf) {
	$flag = 0;

	if ($order{$conf} == 0) { 
	    $limit_left[$edge]  = $lend{$conf}; 
	    $limit_right[$edge] = $rend{$conf}; 

	    $extreme_left  = $lend{$conf};
	    $extreme_right = $rend{$conf}; 
	    $edge ++;

	    if ($verbose) {  
		print"conf $conf--absolute boundaries\n";
		for (my $a = 0; $a < $edge; $a ++) {
		    print "edge = $a $limit_left[$a] $limit_right[$a]\n"; 
		}
	    }
	}       

	elsif ($order{$conf} < $depth) { 
	    
	    for (my $a = 0; $a < $edge; $a ++) {
		if ($verbose) { print "\n lend $lend{$conf} < $limit_left[$a]  -- rend $rend{$conf} >= ", $limit_left[$a]-1,"\n";}
		#
		#overlap left
		#
		if ($lend{$conf} <  $limit_left[$a]  && $rend{$conf} >=  $limit_left[$a]-1 
		    && ($a == 0 || ($a > 0 && $lend{$conf} > $limit_right[$a-1]))) { 
		    if ($verbose) { printf("\noverlap LEFT\n");}
		    $limit_left[$a] = $lend{$conf}; 
		    $flag = 1;
		    
		    #reevalueate right edges 
		    #reduce edges by one
		    if ($a < $edge - 1 &&  $rend{$conf} >  $limit_left[$a+1] ) {
			$limit_right[$a] = $limit_right[$a+1];
			#remove one edge
			for (my $b = $a+2; $b < $edge; $b ++) { 
			    $limit_left[$b-1] = $limit_left[$b];
			    $limit_right[$b-1] = $limit_right[$b];
			}
			pop(@limit_left);
			pop(@limit_right);
			$edge --;			
		    }
		} 

		#
		#overlap right
		#
		if ($rend{$conf} >  $limit_right[$a] && $lend{$conf} <= $limit_right[$a]+1) {  
		    $limit_right[$a] = $rend{$conf}; 
		    $flag = 1; 
		    #reevalueate left edges 
		    #reduce edges by one
		    if ($a > 0 && $lend{$conf} < $limit_right[$a-1] ) {
			$limit_right[$a-1] = $limit_right[$a];

			#remove one edge
			for (my $b = $a; $b < $edge-1; $b ++) { 
			    $limit_left[$b] = $limit_left[$b+1];
			    $limit_right[$b] = $limit_right[$b+1];
			}
			pop(@limit_left);
			pop(@limit_right);
			$edge --;			
		    }
		} 

		#
		#is included
		#
		if ($flag == 0 && $lend{$conf} >= $limit_left[$a]  && $rend{$conf} <= $limit_right[$a]) { $flag = 1; } 

	    }

	    #add a new edge and order them
	    if ($flag == 0) { $limit_left[$edge] =  $lend{$conf}; $limit_right[$edge] = $rend{$conf}; $edge ++; }   

	    @limit_left  = sort { $a <=> $b } @limit_left;
	    @limit_right = sort { $a <=> $b } @limit_right;

	    if ($verbose) {  
		print"conf $conf--absolute boundaries\n";
		for (my $a = 0; $a < $edge; $a ++) {
		    print "edge = $a $limit_left[$a] $limit_right[$a]\n"; 
		}
	    }
	    

	}
	else { 	    
	    $n_ali_per_query_two_unchanged{$keyquery} --;
	}

	if ($lend{$conf} < $extreme_left ) { $extreme_left  = $lend{$conf}; }
	if ($rend{$conf} > $extreme_right) { $extreme_right = $rend{$conf}; }
    }
     
    if ($verbose) { print "extremes: $extreme_left $extreme_right\n"; }

    @limit_left  = sort { $a <=> $b } @limit_left;
    @limit_right = sort { $a <=> $b } @limit_right;
    
   for (my $a = 0; $a < $edge-1; $a ++) {
	if ($limit_right[$a] >= $limit_left[$a+1]) { 
	    $limit_right[$a] = $limit_right[$a+1]; 
	    
	    #remove a+1 edge
	    for (my $b = $a+2; $b < $edge; $b ++) { 
		$limit_left[$b-1] = $limit_left[$b];
		$limit_right[$b-1] = $limit_right[$b];
	    }

	    $edge --;
	    $a --;
	}
    }

   if ($verbose) {  
	print"absolute boundaries\n";
  	    for (my $a = 0; $a < $edge; $a ++) {
		print "edge = $a $limit_left[$a] $limit_right[$a]\n"; 
	    }
    }

    #paranoia
    for (my $a = 0; $a < $edge; $a ++) {
	if ($limit_right[$a] < $limit_left[$a]) { 
	    print "bad edges.\n edge = $a ($limit_left[$a] $limit_right[$a])\n"; 
	    die; 
	}
    }
    for (my $a = 0; $a < $edge-1; $a ++) {
	if ($limit_left[$a+1] <= $limit_right[$a]) { 
	    print "bad edges.\n edge = $a ($limit_left[$a] $limit_right[$a])\n edge = ", $a+1, " ($limit_left[$a+1] $limit_right[$a+1])\n"; 
	    die; 
	}
    }

    #truncate these alignments to the outside of the absolute boundaries 
    #
    # we expand the boundaries as we do that, and the aligments are taken according to their order
    #
    
    foreach my $conf (@conf) {
	if ($order{$conf} >= $depth) { 
	    truncate_alignment ($keyquery, $conf, \$blast_ref->[$conf], 
				$lend{$conf}, $rend{$conf}, 
				$lend_sbjct{$conf}, $rend_sbjct{$conf}, 
				$scores{$conf}, \$edge, \@limit_left, \@limit_right);
	}
    }
    
    #paranoia final
    if ($edge > 1) { print "bad truncation edge = $edge\n"; die; }
    if ($limit_left[0]  != $extreme_left ) { 
	print "bad truncation\n limit_left  = $limit_left[0] extreme left $extreme_left \n limit_right = $limit_right[0] extreme right $extreme_right\n"; 
	die; 
    }
    if ($limit_right[0] != $extreme_right) { 
	print "bad truncation\n limit_right = $limit_right[0] extreme right $extreme_right\n limit_left  = $limit_left[0] extreme left $extreme_left \n"; 
	die; 
    }

}

# reverse_blast ()
#
#
sub reverse_blast
{
    my ($file, $filerev) = @_;

    my $query_num = 0;

    my $keysbjct;
    my %chain;
    my %bit;
    
    my $n_sbjct = 0;

    open (OUT, ">$filerev") || die;

    open (SBLAST, "$file") || die;
    while (<SBLAST>) {
	if    (/^\s*Query=\s+.+/) 
	{ 
	    $query_num ++;

	}	
	elsif (/^>(.+)$/) 
	{

	    $keysbjct = $1; $keysbjct =~ s/[\(\)\|\+\_\/]/-/g; 

	    $chain{$keysbjct} .= "$query_num"."\-";

	}

    }
    close (SBLAST);
    
    foreach my $sbjct (keys %chain) {  

	$n_sbjct ++;
	my @bit;
	my $get = 0;
	my $keyquery;

	print OUT "Sbjct= $sbjct\n"; 

	if ($sbjct =~ /^.+\-(\d+)\-(\d+)$/) { 

	    my $len = $2 - $1; 

	    print OUT "          ($len letters)\n";
		
	}
	
	for (my $n = 0; $n < $query_num; $n++) { $bit{$sbjct} .= "0"; }
	
	foreach my $query (split(/\-/, $chain{$sbjct})) { substr($bit{$sbjct}, $query, 1) = "1"; }
	
	@bit = split(//, $bit{$sbjct});

	$query_num = 0;
	open (SBLAST, "$file") || die;
	while (<SBLAST>) {
	    if    (/^\s*Query=\s+(.+)/) 
	    { 
		$keyquery = $1;

		if ($bit[$query_num] == 1) { }

		$query_num ++;
		
	    }	
	    elsif (/^>(.+)$/) 
	    {
		$get = 0;
		$keysbjct = $1; $keysbjct =~ s/[\(\)\|\+\_\/]/-/g; 
		if ($keysbjct =~ /^$sbjct$/) { 
		    $get = 1; 
		    print OUT ">$keyquery\n";

		}

	    }
	    elsif (/Parameters:/) { $get = 0; }
	    elsif ( $get == 1) { print OUT $_; }
	    
	}
	close (SBLAST);
	
    }
}

sub truncate_alignment {

    my ($keyquery, $ali_num, $blast_ref, $lend, $rend, $lend_sbjct, $rend_sbjct, $score, $edge_ref, $limit_left_ref, $limit_right_ref) = @_;

    my $verbose = 0;

    my $edge  = $$edge_ref;
    my $blast = $$blast_ref;

    my @left;
    my @right;
    my $ali_edge = 0;

    my $blastfile = "$file.blast.$ali_num";
    open (FILE, ">$blastfile") || die;
    print FILE $blast;
    close (FILE);

    $blast = "";

    if ($verbose) { 
	print "\nali_num=$ali_num> lend = $lend, rend = $rend, lend_sbjct = $lend_sbjct, rend_sbjct = $rend_sbjct, score = $score\n";
 	#system("more $blastfile\n"); 
   }

    if ($verbose) { 
	print"\nabsolute GIVEN boundaries\n";
	for (my $a = 0; $a < $edge; $a ++) {
	    print "edge = $a $limit_left_ref->[$a] $limit_right_ref->[$a]\n"; 
	}
    }
    
    #
    # 8  possible cases
    #
    my $cur;
    #----------------------------------
    #case 1 - left of one and before the next
    #----------------------------------
    for (my $a = 0; $a < $edge; $a ++) {
	
	#1a - touches to the right
	#
	if ($rend == $limit_left_ref->[$a]-1 &&
	    ($a == 0 || ($a > 0 && $lend > $limit_right_ref->[$a-1]+1))) {
	    $left[$ali_edge]  = $lend; 
	    $right[$ali_edge] = $rend; 
	    $ali_edge ++;
	    
	    #modify edge
	    $limit_left_ref->[$a]  = $lend;
	}
	
	#1b - touches to the left
	#
	if ($rend < $limit_left_ref->[$a]-1 &&
	    ($a > 0 && $lend == $limit_right_ref->[$a-1]+1)) {
	    $left[$ali_edge]  = $lend; 
	    $right[$ali_edge] = $rend; 
	    $ali_edge ++;
	    
	    #modify edge
	    $limit_right_ref->[$a-1]  = $rend;
	}

	# 1c# - touches both ends
	#
	if ($rend == $limit_left_ref->[$a]-1 &&
	    ($a > 0 && $lend == $limit_right_ref->[$a-1]+1)) {
	    $left[$ali_edge]  = $lend; 
	    $right[$ali_edge] = $rend; 
	    $ali_edge ++;
	    
	    #remove one edge
	    $limit_right_ref->[$a-1] = $limit_right_ref->[$a+1];
	    
	    for (my $b = $a+1; $b < $edge; $b ++) { 
		$limit_left_ref->[$b-1] = $limit_left_ref->[$b];
		$limit_right_ref->[$b-1] = $limit_right_ref->[$b];
	    }
	    $edge --;
	    $a--;
	}

	#1d - they don't  touch
	#
	if ($rend < $limit_left_ref->[$a]-1 &&
	    ($a == 0 || ($a > 0 && $lend > $limit_right_ref->[$a-1]+1))) {
	    $left[$ali_edge]  = $lend; 
	    $right[$ali_edge] = $rend; 
	    $ali_edge ++;
	    
	    #add one more edge
	    for (my $b = $edge-1; $b >= $a; $b --) { 
		$limit_right_ref->[$b+1] = $limit_right_ref->[$b];
		$limit_left_ref->[$b+1]  = $limit_left_ref->[$b];
	    }
	    $limit_left_ref->[$a]  = $lend;
	    $limit_right_ref->[$a] = $rend;
	    $edge ++;
	}
    }

    #----------------------------------
    #case 2 - right of last
    #----------------------------------
    $cur = $edge-1;

    #2a - they touch
    #
    if ($lend == $limit_right_ref->[$cur]+1) {
       	$left[$ali_edge]  = $lend; 
	$right[$ali_edge] = $rend; 
	$ali_edge ++;
	
	#modify edge
	$limit_right_ref->[$cur] = $rend;
    }
    
    #2b - they don't touch
    #
    if ($lend > $limit_right_ref->[$cur]+1) {
       	$left[$ali_edge]  = $lend; 
	$right[$ali_edge] = $rend; 
	$ali_edge ++;
	
	#add one more edge
	$limit_left_ref->[$cur+1]  = $lend;
	$limit_right_ref->[$cur+1] = $rend;
	$edge ++;
    }


    #----------------------------------
    #case 3 - starts left ** ends in middle
    #case 4 - starts left ** ends right of inner boundary
    #case 5 - starts left ** ends right of last
    #----------------------------------
    for (my $a = 0; $a < $edge; $a ++) {
	if ($lend <  $limit_left_ref->[$a] && 
	    $rend >= $limit_left_ref->[$a] &&
	    ($a == 0 || ($a > 0 && $lend > $limit_right_ref->[$a-1])) ) { 

	    $left[$ali_edge]  = $lend; 
	    $right[$ali_edge] = $limit_left_ref->[$a]-1; 
	    $ali_edge ++;

	    #reevalueate left edges, extend to the left
	    $limit_left_ref->[$a] = $lend;

	    for (my $x = $a+1; $x < $edge; $x ++) {

		#----------------------------------
		#case 3 - starts left ** ends in middle
		#----------------------------------
		if ($rend > $limit_left_ref->[$x]) {
		    $left[$ali_edge]  = $limit_right_ref->[$x-1]+1; 
		    $right[$ali_edge] = $limit_left_ref->[$x]-1; 
		    $ali_edge ++;	
		    
		    #reevalueate left edges, extend to the left
		    $limit_right_ref->[$a] = $limit_right_ref->[$x];
		    #remove one more edge
		    for (my $b = $x+1; $b < $edge; $b ++) { 
			$limit_left_ref->[$b-1] = $limit_left_ref->[$b];
			$limit_right_ref->[$b-1] = $limit_right_ref->[$b];
		    }
		    $edge --;
		    $x --;
		}
		
		#------------------------------------
		#case 4 - starts left ** ends right of inner boundary
		#------------------------------------
		if ($x > $a+1 && 
		    $rend < $limit_left_ref->[$x] &&
		    $lend > $limit_left_ref->[$x-1]&&
		    $lend < $limit_right_ref->[$x-1]) {
		    $left[$ali_edge]  = $limit_right_ref->[$x-1]+1; 
		    $right[$ali_edge] = $rend; 
		    $ali_edge ++;	
		    #reevalueate left edges, extend to the left
		    $limit_right_ref->[$a] = $rend;
		}
	    }
	    
	    #------------------------------------
	    #case 5 - starts left ** ends right of last
	    #------------------------------------
	    if ($rend > $limit_right_ref->[$edge-1]) {
		$left[$ali_edge]  = $limit_right_ref->[$edge-1]+1; 
		$right[$ali_edge] = $rend; 
		$ali_edge ++;	
		#reevalueate left edges, extend to the left
		$limit_right_ref->[$a] = $rend;
	    }
	    
	}
    }
    
    #------------------------------------
    #case 6 - starts middle ** ends in middle
    #case 7 - starts middle ** ends right of inner boundary
    #case 8 - starts middle ** ends right of last
    #------------------------------------
    for (my $a = 0; $a < $edge; $a ++) {
	if ($lend >= $limit_left_ref->[$a]  &&
	    $rend >  $limit_right_ref->[$a] &&
	    ($edge == 1 || $a == $edge - 1 || ($edge > 1 && $a < $edge - 1 && $lend <  $limit_left_ref->[$a+1]) ) 
	    ) { 
	    
	    if ($edge == 1) { #extend edge
		$left[$ali_edge]  = $limit_right_ref->[$a]+1; 
		$right[$ali_edge] = $rend; 
		$ali_edge ++;	
		
		$limit_right_ref->[$a] = $rend;
	    }
	    
	    for (my $x = $a+1; $x < $edge; $x ++) {
		
		#----------------------------------
		#case 6 - starts middle ** ends in middle
		#----------------------------------
		if ($rend >= $limit_left_ref->[$x]) {

		    if ($limit_right_ref->[$x-1]+1 < $limit_left_ref->[$x]) {
			$left[$ali_edge]  = $limit_right_ref->[$x-1]+1; 
			$right[$ali_edge] = $limit_left_ref->[$x]-1; 
			$ali_edge ++;	
		    }
		    
		    #reevalueate left edges, extend to the left
		    $limit_right_ref->[$a] = $limit_right_ref->[$x];
		    #remove one more edge
		    for (my $b = $x+1; $b < $edge; $b ++) { 
			$limit_left_ref->[$b-1] = $limit_left_ref->[$b];
			$limit_right_ref->[$b-1] = $limit_right_ref->[$b];
		    }
		    $edge --;
		    $x --;
		}
		
		#------------------------------------
		#case 7 - starts middle ** ends right of inner boundary
		#------------------------------------
		if ($rend > $limit_right_ref->[$x-1] &&
		    $rend < $limit_left_ref->[$x]) {
		    $left[$ali_edge]  = $limit_right_ref->[$x-1]+1; 
		    $right[$ali_edge] = $rend; 
		    $ali_edge ++;	
		    #reevalueate left edges, extend to the right
		    $limit_right_ref->[$a] = $rend;
		}
	    }

	    #------------------------------------
	    #case 8 - starts middle ** ends right of last
	    #------------------------------------
	    if ($rend > $limit_right_ref->[$edge-1]) {
		$left[$ali_edge]  = $limit_right_ref->[$edge-1]+1; 
		$right[$ali_edge] = $rend; 
		$ali_edge ++;	
		#reevalueate right edges, extend to the right
		$limit_right_ref->[$a] = $rend;
	    }
	    
	}
    }

    # check if any of the edges are contiguous and merge them
    for (my $a = 0; $a < $edge-1; $a ++) {
	if ($limit_right_ref->[$a]+1 ==  $limit_left_ref->[$a+1])
	{
	    #reevalueate right edges, extend to the right
	    $limit_right_ref->[$a] = $limit_right_ref->[$a+1];
	    #remove one more edge
	    for (my $b = $a+2; $b < $edge; $b ++) { 
		$limit_left_ref->[$b-1] = $limit_left_ref->[$b];
		$limit_right_ref->[$b-1] = $limit_right_ref->[$b];
	    }
	    $edge --;
	    $a --;
	}	
    }

    if ($verbose) { 
	print"\nali boundaries = $ali_edge\n";
	for (my $a = 0; $a < $ali_edge; $a ++) {
	    print "ali_edge = $a $left[$a] $right[$a]\n\n"; 
	}
    }
    if ($verbose) { 
	print"\nabsolute MODIFIED boundaries\n";
	for (my $a = 0; $a < $edge; $a ++) {
	    print "edge = $a $limit_left_ref->[$a] $limit_right_ref->[$a]\n"; 
	}
    }
        
    #paranoia
    for (my $a = 0; $a < $ali_edge; $a ++) {
	if ($left[$a] > $right[$a]) {
	    print "bad ali_edge = $a $left[$a] $right[$a]\n"; 
	    die;
	}
	if ($left[$a] < $lend || $right[$a] > $rend) {
	    print "bad ali_edge = $a $left[$a] $right[$a]\n"; 
	    print "ali ends: $lend $rend\n"; 
	    die;
	}
    }
    for (my $a = 0; $a < $edge; $a ++) {
	if ($limit_left_ref->[$a] > $limit_right_ref->[$a]) {
	    print "bad ali_edge = $a $limit_left_ref->[$a] $limit_right_ref->[$a]\n"; 
	    die;
	}
    }

    my $head  = "";
    my $query = "";
    my $sbjct = "";
    my $rev_query = 0;
    my $rev_sbjct = 0;
    
    open (FILE, "$blastfile") || die;
    while (<FILE>) {
	if (/^\>/)   {
	    $head .= $_;
	}    
	elsif (/^\s+Score/)       
	{ 
	    #$head .= "#depth_fragment\n".$_;
	}
	elsif (/^\s+Identities.+\s+Strand\s+=\s+(\S+)\s+\/\s+(\S+)/)       
	{ 
	    $head .= $_;
	    $head .= "\n";

	    my $q = $1;
	    my $s = $2;

	    if ($q =~ /^Plus$/) { $rev_query = 0; }
	    else                { $rev_query = 1; }

	    if ($s =~ /^Plus$/) { $rev_sbjct = 0; }
	    else                { $rev_sbjct = 1; }

	}
	elsif (/^Query:\s+\S+\s+(\S+)\s+\S+/) 
	{ 
	    $query .= $1;
	  
	}
	elsif (/^Sbjct:\s+\S+\s+(\S+)\s+\S+/) 
	{ 
	    $sbjct .= $1;
	}
    }
    
    close (FILE);

    if ($verbose && $ali_edge > 0) {
	print "ali_num=$ali_num> lend = $lend, rend = $rend, score = $score\n";
	#system("more $blastfile\n"); 
    }
    system("rm $blastfile\n"); 

    my @ali_pos_query;
    my @ali_pos_sbjct;

    $query =~ s/ //g;
    $sbjct =~ s/ //g;
    my $len_query = $rend - $lend + 1;
    my $len_sbjct = $rend_sbjct - $lend_sbjct + 1;
    find_ali_pos ($query, \@ali_pos_query, $len_query);
    find_ali_pos ($sbjct, \@ali_pos_sbjct, $len_sbjct);

    my $left;
    my $right;
    my $left_sbjct;
    my $remove_left;
    my $remove_right;
    my $remove;
    my $posl_query;
    my $posl_sbjct;
    my $posr_query;
    my $posr_sbjct;
    my $shift = 60;

    for (my $a = 0; $a < $ali_edge; $a ++) {
	
	if ($rev_query) { $left = $rend - $right[$a]; $right = ($rend - $left[$a]);  }
	else            { $left = $left[$a] - $lend;  $right = ($right[$a] - $lend); }
	
	$remove_left  = $ali_pos_query[$left];
	$remove_right = $ali_pos_query[$rend-$lend] - $ali_pos_query[0] - $ali_pos_query[$right]; 
	if ($verbose) { print "\nhhh lend=$lend rend=$rend $ali_pos_query[$rend-$lend] $ali_pos_query[0] $ali_pos_query[$right]\n"; }
	
	if ($verbose) {
	    print "left $left right $right\n";
	    print "remove left $remove_left remove right $remove_right\n";
	}

	if ($remove_left < 0 || $remove_right < 0) {
	    print "bad removing remove_left $remove_left remove right $remove_right\n"; 
	    die; 
	}
	
	if (length($query) < $remove_left+$remove_right) { 
	    print "bad query  removing\n frag_query length = ", length($query), " remove left $remove_left remove right $remove_right\n"; 
	    die;
	}
	if (length($sbjct) < $remove_left+$remove_right) { 
	    print "bad subjct removing\n frag_query length = ", length($sbjct), " remove left $remove_left remove right $remove_right\n"; 
	    die;
	}

	my $frag_query = $query;
	$remove = $remove_left;
	while ($remove) {
	    if ($frag_query =~ /^\S(\S+)$/) { $frag_query = $1; }
	    $remove --;
	}

	$remove = $remove_right;
	while ($remove) {
	    if ($frag_query =~ /^(\S+)\S$/) { $frag_query = $1; }
	    $remove --;
	}

	my $frag_sbjct = $sbjct;
	my $chunk = "";
	$remove = $remove_left;
	while ($remove) {
	    if ($frag_sbjct =~ /^(\S)(\S+)$/) { $frag_sbjct = $2; $chunk .= $1; }
	    $remove --;
	}
	$chunk =~ s/\-//g; $left_sbjct = length($chunk);

	$remove = $remove_right;
	while ($remove) {
	    if ($frag_sbjct =~ /^(\S+)\S$/) { $frag_sbjct = $1; }
	    $remove --;
	}

	if ($rev_query) { $posl_query = $rend - $left; }
	else            { $posl_query = $left + $lend; }
	if ($rev_sbjct) { $posl_sbjct = $rend_sbjct - $left_sbjct; }
	else            { $posl_sbjct = $left_sbjct + $lend_sbjct; }

	if ($verbose) {
	    print "pos left query $posl_query \n";
	    print "pos left sbjct $posl_sbjct \n";
	}
	
	# take only those fragments above the len cutoff
	if (length($frag_query) >= $min_len) {

	    $n_ali_per_query_two_fragments{$keyquery} ++;

	    my $print_query;
	    my $print_sbjct;
	    my $qq_query;
	    my $qq_sbjct;
	    my $meat_query;
	    my $meat_sbjct;
	    my $meatungap_query;
	    my $meatungap_sbjct;
	    
	    $blast .= $head;
	    while ($frag_query) {	    
		
		if (length($frag_query) < $shift) { 
		    
		    $meat_query = $frag_query; 
		    $meat_sbjct = $frag_sbjct; 
		    
		    $frag_query =~ s/\-//g; 
		    $frag_sbjct =~ s/\-//g; 
		    $qq_query = length($frag_query)-1; undef $frag_query; 
		    $qq_sbjct = length($frag_sbjct)-1; undef $frag_sbjct;
		}
		else { 
		    $frag_query =~ s/(.{$shift})//; $meat_query = $1; 
		    $frag_sbjct =~ s/(.{$shift})//; $meat_sbjct = $1; 
		    
		    $meatungap_query = $meat_query; $meatungap_query =~ s/\-//g; 
		    $meatungap_sbjct = $meat_sbjct; $meatungap_sbjct =~ s/\-//g; 
		    
		    $qq_query = length($meatungap_query)-1;                       
		    $qq_sbjct = length($meatungap_sbjct)-1;   
                    
		    
		}
		
		$posr_query = ($rev_query)? $posl_query - $qq_query : $posl_query + $qq_query;
		$posr_sbjct = ($rev_sbjct)? $posl_sbjct - $qq_sbjct : $posl_sbjct + $qq_sbjct;
		
		my $lq = length($posl_query);
		my $ls = length($posl_sbjct);
		my $diff = $lq - $ls;
		
		my $lq_p = $posl_query;
		my $ls_p = $posl_sbjct;
		
		if ($diff > 0) { 
		    $ls_p = "";
		    for (my $i = 0; $i < $diff; $i ++) { $ls_p .= " "; }
		    $ls_p .= $posl_sbjct;
		}
		if ($diff < 0) { 
		    $lq_p = "";
		    for (my $i = 0; $i < -$diff; $i ++) { $lq_p .= " "; }
		    $lq_p .= $posl_query;
		}
		
		$print_query = "Query: $lq_p $meat_query $posr_query\n"; $blast .= "$print_query";
		$print_sbjct = "Sbjct: $ls_p $meat_sbjct $posr_sbjct\n"; $blast .= "$print_sbjct\n";
		
		$posl_query = ($rev_query)? $posr_query - 1 : $posr_query + 1;
		$posl_sbjct = ($rev_sbjct)? $posr_sbjct - 1 : $posr_sbjct + 1;
	    }
	}
	
    }    
    
    #if ($verbose) { print "BLAST\n$blast"; }
    $$edge_ref  = $edge;
    $$blast_ref = $blast;
}

sub write_histo_file {

    my ($hisfile, $N, $k, $norm_factor, $title, $xlabel, $ylabel, $key, $his_ref, $xl, $xr, $max_ref) = @_;
    
    my $dim = $N * $k;

    my $median = 0;
    my $mean;
    
    my $cum = 0;

    my $min_x = 0;
    my $max_x = 0;
    my $max_y = 0;
    my $add = 0;

    if ($norm_factor <= 0) { print "bad normalization\n"; die; }

    open (HIS,">$hisfile") || die;
    print HIS "#", $N, "\t", $k, "\t", $dim, "\n";
    print HIS "# $title\n";
    
    for (my $i=0; $i<=$dim; $i++) { $add += $his_ref->[$i]; if ($his_ref->[$i] > $max_y) { $max_y = $his_ref->[$i]; $mean = $i/$k;} }

    my $mid = int ($add/2) + 1;

    my $max;
    my $total;
    for (my $i=0; $i<=$dim; $i++) { 
	$cum += $his_ref->[$i]; 

	if ($cum <= $mid) { $median = ($i)/$k; } 

	if ($add > 0)  { if ($cum == $add) { $max_x = $N; } elsif ($cum/$add < 0.999999) { $max_x = $i/$k; } }
	if ($cum <= 0) { $min_x = $i/$k; }
	
   }

    if ($norm_factor > $add) { print "bad normalization\n"; die; }

    my $non_conserved = $add - $norm_factor;

    $cum = 0;
    for (my $i=0; $i<=$dim; $i++) { 


	$cum += $his_ref->[$i]; 

	if ($i/$k >= $min_x && $i/$k <= $max_x && 
	    ($cum < $add ||  $his_ref->[$i] > 0) ) {
	    print  HIS ($i)/$k, "\t", $his_ref->[$i], "\t", $cum; 
	    	    
	    if ($add > 0) { print  HIS "\t", $his_ref->[$i]/$add, "\t", $cum/$add, "\t"; }
	    else          { print  HIS "\t 0.0 \t 0.0\t";                                } 	

	    if ($norm_factor > 0) { 
		if ($i == 0) { 
		    if ($non_conserved > $his_ref->[$i]) { print "bad bad $his_ref->[$i] non_conserved $non_conserved\n"; die; }
		    print  HIS "\t", ($his_ref->[$i]-$non_conserved)/$norm_factor, "\t", $cum/$norm_factor, "\n"; 
		}
		else         {
		    print  HIS "\t", $his_ref->[$i]/$norm_factor, "\t", $cum/$norm_factor, "\n";
		}
	    }
	    else { 
		print  HIS "\t 0.0 \t 0.0\n";                                               
	    } 	

	    $max = ($i)/$k;
	    $total = $cum
	}
    }
    
    print HIS "#max value = $max ";
    if ($add > 0) { print HIS "(",   int($max/$add*10000)/100,")\n"; }    else { print HIS "\n"; }
    print HIS "#median    = $median \n"; 

    close (HIS);
    
    $$max_ref = $max;

    gnuplot_histo($hisfile, $title, $xlabel, $ylabel, $key, $total, $norm_factor);
    
}

# write_report ()
#
#
sub write_report {
    my ($report, $total_nali) = @_;

    my $verbose = 0;

    my $query;
    my $num;

    my $n_query_eval = 0;

    open (REP, ">$report") || die;
    
    print REP "FILE: \t$file\n";
    print REP "DIR:  \t$dir/\n";


    print REP "\nFIRST TRIMMING\n";
    print REP "Minimum length = $min_len\n";
    print REP "Maximum Evalue = $max_eval\n";
    print REP "Minimum %id    = $min_id\n";
    print REP "Maximum %id    = $max_id\n";

    print REP "\nSECOND TRIMMING\n";
    print REP "Alignments culled by = $which\n";
    print REP "Depth of alignments  = $depth\n";
    print REP "shift                = $shift\n\n";

    foreach my $r (keys(%n_ali_per_query_total)) { 

	$r =~ /^(\d+)>(.+)$/; $num = $1; $query = $2;
	print REP "$num-QUERY: $query \n";
	print REP "\tTotal \# alignments: $n_ali_per_query_total{$r}\t After First trimming: $n_ali_per_query_one{$r}\t After Second trimming: $n_ali_per_query_two{$r} unchanged $n_ali_per_query_two_unchanged{$r} fragments $n_ali_per_query_two_fragments{$r}\n";
	if ($verbose) {
	    print "$num-QUERY: $query \n";
	    print "\tTotal \# alignments: $n_ali_per_query_total{$r}\t After First trimming: $n_ali_per_query_one{$r}\t After Second trimming: $n_ali_per_query_two{$r} unchanged $n_ali_per_query_two_unchanged{$r} fragments $n_ali_per_query_two_fragments{$r}\n";
	}
	print REP "\tWorse evalue: $worse_evalue{$r}\n";

	if ($worse_evalue{$r} > $max_eval) { $n_query_eval ++; }
    }

    printf (REP "\nTotal \#Queries     \t%d\n", $n_query);
    printf (REP "Total \#Alignments    \t%d\t", $n_ali_total);
    if ($n_ali_total > 0.0) { printf (REP " ave_len = %.1f\n", $ave_len_ali_total/$n_ali_total); }
    else { printf (REP " ave_len = 0\n"); }
    
    printf (REP "After first trimming  \t%d\t", $n_ali_one);
    if ($n_ali_one > 0.0) { printf (REP " ave_len = %.1f\n", $ave_len_ali_one/$n_ali_one); }
    else { printf (REP " ave_len = 0\n"); }
    
    printf (REP "After second trimming \t%d\t", $n_ali_two);
    if ($n_ali_two > 0.0) {  printf (REP " ave_len = %.1f\n", $ave_len_ali_two/$n_ali_two); }
    else { printf (REP " ave_len = 0\n"); }
   
    print REP "\nTHIRD TRIMMING\n";
    print REP "Minimum length = $min_len\n";
    print REP "Total # alignments = $total_nali\n";

    printf (REP "Unchanged alignments \t%d\n", $n_ali_two_unchanged);
    if ($n_ali_two_unchanged > $n_ali_two) { print "bad counting of unchanged alignments\n"; die; }
    printf (REP "Fragment alignments \t%d\n", $n_ali_two_fragments);


    if ($verbose) {
	print "\nUnchanged alignments $n_ali_two_unchanged\n";
	print "Fragment alignments $n_ali_two_fragments\n";
	print "\nTotal positions:     $total_pos\n";
	print "Conserved positions: $conserved_pos\n";
    }

    printf (REP "\ntotal positions:     $total_pos\n");
    printf (REP "Conserved positions: $conserved_pos\n");

    close (REP);
}


