#!/usr/bin/perl -w
# -*-CPerl-*-
# Last changed Time-stamp: <2004-06-14 16:29:56 ivo>
#
# compare two pair prob matrices to compute the best common structure.
# 
# Problem: give the pair probs of two seqs A and B find a list of
# matches between pairs (i,j) from A (k,l) from B such that \Sum
# p^A_{i,j} * p^B_{k,l} is maximal and the list of matched pairs forms
# a valid secondary structure.
#
# Algorithm: 4-dim dynamic programming, O(n^4) memory and O(n^6) CPU
# Basic Recursions:
#
# S(i,j;k,l) = max{ S(i+1,j;k,l)+gap, S(i,j;k+1,l)+gap, S(i+1,j;k+1,l),
#                   max_{h<=j,q<=l} (S^M(i,h;k,q) + S(h+1,j;q+1,l))
# S^M(i,j;k,l) = p^A_{i,j} * p^B_{k,l} + S(i+1,j-1;k+1;l-1)
#
#
# use as  pmcomp.pl seq1_dp.ps seq2_dp.ps

use strict;
use Pod::Usage;
use Getopt::Long;
use warnings;

sub usage {
  die "Usage: $0 [--fast] [-g gap_penalty] [-D max separation] [--nolog] [--no-endgaps] seq1_dp.ps seq2_dp.ps\n";
}

my $sankoff=1;
my $endgaps = 1; # 1 if end gaps cost, 0 if end gaps free
my $seqw = 0.05; # weight of sequence score
my $gap = -3;    # gap penalty <= 0
my $maxsep = 5;
my $nolog = 0;
my $man;
&usage() unless GetOptions("g=f" => \$gap,
                           "D=i" => \$maxsep,
			   "nolog" => \$nolog,
			   "seqw=f" => \$seqw,  
			   "fast!" => sub {$sankoff= !pop()},
			   "endgaps!" => \$endgaps,
			   "man" => \$man
			  ) or pod2usage(2);
pod2usage(-exitstatus => 0, -verbose => 2) if $man;

# read the pair probabilities from dot plots
usage() if eof();
$nolog=1 unless $sankoff;
my ($pp1, $p1, $seq1) = read_dot_plot();
my $n1 = length($seq1);
#usage() if eof();
my ($pp2, $p2, $seq2) = read_dot_plot();
my $n2 = length($seq2);
if ($maxsep < abs($n1-$n2)) {
  warn "increasing maxsep to difference in sequence length";
  $maxsep = abs($n1-$n2);
}
my @S;

my ($opt, $aseq, $bseq);
my @match = ();
my @pi1 = ();
my @pi2 = ();
if ($sankoff) {
  foreach my $k (keys %{$p1}) {
    my ($i,$j) = split /$;/, $k;
    push @{$pi1[$i]}, [$j, $p1->{$k}];
  }
  foreach my $i (1..$n1) {
    $pi1[$i] = [] unless defined $pi1[$i];
    @{$pi1[$i]} = sort {$a->[0] <=> $b->[0]} @{$pi1[$i]};
  }
  foreach my $k (keys %{$p2}) {
    my ($i,$j) = split /$;/, $k;
    push @{$pi2[$i]}, [$j, $p2->{$k}];
  }
  foreach my $i (1..$n2) {
    $pi2[$i] = [] unless defined $pi2[$i];
    @{$pi2[$i]} = sort {$a->[0] <=> $b->[0]} @{$pi2[$i]};
  }
  $opt = pmcomp();
  print "\n$opt\n";
  backtrack(1,$n1,1,$n2);

  my $score = 0;
  @match = sort {$$a[0] <=> $$b[0]} @match;
  foreach (@match) {
    print "@$_\n";
    if (@$_ == 2) {
      $score += seq_score(substr($seq1, $_->[0]-1,1),substr($seq2,$_->[1]-1,1));
    } else {
      my ($i,$j,$k,$l) = @$_;
      $score += $nolog ? 2*$$p1{$i,$j}*$$p2{$k,$l} : $$p1{$i,$j}+$$p2{$k,$l};
    }
  }
  my ($astr, $bstr);
  ($aseq, $astr, $bseq, $bstr) =  make_alignment();
  print "$aseq\n$astr\n$bseq\n$bstr\n";
  my $ngap = ($aseq =~ tr/-//);
  $ngap += ($bseq =~ tr/-//);
  print "recalculated score: ", $score+$ngap*$gap,"\n";
} else {
  # cheap profile alignment
  ($opt, $aseq, $bseq) = prof_comp($p1, $seq1, $p2, $seq2);
  print "\n$opt\n";
  print "$aseq\n", '.' x length($aseq), "\n$bseq\n", '.' x length($aseq), "\n";
}
my $n_pm = new_pm($aseq, $bseq);
PS_dot($n_pm, $aseq, $bseq);

sub pmcomp {
  # use indices 1..n since thats what's in the dot plots
  $|=1 if -t STDOUT;
  # initialization
  for my $i (1..$n1+1) {
    my $k0 = $i-2*$maxsep -1;
    $k0 = 1 if $k0 < 1;
    for my $k ($k0..$n2+1) {
      last if $k>$i+2*$maxsep+1;
      for my $j ($i..$n1+1) {
	my $l0 = $k + ($j-$i) - $maxsep-1; $l0 = $k if $l0<$k;
	my $d = $j-$i+$k-$l0;
	for my $l ($l0..$n2+1) {
	  last if $d<-$maxsep-1;
	  $S[$i][$j][$k][$l] = abs($d)*$gap;
	  $d--;
	}
      }
      for my $l ($k..$n2+1) {
	last if $l-$k>$maxsep+1;
	$S[$i][$i-1][$k][$l] =($l-$k+1)*$gap;
      }
      for my $j ($i..$n1+1) {
	last if $j-$i>$maxsep+1;
	$S[$i][$j][$k][$k-1] =($j-$i+1)*$gap;
      }
      $S[$i][$i-1][$k][$k-1] = 0;
    }
  }

  for my $j (5..$n1) {
    for (my $i=$j; $i>0; $i--) {
      for my $l (1..$n2) {
	last if ($n1-$j)-($n2-$l)>$maxsep;
	next if ($n2-$l)-($n1-$j)>$maxsep;
	my $ss = 0;
	for (my $k=$l; $k>0; $k--) {
	  $ss =-10000;
# comment out the following next/last statements to get the version
# that restricts only pairs (not all subalignments)
	  last if ($i-$k)>$maxsep;
	  next if ($k-$i)>$maxsep;
	  last if ($l-$k)-($j-$i)>$maxsep;
	  next if ($j-$i)-($l-$k)>$maxsep;
	  $S[$i][$j][$k][$l] = $S[$i+1][$j][$k][$l] + $gap;
	  $ss = $S[$i+1][$j][$k+1][$l] + seq_score(substr($seq1,$i-1,1),substr($seq2,$k-1,1));
	  $S[$i][$j][$k][$l] = $ss   if $ss>$S[$i][$j][$k][$l];
	  $S[$i][$j][$k][$l] = $S[$i][$j][$k+1][$l] + $gap
	    if $S[$i][$j][$k+1][$l]+$gap>$S[$i][$j][$k][$l];
	  my $dd = $k-$i; $dd = $l-$j if $l-$j > $dd;
#	  for my $h ($i+4..$j) {
#	    next unless exists($$p1{$i,$h});
	  foreach (@{$pi1[$i]}) {
	    my ($h, $ps1) = @$_;
	    last if $h>$j;
	    my $q0 = $dd + $h - $maxsep;
	    $q0 = $k+3 if $q0<$k+3;
#	    for my $q ($q0..$l) {
#	      next unless exists($$p2{$k,$q});
	    foreach (@{$pi2[$k]}) {
	      my ($q, $ps2) = @$_;
	      next if $q<$q0;
	      last if $q>$l;
	      last if ($q-$k)-($h-$i)>$maxsep;
	      last if ($j-$h)-($l-$q)>$maxsep;
	      my $sm = $nolog ? 2*$ps1*$ps2 :
		                  $ps1+$ps2;
	      $sm += $S[$i+1][$h-1][$k+1][$q-1];
	      if (defined($S[$h+1][$j][$q+1][$l])) {
		$sm += $S[$h+1][$j][$q+1][$l];
	      } else {
		print STDERR "[$h+1][$j][$q+1][$l] undefined i=$i k=$k\n";
		$sm += $gap * abs($j-$h-($l-$q));
	      }
	      $ss = $sm if $sm>$ss;
	    }
	  }
	  $S[$i][$j][$k][$l] = $ss if $ss>$S[$i][$j][$k][$l];
	}
      }
    }
    print STDERR '.' if -t STDOUT;
  }
  return $S[1][$n1][1][$n2];
}

sub backtrack {
  my ($i,$j, $k,$l) = @_;
  local $, = ' ';
  while (($i<=$j) && ($k<=$l)) {
    my $found=0;
    return unless defined $S[$i][$j][$k][$l];
    $S[$i+1][$j][$k][$l] = abs($j-$i-1+$k-$l)*$gap 
      unless defined $S[$i+1][$j][$k][$l];
    $S[$i+1][$j][$k+1][$l] = abs($j-$i+$k-$l)*$gap 
      unless defined $S[$i+1][$j][$k+1][$l];
    if ($S[$i][$j][$k][$l] == $S[$i+1][$j][$k][$l] + $gap) {
      $i++; next;
    }
    if ($S[$i][$j][$k][$l] == $S[$i][$j][$k+1][$l] + $gap) {
      $k++; next;
    }
    if ($S[$i][$j][$k][$l] == $S[$i+1][$j][$k+1][$l] + seq_score(substr($seq1,$i-1,1),substr($seq2,$k-1,1))){
      push @match, [$i,$k]; $i++; $k++; next;
    }
  FOUND: for my $h ($i+4..$j) {
      next unless exists($$p1{$i,$h});
      for my $q ($k+4..$l) {
	next unless exists($$p2{$k,$q});
	$S[$i+1][$h-1][$k+1][$q-1] = abs($h-$i+$k-$q)*$gap
	  unless defined $S[$i+1][$h-1][$k+1][$q-1];
	$S[$h+1][$j][$q+1][$l] = abs($j-$h+$q-$l)*$gap
	  unless defined $S[$h+1][$j][$q+1][$l];
	my $sm = $nolog? 2*$$p1{$i,$h}*$$p2{$k,$q} : $$p1{$i,$h}+$$p2{$k,$q};
	$sm += $S[$i+1][$h-1][$k+1][$q-1] + $S[$h+1][$j][$q+1][$l] +
	  ($k-$k)*$gap;
	if (abs($S[$i][$j][$k][$l] - $sm)<1e-8) {
	  push @match, [$i,$h,$k,$q];
	  backtrack($i+1,$h-1,$k+1,$q-1);
	  $i = $h+1;
	  $k = $q+1;
	  $found=1;
	  last FOUND;
	}
      }
    }
    die "backtrack failed at $i $j $k $l" if !$found;
  }
}

sub make_alignment {
  # first make a list of matched positions and sort it
  my @m = sort {$$a[0] <=>$$b[0]}
    map { if (@$_ == 2) {[$_->[0],$_->[1], '.']}
	  else { [$_->[0], $_->[2], '('], [$_->[1], $_->[3], ')']}} @match;

  my ($aseq, $astr, $bseq, $bstr) = ('','','','');
  my @prev = (0,0); # pos of previous match
  foreach my $m (@m) {
    my $la = $$m[0] - $prev[0];
    my $lb = $$m[1] - $prev[1];
    if ($la>1) {
      $aseq .= substr($seq1, $prev[0], $la-1);
      $astr .= '.' x ($la-1);
      $bseq .= '-' x ($la-1);
      $bstr .= '-' x ($la-1);
    }
    if ($lb>0) {
      $aseq .= '-' x ($lb-1);
      $astr .= '-' x ($lb-1);
      $bseq .= substr($seq2, $prev[1], $lb);
      $bstr .= '.' x ($lb-1); $bstr .= $$m[2];

    }
    $aseq .= substr($seq1,$$m[0]-1,1);
    $astr .= $$m[2];
    @prev = @$m;
  }
  my $la = length($seq1) - $prev[0];
  my $lb = length($seq2) - $prev[1];
  $aseq .= substr($seq1, $prev[0], $la);
  $bseq .= substr($seq2, $prev[1], $lb);
  $astr .= '.' x $la;
  $bstr .= '.' x $lb;
  if ($la<$lb) {
    $aseq .= '-' x ($lb-$la);
    $astr .= '-' x ($lb-$la);
  } else {
    $bseq .= '-' x ($la-$lb);
    $bstr .= '-' x ($la-$lb);
  }
  return ($aseq, $astr, $bseq, $bstr);
}

sub new_pm {
  my %pm = ();
  my $n = length($aseq);
  my ($ai, $bi)=(1,1);
  for my $i (0..$n-1) {
    my ($aj, $bj)=(1,1);
    if (substr($aseq,$i,1) eq '-') {$bi++; next;}
    if (substr($bseq,$i,1) eq '-') {$ai++; next;}
    for my $j (0..$n-1) {
      if (substr($aseq,$j,1) eq '-') {$bj++; next;}
      if (substr($bseq,$j,1) eq '-') {$aj++; next;}
      $pm{$i+1,$j+1} = sqrt($$pp1{$ai,$aj}*$$pp2{$bi,$bj})
	if exists($$p1{$ai,$aj}) && exists($$p2{$bi,$bj});
      $aj++; $bj++;
    }
    $ai++; $bi++;
  }
  return \%pm;
}

sub read_dot_plot {
  my $minread = sqrt(0.0001);
  # first get the sequence
  my $seq; 
  while (<>) {
    next unless (/\/sequence \{ \((\S*)[\\\)]/);
    $seq = $1;              # empty for new version
    while (!/\) \} def/) {  # read until end of definition
      $_ = <>;
      /(\S*)[\\\)]/;      # ends either with `)' or `\'
      $seq .= $1;
    }
    last;
  }
  # read on for pairs
  my (%sc, %pp);
  while (<>) {
    next unless (/ubox$/);
    next if /^%/;
    my($i, $j, $p, $id) = split;
    # $p *= $p;
    next unless ($p>$minread);
    $pp{$i,$j} = $p*$p;
    if (!$nolog) {
      $p = log($p*length($seq)*2)/(log(length($seq)*2));
      next unless $p>0;
    } else {
      $p *= $p;
    }
    $sc{$i,$j} = $p unless /^%/;
  }
  continue {
    last if eof;
  }
    return \%pp, \%sc, $seq;
}

sub PS_dot {
  my ($pm, $aseq, $bseq) =@_;

  my $pshead = <<EOF;
%%BoundingBox: 66 211 518 662
%%DocumentFonts: Helvetica
%%Pages: 1
%%EndComments

%Options: -noLP -d2 
%This file contains the square roots of the base pair probabilities in the form
% i  j  sqrt(p(i,j)) ubox
100 dict begin

/logscale false def

%delete next line to get rid of title
270 665 moveto /Helvetica findfont 14 scalefont setfont (DF1140) show

/lpmin {
   1e-05 log  % log(pmin) only probs>pmin will be shown
} bind def

/box { %size x y box - draws box centered on x,y
   2 index 0.5 mul add            % x += 0.5
   exch 2 index 0.5 mul add exch  % x += 0.5
   newpath
   moveto
   dup neg   0 rlineto
   dup neg   0 exch rlineto
             0 rlineto
   closepath
   fill
} bind def

/sequence { (\\
REPLACEME
) } def
/len { sequence length } bind def

/ubox {
   logscale {
      log dup add lpmin div 1 exch sub dup 0 lt { pop 0 } if
   } if
   3 1 roll
   exch len exch sub 1 add box
} bind def

/lbox {
   3 1 roll
   len exch sub 1 add box
} bind def

72 216 translate
72 6 mul len 1 add div dup scale
/Helvetica findfont 0.95 scalefont setfont

% print sequence along all 4 sides
[ [0.7 -0.3 0 ]
  [0.7 0.7 len add 0]
  [0.7 -0.2 90]
  [-0.3 len sub 0.7 len add -90]
] {
  gsave
    aload pop rotate translate
    0 1 len 1 sub {
     dup 0 moveto
     sequence exch 1 getinterval
     show
    } for
  grestore
} forall

0.5 dup translate
% draw diagonal
0.04 setlinewidth
0 len moveto len 0 lineto stroke 

%draw grid
0.01 setlinewidth
len log 0.9 sub cvi 10 exch exp  % grid spacing
dup 1 gt {
   dup dup 20 div dup 2 array astore exch 40 div setdash
} { [0.3 0.7] 0.1 setdash } ifelse
0 exch len {
   dup dup
   0 moveto
   len lineto 
   dup
   len exch sub 0 exch moveto
   len exch len exch sub lineto
   stroke
} for
0.5 neg dup translate
EOF
  open(DOT, ">aln_dp.ps") || die "can't open aln_dp.ps";
  my $version = '0.0.1';
  my $seq = '';
  my %RY = ('A' => 'R', 'G' => 'R', 'R' => 'R',
	    'U' => 'Y', 'C' => 'Y', 'Y' => 'Y', 'T'=>'Y');
  for my $p (0.. length($aseq)-1) {
    my $a = substr($aseq, $p,1);
    my $b = substr($bseq, $p,1);
    if (($a eq $b) || ($b eq '-')) {
      $seq .= $a;
    } elsif ($a eq '-') {
      $seq .= $b;
    } elsif ($RY{$a} && $RY{$b} && $RY{$a} eq $RY{$b}) {
      $seq .= $RY{$a};
    } else {
      $seq .= 'N';
    }
  }

  $pshead =~ s/REPLACEME/$seq\\/;
  print DOT '%!PS-Adobe-3.0 EPSF-3.0',"\n",
    '%%Title: RNA DotPlot', "\n";
  print DOT '%%Creator: PMcomp.pl ', "$version\n";
  print DOT '%%CreationDate:', scalar(localtime), "\n";

  print DOT $pshead;

  foreach (keys %$pm) {
    my ($i,$j) = split($;);
    printf DOT "%d %d %1.5f ubox\n", $i, $j, $$pm{$_};
  }
  my $ai=1;
  my @apos = ();
  for my $p (1.. length($aseq)) {
    next if substr($aseq,$p-1,1) eq '-';
    $apos[$ai++] = $p;
  }
  foreach (@match) {
    next if @$_ == 2;
    my ($i,$j,$k,$l) = @$_;
    printf DOT "%d %d 0.95 lbox\n", $apos[$i], $apos[$j];
  }
  print DOT "showpage\nend\n";
  print DOT '%%EOF', "\n";
}

sub prof_comp {
  my ($pm1, $seq1, $pm2, $seq2) = @_;
  my (@p1, @p2, @S);
  my ($i,$j);
  for $i (1..length($seq1)) {
    @{$p1[$i]}[1..3] = (0,0,0);
  }
  for $i (1..length($seq1)) {
    for $j ($i+1..length($seq1)) {
      next unless exists $$pm1{$i,$j};
      $p1[$i][1] += $$pm1{$i,$j};
      $p1[$j][2] += $$pm1{$i,$j};
    }
  }
  for $i (1..length($seq1)) {
    no warnings;
    $p1[$i][0] = 1 - $p1[$i][1] - $p1[$i][2];
    $p1[$i][3] = substr($seq1, $i-1,1);
  }
  for $i (1..length($seq2)) {
    @{$p2[$i]}[1..3] = (0,0,0);
  }
  for $i (1..length($seq2)) {
    for $j ($i+1..length($seq2)) {
      next unless exists $$pm2{$i,$j};
      $p2[$i][1] += $$pm2{$i,$j};
      $p2[$j][2] += $$pm2{$i,$j};
    }
  }
  for $i (1..length($seq2)) {
    no warnings;
    $p2[$i][0] = 1 - $p2[$i][1] - $p2[$i][2];
    $p2[$i][3] = substr($seq2, $i-1,1);
  }

  # Initialize
  for $i (0..length($seq1)) {
    $S[$i][0] = $endgaps ? $i * $gap : 0;
  }
  for $j (0..length($seq2)) {
    $S[0][$j] = $endgaps ? $j * $gap : 0;
  }
  for $i (1..length($seq1)) {
    for $j (1..length($seq2)) {
      my $ss = $S[$i-1][$j-1] + pscore($p1[$i], $p2[$j]);
      $ss = $S[$i][$j-1] +$gap if $S[$i][$j-1] +$gap > $ss;
      $ss = $S[$i-1][$j] +$gap if $S[$i-1][$j] +$gap > $ss;
      $S[$i][$j] = $ss;
    }
  }

  # backtrack
  my ($aseq, $bseq) = ('','');
  $i=length($seq1); $j=length($seq2);
  if ($endgaps==0) {
    my ($im, $jm) = ($i,$j);
    my $max = $S[$i][$j];
    for my $ii (0..$i) {
      ($max, $im) = ($S[$ii][$j], $ii) if $S[$ii][$j]>$max;
    }
    for my $jj (0..$j) {
      ($max, $jm) = ($S[$i][$jj], $jj) if $S[$i][$jj]>$max;
    }
    if ($S[$im][$j] == $max) {
      $aseq .= reverse substr($seq1,$im,$i-$im);
      $bseq .= '-' x ($i-$im);
      $i=$im;
    }
    else {
      $aseq .= '-' x ($j-$jm);
      $bseq .= reverse substr($seq2,$jm,$j-$jm);
      $j=$jm;
    }
  }

  while ($i>0 && $j>0) {
    my $ss =  $S[$i][$j];
    if ($ss == $S[$i-1][$j-1] + pscore($p1[$i], $p2[$j])) {
      $i--; $j--;
      $aseq .= substr($seq1, $i, 1);
      $bseq .= substr($seq2, $j, 1);
    } elsif ($ss == $S[$i][$j-1] +$gap) {
      $j--;
      $aseq .= '-';
      $bseq .= substr($seq2, $j, 1);
    } else {
      $i--;
      $aseq .= substr($seq1, $i, 1);
      $bseq .= '-';
    }
  }
  $aseq .= reverse(substr($seq1, 0, $i)) . '-' x $j;
  $bseq .= reverse(substr($seq2, 0, $j)) . '-' x $i;
  $aseq = reverse $aseq;
  $bseq = reverse $bseq;
  return $S[length($seq1)][length($seq2)], $aseq, $bseq;
}

sub pscore {
  my ($p1, $p2) = @_;
  my $s = sqrt($$p1[0]*$$p2[0])+sqrt($$p1[1]*$$p2[1])+sqrt($$p1[2]*$$p2[2]);
  $s += seq_score($$p1[3], $$p2[3]);
  return $s;
}

sub seq_score {
  return $seqw if $_[0] eq $_[1];
  return 0;
}

__END__

=head1 NAME

pmcomp.pl - Comparison/Alignment of RNA pair probability matrices

=head1 SYNOPSIS

pmcomp.pl first_dp.ps second_dp.ps

Options:

  --fast       fast/inaccurate string-like alignment of probability profiles
  --nolog      don't use logarithmic scores for pair matches
  --seqw num   set weight of sequence score to num
  -g gap       set gap penalty
  -D maxsep    consider only sub-alignments with |(j-i)-(l-k)|<maxsep
  --endgaps    gap penalty for end-gaps is 0 (only with --fast)
  --help       brief help message
  --man        full documentation

=head1 OPTIONS

=over 8

=item B<--fast>

use the fast but inaccurate string-like alignment of probability
profiles, similar to what is done in RNApdist.

=item B<--nolog>

use the product of pair probabilities p_ij*p_kl as score for pair
matches instead of the default logarithmic score
log(p_ij/p_min) + log(p_kl/p_min)

=item B<-g gap>

Set the gap penalty to gap. Affine gap costs (gap open and gap
extension) will be added at some later time.

=item B<-D maxsep>

To reduce memory and CPU requirements to O(n^3) and O(n^4),
respectively, we consider only those partial alignments of subsequence
[i..j] with [k..l] where |(j-i)-(l-k)|<maxsep. maxsep must be at
larger or equal to the difference in sequence lengths (otherwise it
will be increased automatically)

=item B<--seqw num>

Set weight of sequence score to num. The Sankoff version currently
uses a sequence score only for unpaired matches. The weight num should
be much smaller than 1.

=item B<--endgaps>

do not penalize gaps at the end of the alignment, i.e. give them a
score of 0. So far only iomplemented for the --fast version.

=back

=head1 DESCRIPTION

B<pmcomp.pl> reads two two pair probability matrices as produced by
C<RNAfold -noLP -p> and finds the common secondary structure of maximal 
weight.

The method is a variant of Sankoff's algorithm, which requires O(n^6)
CPU and O(n^4) memory. To cut down these requirements we restrict the
maximum difference in the lengths of aligned subsequences [i..j] and
[k..l] to |(j-i)-(l-k)|<maxsep. Even so the algorithm is not suitable
for sequences longer than a few hundred bases.

Matches between pairs get a score of log(p_ij/p_min)+log(p_kl/p_min)
by default, unpaired matches get a score of $seqw for identical bases,
0 else. Gap costs are linear.

=cut

