#Input: Unaligned query and genomic sequence in FASTA format #Output: Local alignments with mean posterior probability above 0 if(!-s "probalign"){ print "Probalign executable not found. Please make sure it is in the current directory\n"; } $f=shift; system "cp $f probalign.in"; open(IN,"probalign.in"); $i=-1; while(){ chomp $_; if(substr($_,0,1) eq ">"){ $i++; $n[$i]=$_;} else { $s[$i] .= $_; } } if($i > 1){ print "Error: only pairwise input\n"; } $total=$i+1; if(length($s[0]) > length($s[1])){$genome=$s[0]; $g=0;} else {$genome=$s[1]; $g=1;} $pp=1; $j=0; while ($pp > 0) { system "(probalign -nuc -T 9 -go 40 -ge 3 -showPP probalign.in > probalign.out) >& /dev/null"; open(IN,"probalign.out"); $l=; $l=; $ppline=$l; $i=-1; @n_=(); @s_=(); while(){ chomp $_; if(substr($_,0,1) eq ">"){ $i++; $n_[$i]=$_;} else { $s_[$i] .= $_; } } $s0=$s_[0]; $s1=$s_[1]; $n0=$n_[0]; $n1=$n_[1]; $ts0=$s0; $ts0=~ s/\-//g; $ts1=$s1; $ts1=~ s/\-//g; if(length($ts0) > length($ts1)){$g=0;} else {$g=1;} for($i=0; $i-1; $i--){ if(substr($s0,$i,1) ne "-" && substr($s1,$i,1) ne "-"){ $end=$i; $i=-1; } } $pp=0; $c=0; for($i=$start;$i<$end+1;$i++){ if(substr($s0,$i,1) ne "-" && substr($s1,$i,1) ne "-"){ $pp+=substr($ppline,$i,1); $c++; } } $pp/=$c; if($g==0){ $long=$s0; $ln=$n0; $short=$s1; $sn=$n1; } else { $long=$s1; $ln=$n1; $short=$s0; $sn=$n0; } $hit_l = substr($long,$start,$end-$start+1); $hit_s = substr($short,$start,$end-$start+1); $hit_l_ = $hit_l; $hit_l_ =~ s/\-//g; if($pp > 0){ printf "Genomic region: %d-%d, mean pp: %.2f\n", $start, $end, $pp; printf "%s\n%s\n%s\n%s\n", $sn, $hit_s, $ln, $hit_l; $long =~ s/\-//g; $short =~ s/\-//g; $hit_l =~ s/\-//g; $long =~ s/$hit_l//g; open(OUT, ">probalign.in"); printf OUT "%s\n%s\n%s\n%s\n", $sn, $short, $ln, $long; close OUT; $j++; print "\n"; } }