#!/Utils/bin/perl5

# Map each symbol of the 28-symbol alphabet to its index:
# period is 0, space is 1 and 'a' to 'z' are 2 to 27.
# These indices are the base-28 digits used to address @table.
@pos{'.', ' ', 'a' .. 'z'} = (0 .. 27);

# Load the trigram frequency table from the file named on the command line.
#
# Every table line starts with a three-character trigram, optionally followed
# by a tab and a count; a line without a count contributes 1.  The counts are
# accumulated in @table at index pos(c1)*784 + pos(c2)*28 + pos(c3), and the
# sum of all counts is kept in $total.
$total = 0;
die "use: echo \"is dit engels?\" | ./probability english.tab\n" if (@ARGV != 1);

# Three-argument open: the file name is taken literally, so characters such
# as '>' or '|' in it can never be interpreted as an open mode or a command.
open(my $table_fh, '<', $ARGV[0]) or die "cannot open $ARGV[0]: $!\n";
while (defined(my $line = <$table_fh>)) {
  # Blank or truncated lines hold no trigram; skip them instead of letting
  # the failed match count as one occurrence of "...".
  next unless $line =~ /^(.)(.)(.)\t?(\d+)?/;
  my ($one, $two, $three, $freq) = ($1, $2, $3, $4);
  $freq = 1 unless defined $freq;
  # A character that has no entry in %pos yields undef, which is 0 in this
  # arithmetic, so it is counted as if it were a period.
  $table[$pos{$one}*784 + $pos{$two}*28 + $pos{$three}] += $freq;
  $total += $freq;
}
close($table_fh);

# Every probability computed later divides by $total, so stop here with a
# clear message rather than with "Illegal division by zero" further on.
die "$ARGV[0] holds no trigram counts\n" if $total == 0;

# Score the trigrams arriving on STDIN, one per line (only the first three
# characters of a line are used), against the counts in @table.
#
# The first trigram contributes log(count(c2 c3 *) / $total), where
# count(c2 c3 *) is the number of table trigrams starting with its last two
# characters.  Every following trigram contributes log(count(trigram) /
# context), where context is count(c2 c3 *) of the trigram before it.
# Unseen events are smoothed to 0.5 / $total and 0.5 / 28 respectively.
# At most 50 trigrams are scored.  The results are left in $logprob and
# $length for the reporting code that follows.

# Return the summed count of all table trigrams that start with the two
# given characters (0 when none were seen).
sub bigram_count {
  my ($first, $second) = @_;
  my $base  = $pos{$first} * 784 + $pos{$second} * 28;
  my $count = 0;
  for my $last (0 .. 27) {
    $count += $table[$base + $last] || 0;
  }
  return $count;
}

$logprob = 0;
$length  = 0;
my $context = 0;     # bigram_count of the previous trigram's last two characters
while ($length < 50 && defined(my $line = <STDIN>)) {
  # Blank or truncated lines carry no trigram; skip them instead of scoring
  # the failed match as the trigram "...".
  next unless $line =~ /^(.)(.)(.)/;
  my ($one, $two, $three) = ($1, $2, $3);

  if ($length == 0) {
    # First trigram: no preceding context, use the bigram's share of $total.
    $context = bigram_count($two, $three);
    $logprob = log(($context < 1 ? 0.5 : $context) / $total);
  } else {
    my $t = $table[$pos{$one}*784 + $pos{$two}*28 + $pos{$three}] || 0;
    if ($context < 1 or $t < 1) {
      $logprob += log(0.5 / 28);          # unseen bigram or trigram
    } else {
      $logprob += log($t / $context);
    }
    $context = bigram_count($two, $three);
  }
  $length++;
}

# Without a single trigram there is nothing to score; a likelihood printed
# for empty input would be meaningless.
die "no trigrams found on STDIN\n" if $length == 0;

# Report the accumulated log likelihood next to the score that a uniform
# model over the 28 symbols would give to the same number of trigrams.
my $uniform_score = -log(28) * $length;
printf "Log likelihood :%9.9f\n", $logprob;
printf "Random         :%9.9f\n", $uniform_score;

