package Features::Cooccurence;

use strict;
use warnings;

use Text::NSP::Measures::2D::MI::pmi;

# Features::Cooccurence
#
# Word co-occurrence feature. Calc() walks every sentence pair offered by the
# adapter, counts how often each (source word, target word) combination
# appears together, and stores one association score per word pair, computed
# by calculateStatistic() from Text::NSP::Measures::2D::MI::pmi.
# Score() looks a stored value up again.

# new($adapter)
#
# Builds the feature object around $adapter. The adapter is only stored here;
# Calc() later calls Reset(), HasNext(), CurrentSource() and CurrentTarget()
# on it. The score table starts out empty.
sub new {
    my ($class, $adapter) = @_;

    my $self = {
        adapter => $adapter,
        scores  => {},
    };

    return bless $self, $class;
}

# _tokenize($text)
#
# Splits $text into whitespace-separated tokens. The special ' ' pattern
# skips leading whitespace, so a sentence that starts with blanks does not
# produce an empty-string "word" (split /\s+/ would). An undefined sentence
# yields no tokens.
sub _tokenize {
    my ($text) = @_;

    return () unless defined $text;
    return split ' ', $text;
}

# Calc()
#
# Fills $self->{scores}{$source_word}{$target_word} for every word pair that
# co-occurs in at least one sentence pair of the adapter.
#
# The contingency counts handed to calculateStatistic() are all expressed in
# units of (source token, target token) pairs:
#   n11 - how often this source word was paired with this target word
#   n1p - how many pairs contain this source word
#   np1 - how many pairs contain this target word
#   npp - total number of pairs seen
#
# Returns nothing.
sub Calc {
    my $self = shift;

    my $adapter = $self->{adapter};
    $adapter->Reset();

    #print STDERR "Precalculating all word cooccurrences\n";

    my %src_pairs;          # source word -> number of pairs it takes part in
    my %trg_pairs;          # target word -> number of pairs it takes part in
    my %cooc;               # source word -> target word -> joint count
    my $total_pairs = 0;    # explicit 0 so an empty corpus never yields undef

    # NOTE(review): nothing in this loop advances the adapter, so HasNext()
    # presumably moves to the next sentence pair itself - confirm against the
    # adapter implementation.
    while ($adapter->HasNext()) {
        my @src_tokens = _tokenize($adapter->CurrentSource());
        my @trg_tokens = _tokenize($adapter->CurrentTarget());

        for my $s (@src_tokens) {
            # Every occurrence of $s is paired with each target token.
            $src_pairs{$s} += @trg_tokens;

            for my $t (@trg_tokens) {
                $cooc{$s}{$t}++;
                $total_pairs++;
            }
        }

        for my $t (@trg_tokens) {
            $trg_pairs{$t} += @src_tokens;
        }
    }

    # Scores are kept in a two-level hash rather than under a joined
    # "source-target" string: with a joined key the pairs ("a-b", "c") and
    # ("a", "b-c") would share one slot and overwrite each other.
    for my $s (keys %cooc) {
        for my $t (keys %{ $cooc{$s} }) {
            $self->{scores}{$s}{$t} = calculateStatistic(
                n11 => $cooc{$s}{$t},
                n1p => $src_pairs{$s},
                np1 => $trg_pairs{$t},
                npp => $total_pairs,
            );
            #print STDERR "$s-$t : $self->{scores}{$s}{$t}\n";
        }
    }

    return;
}

# Score($k, $i, $j, $s, $t)
#
# Returns the score stored by Calc() for source word $s and target word $t,
# or undef when the pair was never seen (or either word is undefined).
# $k, $i and $j are accepted but not used by this feature.
sub Score {
    my $self = shift;
    my ($k, $i, $j, $s, $t) = @_;

    # An explicit undef (not a bare return) keeps the one-value result the
    # hash lookup always produced, even when called in list context.
    return undef unless defined $s && defined $t;

    # Fetch the per-source row first so a miss does not autovivify an entry.
    my $row = $self->{scores}{$s};
    return undef unless $row;

    return $row->{$t};
}

1;