para

TITLE

HMM decoding with the Viterbi algorithm.


SUBTITLE

data

=begin data
{"type":"page","title":"","description":"","footer":"","file":"work_mods/all/Algorithm::Viterbi/lib/Algorithm/Viterbi.pm","sources":[],"node":{"type":"block","id":"BMyQpOnI7rg1T-FGEStcL","margin":"","name":"root","content":[{"text":"use v6;\n\nclass Algorithm::Viterbi;\n\n#our class Start {};\n#our class End {};\n\n# TODO:\nour role Observation {};\n\nmy grammar Grammar {\n    token TOP {\n        <chunk>+\n        [ $ || <.panic: \"Syntax error\"> ]\n    }\n\n    token chunk {\n        <record>+ \\n\n    }\n\n    token record {\n        $<observation>=[\\w+] \\t $<tag>=[\\w+] \\n\n    }\n}\n\nmy class Actions {\n    method TOP($/) {\n        make $<chunk>>>.ast;\n    }\n\n    method chunk($/) {\n        make $<record>>>.ast;\n    }\n\n    method record($/) {\n        make ~$<observation> => ~$<tag>;\n    }\n}\n\nhas @!alphabet; # The HMM's alphabet\nhas %.p-transition;\nhas %.p-emission;\n\n# TODO: It might be nice to be able to do the computations both using\n# logarithms and the way it works now?\nsubmethod BUILD(:@!alphabet!, :%!p-transition, :%!p-emission) { }\n\n# TODO: Algorithm::Viterbi on CPAN also computes the Forward probability of\n# the sequence. Should be doable to compute as well.\n# TODO: An improvement might be to create a Role for observations so that\n# domain objects can be passed directly to the decoder.\n#method decode($hmm: Array of Observation @input) {\nmethod decode($hmm: @input) {\n    # We represent the trellis as a 2D list. The first dimension is the \"tick\"\n    # along the input, the second the state space. 
@trellis contains the\n    # accumulated probabilities, @trace the state we came from.\n    my @trellis = [];\n    my @trace = [];\n\n    # Initialise the first row of the matrix.\n    my $first = @input.shift; # Shift the first observation off the input.\n    @trellis[0][0] = 0;\n    for ^@!alphabet -> $state {\n        my $tag = @!alphabet[$state];\n        @trellis[0][$state] = %!p-transition{'Start'}{$tag}\n                            * %!p-emission{$tag}{$first};\n        @trace[0][$state] = 'Start';\n    }\n\n    # Iterate over the input, calculating probabilities as we go.\n    for @input.kv -> $index, $observation {\n        for ^@!alphabet -> $state {\n            my ($max-p, $i) = (0, 0);\n            my $tag = @!alphabet[$state];\n\n            # Do the argmax to figure out which previous state is the optimal\n            # fit for this current state.\n            for ^@!alphabet -> $prev-state {\n                my $prev-tag = @!alphabet[$prev-state];\n                my $new-p = @trellis[$index][$prev-state]\n                          * %!p-transition{$prev-tag}{$tag}\n                          * %!p-emission{$tag}{$observation};\n\n                if $new-p > $max-p {\n                    $max-p = $new-p;\n                    $i = $prev-state;\n                }\n            }\n\n            # Update the trellis and the trace.\n            @trellis[$index+1][$state] = $max-p;\n            @trace[$index+1][$state] = $i;\n        }\n    }\n\n    # Finalisation.\n    my $index = @input.end + 1;\n    my ($max-p, $i) = (0, 0);\n    # Do the argmax to find the optimal previous state before the End state.\n    for ^@!alphabet -> $prev-state {\n        my $prev-tag = @!alphabet[$prev-state];\n        my $new-p = @trellis[$index][$prev-state]\n                  * %!p-transition{$prev-tag}{'End'};\n\n        if $new-p > $max-p {\n            $max-p = $new-p;\n            $i = $prev-state;\n        }\n    }\n\n    # Compute the resulting list of tags by 
unshifting tags onto @result from\n    # the reversed trace.\n    my $final-tag = $i;\n    my @result;\n    for @trace.reverse -> @arr {\n        @result.unshift: @!alphabet[$final-tag];\n        $final-tag = @arr[$final-tag];\n    }\n\n    return @result;\n}\n\n# Compute unsmoothed bigram probabilities from an input file.\nmulti method train($hmm: Str $file) {\n    # XXX: It'd probably be more efficient to do the counting inline in the\n    # actions and then have a private method that normalises the counts,\n    # instead of keeping the whole corpus in memory and dispatching to the\n    # other train().\n    my $res = Grammar.parsefile($file, :actions(Actions.new));\n    $hmm.train($res.ast);\n}\n\n#multi method train($hmm: Array of Pair @input) {\nmulti method train($hmm: @input) {\n    # First, count the number of transitions between pairs of tags, and\n    # emission counts for each tag-observation pair.\n    for @input.lol -> @sequence {\n        my $prev = 'Start';\n        for @sequence -> $pair {\n            my ($observation, $tag) = ($pair.key, $pair.value);\n\n            # Increment transition count.\n            %!p-transition{$prev} //= {};\n            %!p-transition{$prev}{$tag}++;\n            # Increment emission count.\n            %!p-emission{$tag} //= {};\n            %!p-emission{$tag}{$observation}++;\n\n            $prev = $tag;\n        }\n\n        %!p-transition{$prev} //= {};\n        %!p-transition{$prev}{'End'}++;\n    }\n\n    # XXX: Development testing code\n    #say %!p-transition{'Start'}<H>; # Should be: 77\n    #say %!p-transition<C><H>; # Should be: 26\n    #say %!p-transition<C>{'End'}; # Should be: 44\n    #say %!p-emission<C><3>; # Should be: 20\n\n    # Compute the actual transition probabilities.\n    for %!p-transition.kv -> $from, %to {\n        my $sum = [+] %to.values;\n        for %to.keys -> $k {\n            %to{$k} /= $sum;\n        }\n    }\n\n    # Compute the actual emission probabilities.\n    for 
%!p-emission.kv -> $tag, %value {\n        my $sum = [+] %value.values;\n        for %value.keys -> $k {\n            %value{$k} /= $sum;\n        }\n    }\n}\n\n","type":"ambient","location":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":5285,"line":179,"column":1}}},{"type":"block","content":[{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"NAME\n","margin":"","content":[{"type":"text","value":"NAME\n"}],"location":{"start":{"offset":5297,"line":181,"column":1},"end":{"offset":5309,"line":182,"column":1}}}],"name":"head","location":{"start":{"offset":5297,"line":181,"column":1},"end":{"offset":5309,"line":182,"column":1}},"level":"1","id":"NAME"},{"type":"blankline"},{"text":"Algorithm::Viterbi - Decoding HMMs\n","margin":"","type":"para","content":[{"type":"text","value":"Algorithm::Viterbi - Decoding HMMs\n"}],"location":{"start":{"offset":5310,"line":183,"column":1},"end":{"offset":5345,"line":184,"column":1}}},{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"DESCRIPTION\n","margin":"","content":[{"type":"text","value":"DESCRIPTION\n"}],"location":{"start":{"offset":5346,"line":185,"column":1},"end":{"offset":5365,"line":186,"column":1}}}],"name":"head","location":{"start":{"offset":5346,"line":185,"column":1},"end":{"offset":5365,"line":186,"column":1}},"level":"1","id":"DESCRIPTION"},{"type":"blankline"},{"text":"This module is a fairly straightforward implementation of Viterbi's algorithm\nfor decoding hidden Markov models. The code is based on a Common Lisp\nimplementation I wrote as coursework, itself based on pseudo-code from\nJurafsky & Martin - Speech and language processing (2nd ed).\n","margin":"","type":"para","content":[{"type":"text","value":"This module is a fairly straightforward implementation of Viterbi's algorithm\nfor decoding hidden Markov models. 
The code is based on a Common Lisp\nimplementation I wrote as coursework, itself based on pseudo-code from\nJurafsky & Martin - Speech and language processing (2nd ed).\n"}],"location":{"start":{"offset":5366,"line":187,"column":1},"end":{"offset":5646,"line":191,"column":1}}},{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"SYNOPSIS\n","margin":"","content":[{"type":"text","value":"SYNOPSIS\n"}],"location":{"start":{"offset":5647,"line":192,"column":1},"end":{"offset":5663,"line":193,"column":1}}}],"name":"head","location":{"start":{"offset":5647,"line":192,"column":1},"end":{"offset":5663,"line":193,"column":1}},"level":"1","id":"SYNOPSIS"},{"type":"blankline"},{"type":"block","content":[{"type":"verbatim","value":"\n    use Algorithm::Viterbi;\n\n    my Algorithm::Viterbi $hmm .= new(:alphabet<H C>);\n    $hmm.train(\"training-data.tt\"); # Train from file\n    $hmm.train([ [a => 1, b => 2, a => 1],\n                 [b => 3, c => 1, a => 2] ]); # Train from hardcoded data\n    $hmm.decode(<a b c>);\n\n"}],"name":"code","margin":"","text":"=begin code\n\n    use Algorithm::Viterbi;\n\n    my Algorithm::Viterbi $hmm .= new(:alphabet<H C>);\n    $hmm.train(\"training-data.tt\"); # Train from file\n    $hmm.train([ [a => 1, b => 2, a => 1],\n                 [b => 3, c => 1, a => 2] ]); # Train from hardcoded data\n    $hmm.decode(<a b c>);\n\n=end code\n","config":[],"location":{"start":{"offset":5664,"line":194,"column":1},"end":{"offset":5969,"line":205,"column":1}},"id":"aR85Ah9xCYnEiC11VDLRI"},{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"FIELDS\n","margin":"","content":[{"type":"text","value":"FIELDS\n"}],"location":{"start":{"offset":5970,"line":206,"column":1},"end":{"offset":5984,"line":207,"column":1}}}],"name":"head","location":{"start":{"offset":5970,"line":206,"column":1},"end":{"offset":5984,"line":207,"column":1}},"level":"1","id":"FIELDS"},{"type":"blankline"},{"text":"=over 
4\n","margin":"","type":"para","content":[{"type":"text","value":"=over 4\n"}],"location":{"start":{"offset":5985,"line":208,"column":1},"end":{"offset":5993,"line":209,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"%.p-transition\n","margin":"","content":[{"type":"text","value":"%.p-transition\n"}],"location":{"start":{"offset":5994,"line":210,"column":1},"end":{"offset":6015,"line":211,"column":1}}}],"name":"item","location":{"start":{"offset":5994,"line":210,"column":1},"end":{"offset":6015,"line":211,"column":1}},"level":1,"id":"F-P6B1OJcJkJEIXH7YKi7"},{"type":"blankline"}],"list":"itemized"},{"text":"The transition probabilities. A hash of hashes, indexed by tag name.\n","margin":"","type":"para","content":[{"type":"text","value":"The transition probabilities. A hash of hashes, indexed by tag name.\n"}],"location":{"start":{"offset":6016,"line":212,"column":1},"end":{"offset":6085,"line":213,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"%.p-emission\n","margin":"","content":[{"type":"text","value":"%.p-emission\n"}],"location":{"start":{"offset":6086,"line":214,"column":1},"end":{"offset":6105,"line":215,"column":1}}}],"name":"item","location":{"start":{"offset":6086,"line":214,"column":1},"end":{"offset":6105,"line":215,"column":1}},"level":1,"id":"or-kvaOoQ2y7DpxuZEqwT"},{"type":"blankline"}],"list":"itemized"},{"text":"The emission probabilities for a given tag. A hash of hashes, indexed first by\ntag, then by observation.\n","margin":"","type":"para","content":[{"type":"text","value":"The emission probabilities for a given tag. 
A hash of hashes, indexed first by\ntag, then by observation.\n"}],"location":{"start":{"offset":6106,"line":216,"column":1},"end":{"offset":6211,"line":218,"column":1}}},{"type":"blankline"},{"text":"=back\n","margin":"","type":"para","content":[{"type":"text","value":"=back\n"}],"location":{"start":{"offset":6212,"line":219,"column":1},"end":{"offset":6218,"line":220,"column":1}}},{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"METHODS\n","margin":"","content":[{"type":"text","value":"METHODS\n"}],"location":{"start":{"offset":6219,"line":221,"column":1},"end":{"offset":6234,"line":222,"column":1}}}],"name":"head","location":{"start":{"offset":6219,"line":221,"column":1},"end":{"offset":6234,"line":222,"column":1}},"level":"1","id":"METHODS"},{"type":"blankline"},{"text":"=over 4\n","margin":"","type":"para","content":[{"type":"text","value":"=over 4\n"}],"location":{"start":{"offset":6235,"line":223,"column":1},"end":{"offset":6243,"line":224,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"method new(:@alphabet!, :%p-transition, :%p-emission)\n","margin":"","content":[{"type":"text","value":"method new(:@alphabet!, :%p-transition, :%p-emission)\n"}],"location":{"start":{"offset":6244,"line":225,"column":1},"end":{"offset":6304,"line":226,"column":1}}}],"name":"item","location":{"start":{"offset":6244,"line":225,"column":1},"end":{"offset":6304,"line":226,"column":1}},"level":1,"id":"cacqUHvPKAAQ6O-nB-s9E"},{"type":"blankline"}],"list":"itemized"},{"text":"The alphabet parameter is required (an alphabet-less HMM doesn't make too much\nsense). 
The transition and emission probabilities are also required for\ncorrect operation of C<decode>, but can be specified either on construction,\nwith the C<train> method, or by manual specification via the corresponding\nfields.\n","margin":"","type":"para","content":[{"type":"text","value":"The alphabet parameter is required (an alphabet-less HMM doesn't make too much\nsense). The transition and emission probabilities are also required for\ncorrect operation of "},{"content":[{"type":"text","value":"decode"}],"type":"fcode","name":"C"},{"type":"text","value":", but can be specified either on construction,\nwith the "},{"content":[{"type":"text","value":"train"}],"type":"fcode","name":"C"},{"type":"text","value":" method, or by manual specification via the corresponding\nfields.\n"}],"location":{"start":{"offset":6305,"line":227,"column":1},"end":{"offset":6616,"line":232,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"method decode(Str @input)\n","margin":"","content":[{"type":"text","value":"method decode(Str @input)\n"}],"location":{"start":{"offset":6617,"line":233,"column":1},"end":{"offset":6649,"line":234,"column":1}}}],"name":"item","location":{"start":{"offset":6617,"line":233,"column":1},"end":{"offset":6649,"line":234,"column":1}},"level":1,"id":"G2EHkiuw088ivNeyHy6Jq"},{"type":"blankline"}],"list":"itemized"},{"text":"The C<decode> method decodes the input according to the probabilities\nspecified in the C<%.p-transition> and C<%.p-emission> fields.\n","margin":"","type":"para","content":[{"type":"text","value":"The "},{"content":[{"type":"text","value":"decode"}],"type":"fcode","name":"C"},{"type":"text","value":" method decodes the input according to the probabilities\nspecified in the "},{"content":[{"type":"text","value":"%.p-transition"}],"type":"fcode","name":"C"},{"type":"text","value":" and 
"},{"content":[{"type":"text","value":"%.p-emission"}],"type":"fcode","name":"C"},{"type":"text","value":" fields.\n"}],"location":{"start":{"offset":6650,"line":235,"column":1},"end":{"offset":6783,"line":237,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"method train(Str $file)\n","margin":"","content":[{"type":"text","value":"method train(Str $file)\n"}],"location":{"start":{"offset":6784,"line":238,"column":1},"end":{"offset":6814,"line":239,"column":1}}}],"name":"item","location":{"start":{"offset":6784,"line":238,"column":1},"end":{"offset":6814,"line":239,"column":1}},"level":1,"id":"jxcjIg3JK8dvulP5sULtJ"},{"type":"blankline"}],"list":"itemized"},{"text":"Computes unsmoothed bigram probabilities from an input file. The input format\nis described by this grammar:\n","margin":"","type":"para","content":[{"type":"text","value":"Computes unsmoothed bigram probabilities from an input file. The input format\nis described by this grammar:\n"}],"location":{"start":{"offset":6815,"line":240,"column":1},"end":{"offset":6923,"line":242,"column":1}}},{"type":"blankline"},{"type":"block","content":[{"type":"verbatim","value":"\n    grammar G {\n        token TOP { <chunk>+ }\n        token chunk { <record>+ \\n }\n        token record { \\w+ \\t \\w+ \\n }\n    }\n\n"}],"name":"code","margin":"","text":"=begin code\n\n    grammar G {\n        token TOP { <chunk>+ }\n        token chunk { <record>+ \\n }\n        token record { \\w+ \\t \\w+ \\n }\n    }\n\n=end code\n","config":[],"location":{"start":{"offset":6924,"line":243,"column":1},"end":{"offset":7077,"line":252,"column":1}},"id":"w7JPK0823zzKhgQNuSgUz"},{"type":"blankline"},{"text":"The records are observation, then the associated tag.\n","margin":"","type":"para","content":[{"type":"text","value":"The records are observation, then the associated 
tag.\n"}],"location":{"start":{"offset":7078,"line":253,"column":1},"end":{"offset":7132,"line":254,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"method train(Array of Pair @data)\n","margin":"","content":[{"type":"text","value":"method train(Array of Pair @data)\n"}],"location":{"start":{"offset":7133,"line":255,"column":1},"end":{"offset":7173,"line":256,"column":1}}}],"name":"item","location":{"start":{"offset":7133,"line":255,"column":1},"end":{"offset":7173,"line":256,"column":1}},"level":1,"id":"Pd0jZ90qdsx857Endmvou"},{"type":"blankline"}],"list":"itemized"},{"text":"Computes unsmoothed bigram probabilities from an Array of Array of Pairs.\nEach pair is a single observation-tag pair, and each element of the top-level\narray is a sequence that is learnt.\n","margin":"","type":"para","content":[{"type":"text","value":"Computes unsmoothed bigram probabilities from an Array of Array of Pairs.\nEach pair is a single observation-tag pair, and each element of the top-level\narray is a sequence that is learnt.\n"}],"location":{"start":{"offset":7174,"line":257,"column":1},"end":{"offset":7362,"line":260,"column":1}}},{"type":"blankline"},{"text":"=back\n","margin":"","type":"para","content":[{"type":"text","value":"=back\n"}],"location":{"start":{"offset":7363,"line":261,"column":1},"end":{"offset":7369,"line":262,"column":1}}},{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"AUTHOR\n","margin":"","content":[{"type":"text","value":"AUTHOR\n"}],"location":{"start":{"offset":7370,"line":263,"column":1},"end":{"offset":7384,"line":264,"column":1}}}],"name":"head","location":{"start":{"offset":7370,"line":263,"column":1},"end":{"offset":7384,"line":264,"column":1}},"level":"1","id":"AUTHOR"},{"type":"blankline"},{"text":"Arne Skjærholt - L<mailto:arnsholt@gmail.com>.\n","margin":"","type":"para","content":[{"type":"text","value":"Arne Skjærholt - 
"},{"content":[{"type":"text","value":"mailto:arnsholt@gmail.com"}],"type":"fcode","name":"L","meta":null},{"type":"text","value":".\n"}],"location":{"start":{"offset":7385,"line":265,"column":1},"end":{"offset":7432,"line":266,"column":1}}},{"type":"blankline"}],"name":"pod","margin":"","config":[],"location":{"start":{"offset":5285,"line":179,"column":1},"end":{"offset":7442,"line":268,"column":1}},"id":"Rb-5pwhPX-cy0R28iDYiN"}]},"publishUrl":"/mods/all/Algorithm::Viterbi/lib/Algorithm/Viterbi.pm","pluginsData":{"moduleInfo":{"meta":{"name":"Algorithm::Viterbi","description":"HMM decoding with the Viterbi algorithm.","version":"*","depends":[],"source-url":"git://github.com/arnsholt/Algorithm-Viterbi.git","source-type":"git"},"files":[{"file":"work_mods/all/Algorithm::Viterbi/lib/Algorithm/Viterbi.pm","publishUrl":"/mods/all/Algorithm::Viterbi/lib/Algorithm/Viterbi.pm"}],"src":"all","url":"/mods/all/Algorithm::Viterbi"}}}
=end data


React

    =begin React :component<RenderItem>
    =begin data
    {"type":"page","title":"","description":"","footer":"","file":"work_mods/all/Algorithm::Viterbi/lib/Algorithm/Viterbi.pm","sources":[],"node":{"type":"block","id":"BMyQpOnI7rg1T-FGEStcL","margin":"","name":"root","content":[{"text":"use v6;\n\nclass Algorithm::Viterbi;\n\n#our class Start {};\n#our class End {};\n\n# TODO:\nour role Observation {};\n\nmy grammar Grammar {\n    token TOP {\n        <chunk>+\n        [ $ || <.panic: \"Syntax error\"> ]\n    }\n\n    token chunk {\n        <record>+ \\n\n    }\n\n    token record {\n        $<observation>=[\\w+] \\t $<tag>=[\\w+] \\n\n    }\n}\n\nmy class Actions {\n    method TOP($/) {\n        make $<chunk>>>.ast;\n    }\n\n    method chunk($/) {\n        make $<record>>>.ast;\n    }\n\n    method record($/) {\n        make ~$<observation> => ~$<tag>;\n    }\n}\n\nhas @!alphabet; # The HMM's alphabet\nhas %.p-transition;\nhas %.p-emission;\n\n# TODO: It might be nice to be able to do the computations both using\n# logarithms and the way it works now?\nsubmethod BUILD(:@!alphabet!, :%!p-transition, :%!p-emission) { }\n\n# TODO: Algorithm::Viterbi on CPAN also computes the Forward probability of\n# the sequence. Should be doable to compute as well.\n# TODO: An improvement might be to create a Role for observations so that\n# domain objects can be passed directly to the decoder.\n#method decode($hmm: Array of Observation @input) {\nmethod decode($hmm: @input) {\n    # We represent the trellis as a 2D list. The first dimension is the \"tick\"\n    # along the input, the second the state space. 
@trellis contains the\n    # accumulated probabilities, @trace the state we came from.\n    my @trellis = [];\n    my @trace = [];\n\n    # Initialise the first row of the matrix.\n    my $first = @input.shift; # Shift the first observation off the input.\n    @trellis[0][0] = 0;\n    for ^@!alphabet -> $state {\n        my $tag = @!alphabet[$state];\n        @trellis[0][$state] = %!p-transition{'Start'}{$tag}\n                            * %!p-emission{$tag}{$first};\n        @trace[0][$state] = 'Start';\n    }\n\n    # Iterate over the input, calculating probabilities as we go.\n    for @input.kv -> $index, $observation {\n        for ^@!alphabet -> $state {\n            my ($max-p, $i) = (0, 0);\n            my $tag = @!alphabet[$state];\n\n            # Do the argmax to figure out which previous state is the optimal\n            # fit for this current state.\n            for ^@!alphabet -> $prev-state {\n                my $prev-tag = @!alphabet[$prev-state];\n                my $new-p = @trellis[$index][$prev-state]\n                          * %!p-transition{$prev-tag}{$tag}\n                          * %!p-emission{$tag}{$observation};\n\n                if $new-p > $max-p {\n                    $max-p = $new-p;\n                    $i = $prev-state;\n                }\n            }\n\n            # Update the trellis and the trace.\n            @trellis[$index+1][$state] = $max-p;\n            @trace[$index+1][$state] = $i;\n        }\n    }\n\n    # Finalisation.\n    my $index = @input.end + 1;\n    my ($max-p, $i) = (0, 0);\n    # Do the argmax to find the optimal previous state before the End state.\n    for ^@!alphabet -> $prev-state {\n        my $prev-tag = @!alphabet[$prev-state];\n        my $new-p = @trellis[$index][$prev-state]\n                  * %!p-transition{$prev-tag}{'End'};\n\n        if $new-p > $max-p {\n            $max-p = $new-p;\n            $i = $prev-state;\n        }\n    }\n\n    # Compute the resulting list of tags by 
unshifting tags onto @result from\n    # the reversed trace.\n    my $final-tag = $i;\n    my @result;\n    for @trace.reverse -> @arr {\n        @result.unshift: @!alphabet[$final-tag];\n        $final-tag = @arr[$final-tag];\n    }\n\n    return @result;\n}\n\n# Compute unsmoothed bigram probabilities from an input file.\nmulti method train($hmm: Str $file) {\n    # XXX: It'd probably be more efficient to do the counting inline in the\n    # actions and then have a private method that normalises the counts,\n    # instead of keeping the whole corpus in memory and dispatching to the\n    # other train().\n    my $res = Grammar.parsefile($file, :actions(Actions.new));\n    $hmm.train($res.ast);\n}\n\n#multi method train($hmm: Array of Pair @input) {\nmulti method train($hmm: @input) {\n    # First, count the number of transitions between pairs of tags, and\n    # emission counts for each tag-observation pair.\n    for @input.lol -> @sequence {\n        my $prev = 'Start';\n        for @sequence -> $pair {\n            my ($observation, $tag) = ($pair.key, $pair.value);\n\n            # Increment transition count.\n            %!p-transition{$prev} //= {};\n            %!p-transition{$prev}{$tag}++;\n            # Increment emission count.\n            %!p-emission{$tag} //= {};\n            %!p-emission{$tag}{$observation}++;\n\n            $prev = $tag;\n        }\n\n        %!p-transition{$prev} //= {};\n        %!p-transition{$prev}{'End'}++;\n    }\n\n    # XXX: Development testing code\n    #say %!p-transition{'Start'}<H>; # Should be: 77\n    #say %!p-transition<C><H>; # Should be: 26\n    #say %!p-transition<C>{'End'}; # Should be: 44\n    #say %!p-emission<C><3>; # Should be: 20\n\n    # Compute the actual transition probabilities.\n    for %!p-transition.kv -> $from, %to {\n        my $sum = [+] %to.values;\n        for %to.keys -> $k {\n            %to{$k} /= $sum;\n        }\n    }\n\n    # Compute the actual emission probabilities.\n    for 
%!p-emission.kv -> $tag, %value {\n        my $sum = [+] %value.values;\n        for %value.keys -> $k {\n            %value{$k} /= $sum;\n        }\n    }\n}\n\n","type":"ambient","location":{"start":{"offset":0,"line":1,"column":1},"end":{"offset":5285,"line":179,"column":1}}},{"type":"block","content":[{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"NAME\n","margin":"","content":[{"type":"text","value":"NAME\n"}],"location":{"start":{"offset":5297,"line":181,"column":1},"end":{"offset":5309,"line":182,"column":1}}}],"name":"head","location":{"start":{"offset":5297,"line":181,"column":1},"end":{"offset":5309,"line":182,"column":1}},"level":"1","id":"NAME"},{"type":"blankline"},{"text":"Algorithm::Viterbi - Decoding HMMs\n","margin":"","type":"para","content":[{"type":"text","value":"Algorithm::Viterbi - Decoding HMMs\n"}],"location":{"start":{"offset":5310,"line":183,"column":1},"end":{"offset":5345,"line":184,"column":1}}},{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"DESCRIPTION\n","margin":"","content":[{"type":"text","value":"DESCRIPTION\n"}],"location":{"start":{"offset":5346,"line":185,"column":1},"end":{"offset":5365,"line":186,"column":1}}}],"name":"head","location":{"start":{"offset":5346,"line":185,"column":1},"end":{"offset":5365,"line":186,"column":1}},"level":"1","id":"DESCRIPTION"},{"type":"blankline"},{"text":"This module is a fairly straightforward implementation of Viterbi's algorithm\nfor decoding hidden Markov models. The code is based on a Common Lisp\nimplementation I wrote as coursework, itself based on pseudo-code from\nJurafsky & Martin - Speech and language processing (2nd ed).\n","margin":"","type":"para","content":[{"type":"text","value":"This module is a fairly straightforward implementation of Viterbi's algorithm\nfor decoding hidden Markov models. 
The code is based on a Common Lisp\nimplementation I wrote as coursework, itself based on pseudo-code from\nJurafsky & Martin - Speech and language processing (2nd ed).\n"}],"location":{"start":{"offset":5366,"line":187,"column":1},"end":{"offset":5646,"line":191,"column":1}}},{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"SYNOPSIS\n","margin":"","content":[{"type":"text","value":"SYNOPSIS\n"}],"location":{"start":{"offset":5647,"line":192,"column":1},"end":{"offset":5663,"line":193,"column":1}}}],"name":"head","location":{"start":{"offset":5647,"line":192,"column":1},"end":{"offset":5663,"line":193,"column":1}},"level":"1","id":"SYNOPSIS"},{"type":"blankline"},{"type":"block","content":[{"type":"verbatim","value":"\n    use Algorithm::Viterbi;\n\n    my Algorithm::Viterbi $hmm .= new(:alphabet<H C>);\n    $hmm.train(\"training-data.tt\"); # Train from file\n    $hmm.train([ [a => 1, b => 2, a => 1],\n                 [b => 3, c => 1, a => 2] ]); # Train from hardcoded data\n    $hmm.decode(<a b c>);\n\n"}],"name":"code","margin":"","text":"=begin code\n\n    use Algorithm::Viterbi;\n\n    my Algorithm::Viterbi $hmm .= new(:alphabet<H C>);\n    $hmm.train(\"training-data.tt\"); # Train from file\n    $hmm.train([ [a => 1, b => 2, a => 1],\n                 [b => 3, c => 1, a => 2] ]); # Train from hardcoded data\n    $hmm.decode(<a b c>);\n\n=end code\n","config":[],"location":{"start":{"offset":5664,"line":194,"column":1},"end":{"offset":5969,"line":205,"column":1}},"id":"aR85Ah9xCYnEiC11VDLRI"},{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"FIELDS\n","margin":"","content":[{"type":"text","value":"FIELDS\n"}],"location":{"start":{"offset":5970,"line":206,"column":1},"end":{"offset":5984,"line":207,"column":1}}}],"name":"head","location":{"start":{"offset":5970,"line":206,"column":1},"end":{"offset":5984,"line":207,"column":1}},"level":"1","id":"FIELDS"},{"type":"blankline"},{"text":"=over 
4\n","margin":"","type":"para","content":[{"type":"text","value":"=over 4\n"}],"location":{"start":{"offset":5985,"line":208,"column":1},"end":{"offset":5993,"line":209,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"%.p-transition\n","margin":"","content":[{"type":"text","value":"%.p-transition\n"}],"location":{"start":{"offset":5994,"line":210,"column":1},"end":{"offset":6015,"line":211,"column":1}}}],"name":"item","location":{"start":{"offset":5994,"line":210,"column":1},"end":{"offset":6015,"line":211,"column":1}},"level":1,"id":"F-P6B1OJcJkJEIXH7YKi7"},{"type":"blankline"}],"list":"itemized"},{"text":"The transition probabilities. A hash of hashes, indexed by tag name.\n","margin":"","type":"para","content":[{"type":"text","value":"The transition probabilities. A hash of hashes, indexed by tag name.\n"}],"location":{"start":{"offset":6016,"line":212,"column":1},"end":{"offset":6085,"line":213,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"%.p-emission\n","margin":"","content":[{"type":"text","value":"%.p-emission\n"}],"location":{"start":{"offset":6086,"line":214,"column":1},"end":{"offset":6105,"line":215,"column":1}}}],"name":"item","location":{"start":{"offset":6086,"line":214,"column":1},"end":{"offset":6105,"line":215,"column":1}},"level":1,"id":"or-kvaOoQ2y7DpxuZEqwT"},{"type":"blankline"}],"list":"itemized"},{"text":"The emission probabilities for a given tag. A hash of hashes, indexed first by\ntag, then by observation.\n","margin":"","type":"para","content":[{"type":"text","value":"The emission probabilities for a given tag. 
A hash of hashes, indexed first by\ntag, then by observation.\n"}],"location":{"start":{"offset":6106,"line":216,"column":1},"end":{"offset":6211,"line":218,"column":1}}},{"type":"blankline"},{"text":"=back\n","margin":"","type":"para","content":[{"type":"text","value":"=back\n"}],"location":{"start":{"offset":6212,"line":219,"column":1},"end":{"offset":6218,"line":220,"column":1}}},{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"METHODS\n","margin":"","content":[{"type":"text","value":"METHODS\n"}],"location":{"start":{"offset":6219,"line":221,"column":1},"end":{"offset":6234,"line":222,"column":1}}}],"name":"head","location":{"start":{"offset":6219,"line":221,"column":1},"end":{"offset":6234,"line":222,"column":1}},"level":"1","id":"METHODS"},{"type":"blankline"},{"text":"=over 4\n","margin":"","type":"para","content":[{"type":"text","value":"=over 4\n"}],"location":{"start":{"offset":6235,"line":223,"column":1},"end":{"offset":6243,"line":224,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"method new(:@alphabet!, :%p-transition, :%p-emission)\n","margin":"","content":[{"type":"text","value":"method new(:@alphabet!, :%p-transition, :%p-emission)\n"}],"location":{"start":{"offset":6244,"line":225,"column":1},"end":{"offset":6304,"line":226,"column":1}}}],"name":"item","location":{"start":{"offset":6244,"line":225,"column":1},"end":{"offset":6304,"line":226,"column":1}},"level":1,"id":"cacqUHvPKAAQ6O-nB-s9E"},{"type":"blankline"}],"list":"itemized"},{"text":"The alphabet parameter is required (an alphabet-less HMM doesn't make too much\nsense). 
The transition and emission probabilities are also required for\ncorrect operation of C<decode>, but can be specified either on construction,\nwith the C<train> method, or by manual specification via the corresponding\nfields.\n","margin":"","type":"para","content":[{"type":"text","value":"The alphabet parameter is required (an alphabet-less HMM doesn't make too much\nsense). The transition and emission probabilities are also required for\ncorrect operation of "},{"content":[{"type":"text","value":"decode"}],"type":"fcode","name":"C"},{"type":"text","value":", but can be specified either on construction,\nwith the "},{"content":[{"type":"text","value":"train"}],"type":"fcode","name":"C"},{"type":"text","value":" method, or by manual specification via the corresponding\nfields.\n"}],"location":{"start":{"offset":6305,"line":227,"column":1},"end":{"offset":6616,"line":232,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"method decode(Str @input)\n","margin":"","content":[{"type":"text","value":"method decode(Str @input)\n"}],"location":{"start":{"offset":6617,"line":233,"column":1},"end":{"offset":6649,"line":234,"column":1}}}],"name":"item","location":{"start":{"offset":6617,"line":233,"column":1},"end":{"offset":6649,"line":234,"column":1}},"level":1,"id":"G2EHkiuw088ivNeyHy6Jq"},{"type":"blankline"}],"list":"itemized"},{"text":"The C<decode> method decodes the input according to the probabilities\nspecified in the C<%.p-transition> and C<%.p-emission> fields.\n","margin":"","type":"para","content":[{"type":"text","value":"The "},{"content":[{"type":"text","value":"decode"}],"type":"fcode","name":"C"},{"type":"text","value":" method decodes the input according to the probabilities\nspecified in the "},{"content":[{"type":"text","value":"%.p-transition"}],"type":"fcode","name":"C"},{"type":"text","value":" and 
"},{"content":[{"type":"text","value":"%.p-emission"}],"type":"fcode","name":"C"},{"type":"text","value":" fields.\n"}],"location":{"start":{"offset":6650,"line":235,"column":1},"end":{"offset":6783,"line":237,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"method train(Str $file)\n","margin":"","content":[{"type":"text","value":"method train(Str $file)\n"}],"location":{"start":{"offset":6784,"line":238,"column":1},"end":{"offset":6814,"line":239,"column":1}}}],"name":"item","location":{"start":{"offset":6784,"line":238,"column":1},"end":{"offset":6814,"line":239,"column":1}},"level":1,"id":"jxcjIg3JK8dvulP5sULtJ"},{"type":"blankline"}],"list":"itemized"},{"text":"Computes unsmoothed bigram probabilities from an input file. The input format\nis described by this grammar:\n","margin":"","type":"para","content":[{"type":"text","value":"Computes unsmoothed bigram probabilities from an input file. The input format\nis described by this grammar:\n"}],"location":{"start":{"offset":6815,"line":240,"column":1},"end":{"offset":6923,"line":242,"column":1}}},{"type":"blankline"},{"type":"block","content":[{"type":"verbatim","value":"\n    grammar G {\n        token TOP { <chunk>+ }\n        token chunk { <record>+ \\n }\n        token record { \\w+ \\t \\w+ \\n }\n    }\n\n"}],"name":"code","margin":"","text":"=begin code\n\n    grammar G {\n        token TOP { <chunk>+ }\n        token chunk { <record>+ \\n }\n        token record { \\w+ \\t \\w+ \\n }\n    }\n\n=end code\n","config":[],"location":{"start":{"offset":6924,"line":243,"column":1},"end":{"offset":7077,"line":252,"column":1}},"id":"w7JPK0823zzKhgQNuSgUz"},{"type":"blankline"},{"text":"The records are observation, then the associated tag.\n","margin":"","type":"para","content":[{"type":"text","value":"The records are observation, then the associated 
tag.\n"}],"location":{"start":{"offset":7078,"line":253,"column":1},"end":{"offset":7132,"line":254,"column":1}}},{"type":"blankline"},{"type":"list","level":1,"content":[{"margin":"","type":"block","content":[{"type":"para","text":"method train(Array of Pair @data)\n","margin":"","content":[{"type":"text","value":"method train(Array of Pair @data)\n"}],"location":{"start":{"offset":7133,"line":255,"column":1},"end":{"offset":7173,"line":256,"column":1}}}],"name":"item","location":{"start":{"offset":7133,"line":255,"column":1},"end":{"offset":7173,"line":256,"column":1}},"level":1,"id":"Pd0jZ90qdsx857Endmvou"},{"type":"blankline"}],"list":"itemized"},{"text":"Computes unsmoothed bigram probabilities from an Array of Array of Pairs.\nEach pair is a single observation-tag pair, and each element of the top-level\narray is a sequence that is learnt.\n","margin":"","type":"para","content":[{"type":"text","value":"Computes unsmoothed bigram probabilities from an Array of Array of Pairs.\nEach pair is a single observation-tag pair, and each element of the top-level\narray is a sequence that is learnt.\n"}],"location":{"start":{"offset":7174,"line":257,"column":1},"end":{"offset":7362,"line":260,"column":1}}},{"type":"blankline"},{"text":"=back\n","margin":"","type":"para","content":[{"type":"text","value":"=back\n"}],"location":{"start":{"offset":7363,"line":261,"column":1},"end":{"offset":7369,"line":262,"column":1}}},{"type":"blankline"},{"margin":"","type":"block","content":[{"type":"para","text":"AUTHOR\n","margin":"","content":[{"type":"text","value":"AUTHOR\n"}],"location":{"start":{"offset":7370,"line":263,"column":1},"end":{"offset":7384,"line":264,"column":1}}}],"name":"head","location":{"start":{"offset":7370,"line":263,"column":1},"end":{"offset":7384,"line":264,"column":1}},"level":"1","id":"AUTHOR"},{"type":"blankline"},{"text":"Arne Skjærholt - L<mailto:arnsholt@gmail.com>.\n","margin":"","type":"para","content":[{"type":"text","value":"Arne Skjærholt - 
"},{"content":[{"type":"text","value":"mailto:arnsholt@gmail.com"}],"type":"fcode","name":"L","meta":null},{"type":"text","value":".\n"}],"location":{"start":{"offset":7385,"line":265,"column":1},"end":{"offset":7432,"line":266,"column":1}}},{"type":"blankline"}],"name":"pod","margin":"","config":[],"location":{"start":{"offset":5285,"line":179,"column":1},"end":{"offset":7442,"line":268,"column":1}},"id":"Rb-5pwhPX-cy0R28iDYiN"}]},"publishUrl":"/mods/all/Algorithm::Viterbi/lib/Algorithm/Viterbi.pm","pluginsData":{"moduleInfo":{"meta":{"name":"Algorithm::Viterbi","description":"HMM decoding with the Viterbi algorithm.","version":"*","depends":[],"source-url":"git://github.com/arnsholt/Algorithm-Viterbi.git","source-type":"git"},"files":[{"file":"work_mods/all/Algorithm::Viterbi/lib/Algorithm/Viterbi.pm","publishUrl":"/mods/all/Algorithm::Viterbi/lib/Algorithm/Viterbi.pm"}],"src":"all","url":"/mods/all/Algorithm::Viterbi"}}}
    =end data
    =end React


root

Small experimental language with a license to macro


Alma


a character keyed trie using the datrie library.


Algorithm::Trie::libdatrie


=begin React :component<HeaderCol> :id<menu> 
=para L<Documentation|/doc/introduction>
=para L<Modules| /mods>
=para L<Examples| file:../examples/index.podlite>
=para L<Download |file:../download/index.podlite>
=para L<About|file:../about/index.podlite>
=para 🔍 K<⌘K>/K<ctrl-K>
=end React


=begin React :component<HeaderCol> :id<footer>  
=begin nested :!nested
=item1 B<Language>
    =item2 L<Get started|file:../getting-started/getting-started.podlite>
    =item2 L<Why Raku?|/doc/language/faq#Why-should-I-learn-Raku-What's-so-great-about-it>
    =item2 L<Try Raku|https://glot.io/new/raku>
    =item2 L<Raku cheat sheet|https://github.com/Raku/mu/blob/master/docs/Perl6/Cheatsheet/cheatsheet.txt>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<Raku on Exercism|https://exercism.org/tracks/raku>
    =item2 L<Wikipedia|https://en.wikipedia.org/wiki/Raku_(programming_language)>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>

=end nested
=begin nested :!nested

=item1 B<L<Documentation|/doc>>
    =item2 L<Getting started, Migration guides from other languages, & Tutorials | file:../doc/introduction.podlite>
    =item2 L<Language References|file:../doc/reference.podlite>
    =item2 L<Type Reference| file:../doc/types.podlite>
    =item2 L<Miscellaneous| file:../doc/miscellaneous.podlite>
    =item2 L<FAQs (Frequently Asked Questions)|/doc/language/faq>
    =item2 L<Community|/doc/language/community>
    =item2 L<The list of all documents|file:../doc/index.podlite>

=item1 B<Resources>
    =item2 L<The Raku Guide|https://raku.guide/>
    =item2 L<Books |https://perl6book.com/>
    =item2 L<Rosetta Code | https://www.raku.org/community/rosettacode>

=end nested
=begin nested :!nested

=item1 B<Resources>
    =item2 L<Download | file:../download/index.podlite>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>

=item1 B<Learning>
    =item2 L<The Raku Guide|https://raku.guide/>
    =item2 L<Wikibook |https://en.wikibooks.org/wiki/Raku_Programming>
    =item2 L<Books |https://perl6book.com/>
    =item2 L<Rosetta Code | https://www.raku.org/community/rosettacode>
    =item2 L<Learn Raku in Y minutes|https://learnxinyminutes.com/docs/raku/>
    =item2 L<Golfing |https://github.com/AlexDaniel/raku-golf-cheatsheet>
=end nested
=begin nested :!nested

=item1 B<Explore>
    =item2 L<Raku Blog Aggregator|https://planet.raku.org/>
    =item2 L<Rakudo Weekly|https://rakudoweekly.blog/>
    =item2 L<The Weekly Challenge |https://perlweeklychallenge.org/>
    =item2 L<Raku Advent Calendar|https://raku-advent.blog/>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>

=end nested
=end React


=begin React :component<CookieConsent> :id<CookieConsent> :buttonCaption("Got it!")
=para
This website uses cookies for analytics.
=end React