para

TITLE

Lemmatize strings and lists using a provided English dataset or by providing your own.


SUBTITLE

data

=begin data
{"type":"page","file":"work_mods/zef/lemmatize/dist/README.md","sources":[],"node":{"type":"block","id":"V6GAA3kCQbAwoDijGvXXt","margin":"","name":"root","content":[{"type":"block","id":"YcPXCgORkoFrcJ7GWn2kL","margin":"","level":1,"name":"head","location":{"start":{"line":1,"column":1,"offset":0},"end":{"line":1,"column":17,"offset":16}},"content":["Raku Lemmatize"]},{"type":"para","id":"gfJ27HEMq6UzZM38eWRdQ","margin":"","location":{"start":{"line":3,"column":1,"offset":18},"end":{"line":3,"column":46,"offset":63}},"text":"text","content":["A Raku module to lemmatize strings and lists."]},{"type":"block","id":"a-Fja7ukZ64pk6F287xoH","margin":"","level":2,"name":"head","location":{"start":{"line":5,"column":1,"offset":65},"end":{"line":5,"column":16,"offset":80}},"content":["Installation"]},{"type":"para","id":"wlPb_QVE0qAj1rF3AjvmA","margin":"","location":{"start":{"line":7,"column":1,"offset":82},"end":{"line":7,"column":19,"offset":100}},"text":"text","content":["Install using zef:"]},{"type":"block","id":"TsOTVN1XlnGglKjiquOWD","margin":"","name":"code","config":[],"location":{"start":{"line":9,"column":1,"offset":102},"end":{"line":11,"column":4,"offset":131}},"content":[{"type":"verbatim","value":"zef install lemmatize"}]},{"type":"para","id":"R-0moAmdnZRXBWkkX64c7","margin":"","location":{"start":{"line":13,"column":1,"offset":133},"end":{"line":13,"column":36,"offset":168}},"text":"text","content":["Or simply download the GitHub repo."]},{"type":"block","id":"OyvT3aGHeQEGWiIJYOIVb","margin":"","level":2,"name":"head","location":{"start":{"line":15,"column":1,"offset":170},"end":{"line":15,"column":9,"offset":178}},"content":["Usage"]},{"type":"para","id":"eew5Ez4pI9z9b6cghSmm7","margin":"","location":{"start":{"line":17,"column":1,"offset":180},"end":{"line":18,"column":27,"offset":454}},"text":"text","content":["The package uses a .csv containing predefined English lemmas in a two column format, with the lemma on the left and its derivatives on the right. 
Any similarly formatted .csv can be used to run the code, allowing for easy use of custom lemma lists\nand non-English languages."]},{"type":"para","id":"qBOCo_KyhOanMu0hbhR54","margin":"","location":{"start":{"line":20,"column":1,"offset":456},"end":{"line":20,"column":46,"offset":501}},"text":"text","content":["The following four subroutines can be called:"]},{"type":"para","id":"c9y37aQVFE_Bn5EtO-s48","margin":"","location":{"start":{"line":22,"column":1,"offset":503},"end":{"line":22,"column":83,"offset":585}},"text":"text","content":["To construct your hash table of lemma pairs; this must be done before lemmatizing:"]},{"type":"block","id":"1lEBWD7VmO8fPwaHb4RFa","margin":"","name":"code","config":[],"location":{"start":{"line":24,"column":1,"offset":587},"end":{"line":27,"column":4,"offset":693}},"content":[{"type":"verbatim","value":"construct_hash('resources/lemmas.csv');\n# or substitute your own filename in place of 'lemmas.csv'"}]},{"type":"para","id":"UnrXXt4tKThi0EmgnhO9l","margin":"","location":{"start":{"line":29,"column":1,"offset":695},"end":{"line":29,"column":88,"offset":782}},"text":"text","content":["To lemmatize a string (which also converts every character in the string to lowercase):"]},{"type":"block","id":"QdEPPpKtSTFk5T8c-tR0W","margin":"","name":"code","config":[],"location":{"start":{"line":31,"column":1,"offset":784},"end":{"line":33,"column":4,"offset":823}},"content":[{"type":"verbatim","value":"lemmatize_string($your_string);"}]},{"type":"para","id":"VQq8mMj2Mlgj0MHCaWDi9","margin":"","location":{"start":{"line":35,"column":1,"offset":825},"end":{"line":35,"column":92,"offset":916}},"text":"text","content":["To lemmatize an array of words (which also converts every string in the list to 
lowercase):"]},{"type":"block","id":"Ny7IpWaH48G8lJd_6EqxO","margin":"","name":"code","config":[],"location":{"start":{"line":37,"column":1,"offset":918},"end":{"line":39,"column":4,"offset":955}},"content":[{"type":"verbatim","value":"lemmatize_array(@your_array);"}]},{"type":"para","id":"TY8TGescEm_HZK_EK9k26","margin":"","location":{"start":{"line":41,"column":1,"offset":957},"end":{"line":41,"column":56,"offset":1012}},"text":"text","content":["To convert a string to an array of its component words:"]},{"type":"block","id":"r9VVhbWzkz8AIAlxa3Cww","margin":"","name":"code","config":[],"location":{"start":{"line":43,"column":1,"offset":1014},"end":{"line":45,"column":4,"offset":1051}},"content":[{"type":"verbatim","value":"words_to_array($your_string);"}]},{"type":"block","id":"Uw5_XDmKvklouS8DKxCjo","margin":"","level":2,"name":"head","location":{"start":{"line":47,"column":1,"offset":1053},"end":{"line":47,"column":29,"offset":1081}},"content":["Lemma List and Formatting"]},{"type":"block","id":"CVanq0thzJMbjWnY3DxAo","margin":"","level":3,"name":"head","location":{"start":{"line":49,"column":1,"offset":1083},"end":{"line":49,"column":11,"offset":1093}},"content":["Source"]},{"type":"para","id":"_RaaP23BIOd1kFKUxG8yD","margin":"","location":{"start":{"line":51,"column":1,"offset":1095},"end":{"line":51,"column":156,"offset":1250}},"text":"text","content":["The list of lemmas included here was sourced from ",{"meta":"https://github.com/skywind3000/lemma.en","type":"fcode","name":"L","content":["this GitHub repo"]}," by ",{"meta":"https://github.com/skywind3000","type":"fcode","name":"L","content":["Lin Wei."]}]},{"type":"para","id":"vcckg81QwKdbA9J_JZbja","margin":"","location":{"start":{"line":53,"column":1,"offset":1252},"end":{"line":54,"column":24,"offset":1398}},"text":"text","content":["The list was created by referencing the British Nation Corpus (BNC), NodeBox Linguistics and Yasumasa Someya's lemma list.\nFrom the original 
repo:"]},{"type":"block","id":"f52wB-B9EQhzfx_CIpDXw","margin":"","name":"nested","location":{"start":{"line":56,"column":1,"offset":1400},"end":{"line":56,"column":178,"offset":1577}},"content":[{"type":"para","id":"Gm2TMTcvDvdev8oBkHkLp","margin":"","location":{"start":{"line":56,"column":3,"offset":1402},"end":{"line":56,"column":178,"offset":1577}},"text":"text","content":["This lemma list is provided \"as is\" and is free to use for any research and/or educational purposes. The list currently contains 186,523 words (tokens) in 84,487 lemma groups."]}]},{"type":"block","id":"zkHR90Z8WcsH0tmPrFU-T","margin":"","level":3,"name":"head","location":{"start":{"line":58,"column":1,"offset":1579},"end":{"line":58,"column":15,"offset":1593}},"content":["Formatting"]},{"type":"para","id":"qtmrzrLpA1seIMhTGT8X_","margin":"","location":{"start":{"line":60,"column":1,"offset":1595},"end":{"line":60,"column":229,"offset":1823}},"text":"text","content":["To create your own list of lemmas for use with the library, create a csv file formatted like the one included here. Use two columns, the first containing your lemmas and the second containing comma-separated forms of the lemmas."]}]},"publishUrl":"/mods/zef/lemmatize/dist/README.md","pluginsData":{"moduleInfo":{"meta":{"api":0,"auth":"zef:ian-nai","build-depends":[],"depends":[],"description":"Lemmatize strings and lists using a provided English dataset or by providing your own.","dist":"lemmatize:ver<0.0.1>:auth<zef:ian-nai>","name":"lemmatize","path":"L/EM/LEMMATIZE/84841e5292a2b88c8a6a9d5aeb009be1cb19c707.tar.gz","provides":{"lemmatize":"lib/lemmatize.rakumod"},"resources":["lemmas.csv","lemmas_test.csv"],"tags":["lemmatize","lemma","nlp","text analysis"],"test-depends":[],"version":"0.0.1"},"files":[{"file":"work_mods/zef/lemmatize/dist/README.md","publishUrl":"/mods/zef/lemmatize/dist/README.md"}],"src":"zef","url":"/mods/zef/lemmatize"}}}
=end data


React

    =begin React :component<RenderItem>
    =begin data
    {"type":"page","file":"work_mods/zef/lemmatize/dist/README.md","sources":[],"node":{"type":"block","id":"V6GAA3kCQbAwoDijGvXXt","margin":"","name":"root","content":[{"type":"block","id":"YcPXCgORkoFrcJ7GWn2kL","margin":"","level":1,"name":"head","location":{"start":{"line":1,"column":1,"offset":0},"end":{"line":1,"column":17,"offset":16}},"content":["Raku Lemmatize"]},{"type":"para","id":"gfJ27HEMq6UzZM38eWRdQ","margin":"","location":{"start":{"line":3,"column":1,"offset":18},"end":{"line":3,"column":46,"offset":63}},"text":"text","content":["A Raku module to lemmatize strings and lists."]},{"type":"block","id":"a-Fja7ukZ64pk6F287xoH","margin":"","level":2,"name":"head","location":{"start":{"line":5,"column":1,"offset":65},"end":{"line":5,"column":16,"offset":80}},"content":["Installation"]},{"type":"para","id":"wlPb_QVE0qAj1rF3AjvmA","margin":"","location":{"start":{"line":7,"column":1,"offset":82},"end":{"line":7,"column":19,"offset":100}},"text":"text","content":["Install using zef:"]},{"type":"block","id":"TsOTVN1XlnGglKjiquOWD","margin":"","name":"code","config":[],"location":{"start":{"line":9,"column":1,"offset":102},"end":{"line":11,"column":4,"offset":131}},"content":[{"type":"verbatim","value":"zef install lemmatize"}]},{"type":"para","id":"R-0moAmdnZRXBWkkX64c7","margin":"","location":{"start":{"line":13,"column":1,"offset":133},"end":{"line":13,"column":36,"offset":168}},"text":"text","content":["Or simply download the GitHub repo."]},{"type":"block","id":"OyvT3aGHeQEGWiIJYOIVb","margin":"","level":2,"name":"head","location":{"start":{"line":15,"column":1,"offset":170},"end":{"line":15,"column":9,"offset":178}},"content":["Usage"]},{"type":"para","id":"eew5Ez4pI9z9b6cghSmm7","margin":"","location":{"start":{"line":17,"column":1,"offset":180},"end":{"line":18,"column":27,"offset":454}},"text":"text","content":["The package uses a .csv containing predefined English lemmas in a two column format, with the lemma on the left and its derivatives on the 
right. Any similarly formatted .csv can be used to run the code, allowing for easy use of custom lemma lists\nand non-English languages."]},{"type":"para","id":"qBOCo_KyhOanMu0hbhR54","margin":"","location":{"start":{"line":20,"column":1,"offset":456},"end":{"line":20,"column":46,"offset":501}},"text":"text","content":["The following four subroutines can be called:"]},{"type":"para","id":"c9y37aQVFE_Bn5EtO-s48","margin":"","location":{"start":{"line":22,"column":1,"offset":503},"end":{"line":22,"column":83,"offset":585}},"text":"text","content":["To construct your hash table of lemma pairs; this must be done before lemmatizing:"]},{"type":"block","id":"1lEBWD7VmO8fPwaHb4RFa","margin":"","name":"code","config":[],"location":{"start":{"line":24,"column":1,"offset":587},"end":{"line":27,"column":4,"offset":693}},"content":[{"type":"verbatim","value":"construct_hash('resources/lemmas.csv');\n# or substitute your own filename in place of 'lemmas.csv'"}]},{"type":"para","id":"UnrXXt4tKThi0EmgnhO9l","margin":"","location":{"start":{"line":29,"column":1,"offset":695},"end":{"line":29,"column":88,"offset":782}},"text":"text","content":["To lemmatize a string (which also converts every character in the string to lowercase):"]},{"type":"block","id":"QdEPPpKtSTFk5T8c-tR0W","margin":"","name":"code","config":[],"location":{"start":{"line":31,"column":1,"offset":784},"end":{"line":33,"column":4,"offset":823}},"content":[{"type":"verbatim","value":"lemmatize_string($your_string);"}]},{"type":"para","id":"VQq8mMj2Mlgj0MHCaWDi9","margin":"","location":{"start":{"line":35,"column":1,"offset":825},"end":{"line":35,"column":92,"offset":916}},"text":"text","content":["To lemmatize an array of words (which also converts every string in the list to 
lowercase):"]},{"type":"block","id":"Ny7IpWaH48G8lJd_6EqxO","margin":"","name":"code","config":[],"location":{"start":{"line":37,"column":1,"offset":918},"end":{"line":39,"column":4,"offset":955}},"content":[{"type":"verbatim","value":"lemmatize_array(@your_array);"}]},{"type":"para","id":"TY8TGescEm_HZK_EK9k26","margin":"","location":{"start":{"line":41,"column":1,"offset":957},"end":{"line":41,"column":56,"offset":1012}},"text":"text","content":["To convert a string to an array of its component words:"]},{"type":"block","id":"r9VVhbWzkz8AIAlxa3Cww","margin":"","name":"code","config":[],"location":{"start":{"line":43,"column":1,"offset":1014},"end":{"line":45,"column":4,"offset":1051}},"content":[{"type":"verbatim","value":"words_to_array($your_string);"}]},{"type":"block","id":"Uw5_XDmKvklouS8DKxCjo","margin":"","level":2,"name":"head","location":{"start":{"line":47,"column":1,"offset":1053},"end":{"line":47,"column":29,"offset":1081}},"content":["Lemma List and Formatting"]},{"type":"block","id":"CVanq0thzJMbjWnY3DxAo","margin":"","level":3,"name":"head","location":{"start":{"line":49,"column":1,"offset":1083},"end":{"line":49,"column":11,"offset":1093}},"content":["Source"]},{"type":"para","id":"_RaaP23BIOd1kFKUxG8yD","margin":"","location":{"start":{"line":51,"column":1,"offset":1095},"end":{"line":51,"column":156,"offset":1250}},"text":"text","content":["The list of lemmas included here was sourced from ",{"meta":"https://github.com/skywind3000/lemma.en","type":"fcode","name":"L","content":["this GitHub repo"]}," by ",{"meta":"https://github.com/skywind3000","type":"fcode","name":"L","content":["Lin Wei."]}]},{"type":"para","id":"vcckg81QwKdbA9J_JZbja","margin":"","location":{"start":{"line":53,"column":1,"offset":1252},"end":{"line":54,"column":24,"offset":1398}},"text":"text","content":["The list was created by referencing the British Nation Corpus (BNC), NodeBox Linguistics and Yasumasa Someya's lemma list.\nFrom the original 
repo:"]},{"type":"block","id":"f52wB-B9EQhzfx_CIpDXw","margin":"","name":"nested","location":{"start":{"line":56,"column":1,"offset":1400},"end":{"line":56,"column":178,"offset":1577}},"content":[{"type":"para","id":"Gm2TMTcvDvdev8oBkHkLp","margin":"","location":{"start":{"line":56,"column":3,"offset":1402},"end":{"line":56,"column":178,"offset":1577}},"text":"text","content":["This lemma list is provided \"as is\" and is free to use for any research and/or educational purposes. The list currently contains 186,523 words (tokens) in 84,487 lemma groups."]}]},{"type":"block","id":"zkHR90Z8WcsH0tmPrFU-T","margin":"","level":3,"name":"head","location":{"start":{"line":58,"column":1,"offset":1579},"end":{"line":58,"column":15,"offset":1593}},"content":["Formatting"]},{"type":"para","id":"qtmrzrLpA1seIMhTGT8X_","margin":"","location":{"start":{"line":60,"column":1,"offset":1595},"end":{"line":60,"column":229,"offset":1823}},"text":"text","content":["To create your own list of lemmas for use with the library, create a csv file formatted like the one included here. Use two columns, the first containing your lemmas and the second containing comma-separated forms of the lemmas."]}]},"publishUrl":"/mods/zef/lemmatize/dist/README.md","pluginsData":{"moduleInfo":{"meta":{"api":0,"auth":"zef:ian-nai","build-depends":[],"depends":[],"description":"Lemmatize strings and lists using a provided English dataset or by providing your own.","dist":"lemmatize:ver<0.0.1>:auth<zef:ian-nai>","name":"lemmatize","path":"L/EM/LEMMATIZE/84841e5292a2b88c8a6a9d5aeb009be1cb19c707.tar.gz","provides":{"lemmatize":"lib/lemmatize.rakumod"},"resources":["lemmas.csv","lemmas_test.csv"],"tags":["lemmatize","lemma","nlp","text analysis"],"test-depends":[],"version":"0.0.1"},"files":[{"file":"work_mods/zef/lemmatize/dist/README.md","publishUrl":"/mods/zef/lemmatize/dist/README.md"}],"src":"zef","url":"/mods/zef/lemmatize"}}}
    =end data
    =end React


root

Quick hack to determine whether a shared library is present


LibraryCheck


Encoding and decoding of integers in the LEB128 encoding.


LEB128


=begin React :component<HeaderCol> :id<menu> 
=para L<Documentation|/doc/introduction>
=para L<Modules| /mods>
=para L<Examples| file:../examples/index.podlite>
=para L<Download |file:../download/index.podlite>
=para L<About|file:../about/index.podlite>
=para 🔍 K<⌘K>/K<ctrl-K>
=end React


=begin React :component<HeaderCol> :id<footer>  
=begin nested :!nested
=item1 B<Language>
    =item2 L<Get started|file:../getting-started/getting-started.podlite>
    =item2 L<Why Raku?|/doc/language/faq#Why-should-I-learn-Raku-What's-so-great-about-it>
    =item2 L<Try Raku|https://glot.io/new/raku>
    =item2 L<Raku cheat sheet|https://github.com/Raku/mu/blob/master/docs/Perl6/Cheatsheet/cheatsheet.txt>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<Raku on Exercism|https://exercism.org/tracks/raku>
    =item2 L<Wikipedia|https://en.wikipedia.org/wiki/Raku_(programming_language)>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>

=end nested
=begin nested :!nested

=item1 B<L<Documentation|/doc>>
    =item2 L<Getting started, Migration guides from other languages, & Tutorials | file:../doc/introduction.podlite>
    =item2 L<Language References|file:../doc/reference.podlite>
    =item2 L<Type Reference| file:../doc/types.podlite>
    =item2 L<Miscellaneous| file:../doc/miscellaneous.podlite>
    =item2 L<FAQs (Frequently Asked Questions)|/doc/language/faq>
    =item2 L<Community|/doc/language/community>
    =item2 L<The list of all documents|file:../doc/index.podlite>

=item1 B<Resources>
    =item2 L<The Raku Guide|https://raku.guide/>
    =item2 L<Books |https://perl6book.com/>
    =item2 L<Rosetta Code | https://www.raku.org/community/rosettacode>

=end nested
=begin nested :!nested

=item1 B<Resources>
    =item2 L<Download | file:../download/index.podlite>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>

=item1 B<Learning>
    =item2 L<The Raku Guide|https://raku.guide/>
    =item2 L<Wikibook |https://en.wikibooks.org/wiki/Raku_Programming>
    =item2 L<Books |https://perl6book.com/>
    =item2 L<Rosetta Code | https://www.raku.org/community/rosettacode>
    =item2 L<Learn Raku in Y minutes|https://learnxinyminutes.com/docs/raku/>
    =item2 L<Golfing |https://github.com/AlexDaniel/raku-golf-cheatsheet>
=end nested
=begin nested :!nested

=item1 B<Explore>
    =item2 L<Raku Blog Aggregator|https://planet.raku.org/>
    =item2 L<Rakudo Weekly|https://rakudoweekly.blog/>
    =item2 L<The Weekly Challenge |https://perlweeklychallenge.org/>
    =item2 L<Raku Advent Calendar|https://raku-advent.blog/>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>
    =item2 L<empty|#>

=end nested
=end React


=begin React :component<CookieConsent> :id<CookieConsent> :buttonCaption("Got it!")
=para
This website uses cookies for analytics.
=end React


lemmatize

Raku Lemmatize

Installation

Usage

Lemma List and Formatting

Source

Formatting

lemmatize v0.0.1

Authors

License

Dependencies

Test Dependencies

Provides

Documentation