From 385ab45c22dec45694c5a884faf46dad9eff6dd2 Mon Sep 17 00:00:00 2001 From: Christos Mantas Date: Wed, 5 Oct 2016 11:41:35 +0300 Subject: [PATCH 1/2] [refactor] Simplify some methods' API Methods `nominal_verbs_suffix_machine`, `noun_suffix_machine` and `derivational_suffix_machine` did not need to take a block (containing a single word) and yield. They were refactored to take the word as an argument. --- lib/turkish_stemmer.rb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/turkish_stemmer.rb b/lib/turkish_stemmer.rb index b199547..2b263ac 100644 --- a/lib/turkish_stemmer.rb +++ b/lib/turkish_stemmer.rb @@ -40,13 +40,13 @@ def stem(original_word) # Process stems = [] - stems << nominal_verbs_suffix_machine { word } + stems << nominal_verbs_suffix_machine(word) stems << original_word stems.flatten!.uniq! - stems << stems.map { |word| noun_suffix_machine { word }} + stems << stems.map { |word| noun_suffix_machine(word) } stems << original_word stems.flatten!.uniq! - stems << stems.map { |word| derivational_suffix_machine { word }} + stems << stems.map { |word| derivational_suffix_machine(word) } # Postprocess stem_post_process(stems, original_word) @@ -349,20 +349,20 @@ def last_consonant!(word) end # Helper method. This is just a shortcut. - def nominal_verbs_suffix_machine - affix_morphological_stripper(yield, states: self::NOMINAL_VERB_STATES, + def nominal_verbs_suffix_machine(term) + affix_morphological_stripper(term, states: self::NOMINAL_VERB_STATES, suffixes: self::NOMINAL_VERB_SUFFIXES) end # Helper method. This is just a shortcut.
- def noun_suffix_machine - affix_morphological_stripper(yield, states: self::NOUN_STATES, + def noun_suffix_machine(term) + affix_morphological_stripper(term, states: self::NOUN_STATES, suffixes: self::NOUN_SUFFIXES) end # Helper method - def derivational_suffix_machine - affix_morphological_stripper(yield, states: self::DERIVATIONAL_STATES, + def derivational_suffix_machine(term) + affix_morphological_stripper(term, states: self::DERIVATIONAL_STATES, suffixes: self::DERIVATIONAL_SUFFIXES) end From 5cafb32f29aa93041e6243017f89ab9aed482de8 Mon Sep 17 00:00:00 2001 From: Christos Mantas Date: Wed, 5 Oct 2016 12:00:53 +0300 Subject: [PATCH 2/2] [refactor] Simplify the stem candidate creation All the stem candidates are appended to the list and uniq! is called once. --- lib/turkish_stemmer.rb | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/lib/turkish_stemmer.rb b/lib/turkish_stemmer.rb index 2b263ac..4e859be 100644 --- a/lib/turkish_stemmer.rb +++ b/lib/turkish_stemmer.rb @@ -36,19 +36,16 @@ def stem(original_word) # Preprocess return original_word if !proceed_to_stem?(original_word) - word = original_word.dup - # Process - stems = [] - stems << nominal_verbs_suffix_machine(word) - stems << original_word - stems.flatten!.uniq! - stems << stems.map { |word| noun_suffix_machine(word) } - stems << original_word - stems.flatten!.uniq! - stems << stems.map { |word| derivational_suffix_machine(word) } - - # Postprocess + # set of stem candidates + stems = [original_word, *nominal_verbs_suffix_machine(original_word.dup)] + noun_suffix_stems = stems.map(&method(:noun_suffix_machine)).flatten + stems.push(*noun_suffix_stems) + derivational_suffix_stems = stems.map(&method(:derivational_suffix_machine)) + stems.push(*derivational_suffix_stems) + stems.uniq! + + # Postprocess: filter and choose among the stem candidates stem_post_process(stems, original_word) end