From 6c9e6e0c6eb8eb59bd16fc360299c58945b96669 Mon Sep 17 00:00:00 2001 From: zharkovstas Date: Sun, 19 May 2019 05:27:52 +0500 Subject: [PATCH] stemmer fix --- stemmer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stemmer.py b/stemmer.py index b7e5b79..9396de7 100644 --- a/stemmer.py +++ b/stemmer.py @@ -8,6 +8,7 @@ many_spaces_regex = re.compile(r' +') def normalize(line): result = line.lower() result = result.replace('ё', 'е') + result = result.replace('́', '') result = re.sub(punctuation_regex, ' \g<0> ', result) result = re.sub(bad_characters_regex, ' ', result) result = re.sub(many_spaces_regex, ' ', result) @@ -20,4 +21,4 @@ def stemming(text): return ' '.join(stemmer.stemWords(normilized_text.split())) if __name__ == "__main__": - print(stemming('Спланой мост')) + print(stemming('Декабри́сты')) -- 2.50.1