From: zharkovstas Date: Sun, 19 May 2019 00:27:52 +0000 (+0500) Subject: stemmer fix X-Git-Url: https://git.xn--bdkaa.com/?a=commitdiff_plain;h=6c9e6e0c6eb8eb59bd16fc360299c58945b96669;p=where-are-you.py.git stemmer fix --- diff --git a/stemmer.py b/stemmer.py index b7e5b79..9396de7 100644 --- a/stemmer.py +++ b/stemmer.py @@ -8,6 +8,7 @@ many_spaces_regex = re.compile(r' +') def normalize(line): result = line.lower() result = result.replace('ё', 'е') + result = result.replace('́', '') result = re.sub(punctuation_regex, ' \g<0> ', result) result = re.sub(bad_characters_regex, ' ', result) result = re.sub(many_spaces_regex, ' ', result) @@ -20,4 +21,4 @@ def stemming(text): return ' '.join(stemmer.stemWords(normilized_text.split())) if __name__ == "__main__": - print(stemming('Спланой мост')) + print(stemming('Декабри́сты'))