def normalize(line):
result = line.lower()
result = result.replace('ё', 'е')
+ result = result.replace('́', '')
result = re.sub(punctuation_regex, ' \g<0> ', result)
result = re.sub(bad_characters_regex, ' ', result)
result = re.sub(many_spaces_regex, ' ', result)
return ' '.join(stemmer.stemWords(normilized_text.split()))
if __name__ == "__main__":
    # Manual smoke check. The sample word carries a combining acute accent
    # (U+0301, written as an explicit escape so it stays visible in editors)
    # right after the letter "и", i.e. a stress mark inside the word.
    # NOTE(review): the previous literal was mojibake (UTF-8 bytes read as
    # Latin-1, leaving invalid escapes such as "\94"); it is restored here to
    # the word those bytes encode — confirm against the original file.
    print(stemming('Декабри\u0301сты'))