From e8929ccaeefbc7b6aa745c02c220393eefd6410c Mon Sep 17 00:00:00 2001 From: Patrick Uiterwijk Date: Jun 02 2016 19:46:09 +0000 Subject: Unify content simplification rules Signed-off-by: Patrick Uiterwijk --- diff --git a/basset/score/content.py b/basset/score/content.py index 38603e4..f47d165 100644 --- a/basset/score/content.py +++ b/basset/score/content.py @@ -71,12 +71,24 @@ class ContentScore(ScorePlugin): return scores def _get_title_score(self, title): - title = title.replace(' ', '_').replace('.', '').replace("-","").lower() + replacements = [ + ('+', ' '), + ('%20', ' '), + (' ', '_'), + ('-', ''), + ('.', ''), + ] + for replacement in replacements: + title = title.replace(*replacement) + title = title.lower() mapping = {} for option in self.config: if option.startswith('title_'): - key = option[len('title_'):].lower() - mapping[key.replace('-','')] = self.config[option] + key = option[len('title_'):] + for replacement in replacements: + key = key.replace(*replacement) + key = key.lower() + mapping[key] = self.config[option] total_score = 0 for word in mapping: