# Origin: hubs/widgets/clean_input.py, introduced by commit
# 1236c4d7f040fed1c2a60063715ab76d9ab2de2d (Pierre-Yves Chibon, Jul 20 2016),
# "Add a clean_input module in hubs.widget".
# The same commit adds ``bleach`` to requirements.txt.
"""Clean the input provided by users to ensure there is nothing malicious
in it.

The module exposes :func:`clean`, a thin wrapper around ``bleach.clean`` with
a wider tag whitelist, and :func:`filter_img_src`, the attribute filter
applied to ``<img>`` tags.
"""

try:
    import urlparse  # Python 2
except ImportError:  # Python 3: the same functions live in urllib.parse
    import urllib.parse as urlparse

import bleach


# <img> attributes that are always kept, whatever their value.
_IMG_SAFE_ATTRIBUTES = ('alt', 'height', 'width', 'class')

# Tags accepted in addition to bleach's own default whitelist.
_EXTRA_TAGS = [
    'p', 'br', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'table', 'td', 'tr', 'th',
    'col', 'tbody', 'pre', 'img', 'hr', 'dl', 'dt', 'dd', 'span',
    'kbd', 'var',
]

# C0 control characters and space (U+0000 - U+0020).
_URL_EDGE_CHARS = ''.join(chr(code) for code in range(0x21))


def _as_browser_sees(url):
    ''' Normalise ``url`` the way a browser does before resolving it. '''
    # Browsers drop leading/trailing control characters and spaces, remove
    # every tab/newline, and treat "\" like "/" in http(s) URLs.  Without
    # this step "/\evil.example/x.png" has an empty netloc for urlparse while
    # a browser would fetch it from evil.example.
    url = url.strip(_URL_EDGE_CHARS)
    for char in '\t\r\n':
        url = url.replace(char, '')
    return url.replace('\\', '/')


def filter_img_src(name, value):
    ''' Filter in img html tags images coming from a different domain.

    :arg name: name of the attribute found on the ``<img>`` tag.
    :arg value: value of that attribute.
    :return: ``True`` if the attribute may be kept, ``False`` if it must be
        dropped. ``alt``, ``height``, ``width`` and ``class`` are always
        kept; ``src`` is kept only when it has no network location
        (relative URL) or when its network location is the one of the
        ``APP_URL`` configuration key; anything else is dropped.

    The comparison is done on a browser-normalised copy of ``value``; the
    attribute value itself is never modified.
    '''
    # NOTE(review): imported here rather than at module level, presumably to
    # avoid a circular import with hubs.app -- confirm.
    import hubs.app
    # NOTE(review): this two-argument signature is what bleach 1.x passes to
    # attribute callables; later bleach releases appear to pass
    # (tag, name, value) -- confirm against the pinned version.
    if name in _IMG_SAFE_ATTRIBUTES:
        return True
    if name != 'src':
        return False

    netloc = urlparse.urlparse(_as_browser_sees(value)).netloc
    if not netloc:
        # Relative URL: served by this very application.
        return True
    app_netloc = urlparse.urlparse(hubs.app.app.config['APP_URL']).netloc
    return netloc == app_netloc


def clean(text, ignore=None):
    """ For a given html text, escape everything we do not want to support
    to avoid potential security breach.

    :arg text: the html text to clean.
    :kwarg ignore: a tag name, or a tuple/list/set/frozenset of tag names,
        to remove from the whitelist of supported tags.
    :return: whatever ``bleach.clean`` returns for ``text`` with the
        resulting tag and attribute whitelists.
    """
    if ignore and not isinstance(ignore, (tuple, set, frozenset, list)):
        ignore = [ignore]
    ignored = ignore or ()

    # Work on a copy: bleach.ALLOWED_ATTRIBUTES is a module-level default
    # shared by every bleach user in the process and must not be altered.
    attrs = dict(bleach.ALLOWED_ATTRIBUTES)
    if 'img' not in ignored:
        attrs['img'] = filter_img_src

    # list() keeps this working whether bleach exposes its default tags as a
    # list or as a set-like object.
    tags = [
        tag for tag in list(bleach.ALLOWED_TAGS) + _EXTRA_TAGS
        if tag not in ignored
    ]

    return bleach.clean(text, tags=tags, attributes=attrs)