| |
@@ -0,0 +1,40 @@
|
| |
+ import urlparse
|
| |
+
|
| |
+ import bleach
|
| |
+
|
| |
+
|
| |
def filter_img_src(name, value):
    """Decide whether an attribute of an ``img`` tag may be kept.

    ``alt``, ``height``, ``width`` and ``class`` are always accepted.
    ``src`` is accepted only when the URL has no network location (a
    relative URL) or when its network location equals the one of the
    configured ``APP_URL``.  Every other attribute is rejected.

    :arg name: the name of the attribute being checked.
    :arg value: the value of that attribute.
    :returns: True if the attribute is to be kept, False otherwise.
    """
    # Imported inside the function, as in the original layout; presumably to
    # avoid a circular import with hubs.app -- TODO confirm.
    import hubs.app

    if name == 'src':
        img_host = urlparse.urlparse(value).netloc
        if not img_host:
            # No host at all: the image is relative to the current site.
            return True
        app_host = urlparse.urlparse(hubs.app.app.config['APP_URL']).netloc
        return img_host == app_host

    return name in ('alt', 'height', 'width', 'class')
|
| |
+
|
| |
+
|
| |
def clean(text, ignore=None):
    """Escape everything in an HTML text that we do not want to support,
    to avoid potential security breaches.

    The allowed tags are bleach's default tags plus a fixed list of extra
    block, table and formatting tags.  The allowed attributes are bleach's
    defaults; unless ``img`` is ignored, attributes of ``img`` tags are
    vetted by ``filter_img_src``.

    :arg text: the HTML text to sanitize.
    :kwarg ignore: a single tag name, or a tuple/list/set/frozenset of tag
        names, to remove from the allowed tags.  Defaults to None, meaning
        nothing is removed.
    :returns: the result of ``bleach.clean`` on ``text``.
    """
    if ignore and not isinstance(ignore, (tuple, set, frozenset, list)):
        # A lone tag name was given: treat it as a one-element collection.
        ignore = [ignore]
    ignore = ignore or ()

    # Work on a copy.  Assigning into bleach.ALLOWED_ATTRIBUTES itself would
    # change the library-wide defaults for every later caller in the
    # process, including calls of this function that ignore 'img'.
    attrs = dict(bleach.ALLOWED_ATTRIBUTES)
    if 'img' not in ignore:
        attrs['img'] = filter_img_src

    # list() makes a fresh list and keeps the concatenation working even if
    # the library exposes ALLOWED_TAGS as a non-list collection.
    tags = list(bleach.ALLOWED_TAGS) + [
        'p', 'br', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'table', 'td', 'tr', 'th',
        'col', 'tbody', 'pre', 'img', 'hr', 'dl', 'dt', 'dd', 'span',
        'kbd', 'var',
    ]
    tags = [tag for tag in tags if tag not in ignore]

    return bleach.clean(text, tags=tags, attributes=attrs)
|
| |