diff options
author | Christopher Allan Webber <cwebber@dustycloud.org> | 2011-10-01 21:27:36 -0500 |
---|---|---|
committer | Christopher Allan Webber <cwebber@dustycloud.org> | 2011-10-01 21:27:36 -0500 |
commit | b43b17fc2686f5524413a66f8e98f3ab0cc11a60 (patch) | |
tree | 36fb9cf3155a2ff715b1e93f29be6a17cb2113e9 /mediagoblin/tools/text.py | |
parent | e27396802caaab9a939c56d19c991339157c493f (diff) | |
parent | 91e42c467d898ef70dec2d2d34e4173ea771d2ed (diff) | |
download | mediagoblin-b43b17fc2686f5524413a66f8e98f3ab0cc11a60.tar.lz mediagoblin-b43b17fc2686f5524413a66f8e98f3ab0cc11a60.tar.xz mediagoblin-b43b17fc2686f5524413a66f8e98f3ab0cc11a60.zip |
Merge remote branch 'remotes/aaronw/bug444_fix_utils_py_redux'
Conflicts:
mediagoblin/util.py
Diffstat (limited to 'mediagoblin/tools/text.py')
-rw-r--r-- | mediagoblin/tools/text.py | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/mediagoblin/tools/text.py b/mediagoblin/tools/text.py new file mode 100644 index 00000000..de4bb281 --- /dev/null +++ b/mediagoblin/tools/text.py @@ -0,0 +1,117 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import wtforms +import markdown +from lxml.html.clean import Cleaner + +from mediagoblin import mg_globals +from mediagoblin.tools import url + +# A super strict version of the lxml.html cleaner class +HTML_CLEANER = Cleaner( + scripts=True, + javascript=True, + comments=True, + style=True, + links=True, + page_structure=True, + processing_instructions=True, + embedded=True, + frames=True, + forms=True, + annoying_tags=True, + allow_tags=[ + 'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'], + remove_unknown_tags=False, # can't be used with allow_tags + safe_attrs_only=True, + add_nofollow=True, # for now + host_whitelist=(), + whitelist_tags=set([])) + +def clean_html(html): + # clean_html barfs on an empty string + if not html: + return u'' + + return HTML_CLEANER.clean_html(html) + +def convert_to_tag_list_of_dicts(tag_string): + """ + Filter input from incoming string containing user tags, + + Strips trailing, leading, and internal whitespace, and also converts + the "tags" text into an array of tags + """ + taglist = [] + if tag_string: + + # Strip out internal, trailing, and leading whitespace + stripped_tag_string = u' '.join(tag_string.strip().split()) + + # Split the tag string into a list of tags + for tag in stripped_tag_string.split( + mg_globals.app_config['tags_delimiter']): + + # Ignore empty or duplicate tags + if tag.strip() and tag.strip() not in [t['name'] for t in taglist]: + + taglist.append({'name': tag.strip(), + 'slug': url.slugify(tag.strip())}) + return taglist + +def media_tags_as_string(media_entry_tags): + """ + Generate a string from a media item's tags, stored as a list of dicts + + This is the opposite of convert_to_tag_list_of_dicts + """ + media_tag_string = '' + if media_entry_tags: + media_tag_string = mg_globals.app_config['tags_delimiter'].join( + [tag['name'] for tag in media_entry_tags]) + return media_tag_string + +TOO_LONG_TAG_WARNING = \ + u'Tags must be shorter than %s characters. Tags that are too long: %s' + +def tag_length_validator(form, field): + """ + Make sure tags do not exceed the maximum tag length. + """ + tags = convert_to_tag_list_of_dicts(field.data) + too_long_tags = [ + tag['name'] for tag in tags + if len(tag['name']) > mg_globals.app_config['tags_max_length']] + + if too_long_tags: + raise wtforms.ValidationError( + TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], \ + ', '.join(too_long_tags))) + + +MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape') + +def cleaned_markdown_conversion(text): + """ + Take a block of text, run it through MarkDown, and clean its HTML. + """ + # Markdown will do nothing with and clean_html can do nothing with + # an empty string :) + if not text: + return u'' + + return clean_html(MARKDOWN_INSTANCE.convert(text)) |