diff --git a/fragments.py b/fragments.py index aec298a..52257c8 100644 --- a/fragments.py +++ b/fragments.py @@ -4,8 +4,7 @@ import string from lxml import etree -FRAGMENT_PREFIX = "in" -FRAGMENT_TAG = ";%s" % FRAGMENT_PREFIX +FRAGMENT_TAG = "+?" FRAGMENT_CLASS = "fragment" # Search and delete the FRAGMENT_TAG anywhere in the given HTML @@ -23,7 +22,7 @@ def defragmentize(html): class_list += " %s" % FRAGMENT_CLASS fragment.set('class', class_list) - fragment.text = re.sub(r"\s*;%s\s*" % FRAGMENT_PREFIX, '', fragment.text).strip() + fragment.text = re.sub(r"\s*%s\s*" % re.escape(FRAGMENT_TAG), '', fragment.text).strip() return etree.tostring(dom, method='html', encoding='utf-8', pretty_print=True).decode('utf-8')