diff --git a/fragments.py b/fragments.py index 4f1a2d6..aec298a 100644 --- a/fragments.py +++ b/fragments.py @@ -5,7 +5,7 @@ import string from lxml import etree FRAGMENT_PREFIX = "in" -FRAGMENT_TAG = "^%s" % FRAGMENT_PREFIX +FRAGMENT_TAG = ";%s" % FRAGMENT_PREFIX FRAGMENT_CLASS = "fragment" # Search and delete the FRAGMENT_TAG anywhere in the given HTML @@ -23,7 +23,7 @@ def defragmentize(html): class_list += " %s" % FRAGMENT_CLASS fragment.set('class', class_list) - fragment.text = re.sub(r"\W*\^%s\W*" % FRAGMENT_PREFIX, '', fragment.text).strip() + fragment.text = re.sub(r"\s*;%s\s*" % FRAGMENT_PREFIX, '', fragment.text).strip() return etree.tostring(dom, method='html', encoding='utf-8', pretty_print=True).decode('utf-8')