use regex instead of beautiful soup to clean tags before looking for links to embed

2015-12-07 09:11:53 -08:00 · 2015-12-07 09:11:53 -08:00 · 486e3f5827
commit 486e3f5827
parent 474fcf07ba
1 changed file with 5 additions and 5 deletions
--- a/woodwind/views.py
+++ b/woodwind/views.py
@@ -573,11 +573,11 @@ def add_preview(content):
        # don't add  a preview to a post that already has one
        return content

-    # flatten links
-    soup = bs4.BeautifulSoup(content)
-    for link in soup.find_all('a'):
-        link.replace_with(link.get('href'))
-    flat = soup.get_text().strip()
+    # flatten links and strip tags
+    flat = content
+    flat = re.sub(r'<a [^>]*href="([^"]+)"[^>]*>[^<]*</a>', r'\1', flat)
+    flat = re.sub(r'</?\w+[^>]*>', '', flat)
+    flat = flat.strip()

    instagram_regex = r'https?://(?:www\.)?instagram.com/p/[\w\-]+/?'
    vimeo_regex = r'https?://(?:www\.)?vimeo.com/(\d+)/?'