Use regex instead of Beautiful Soup to clean tags before looking for links to embed
This commit is contained in:
parent
474fcf07ba
commit
486e3f5827
1 changed file with 5 additions and 5 deletions
|
@@ -573,11 +573,11 @@ def add_preview(content):
|
|||
# don't add a preview to a post that already has one
|
||||
return content
|
||||
|
||||
# flatten links
|
||||
soup = bs4.BeautifulSoup(content)
|
||||
for link in soup.find_all('a'):
|
||||
link.replace_with(link.get('href'))
|
||||
flat = soup.get_text().strip()
|
||||
# flatten links and strip tags
|
||||
flat = content
|
||||
flat = re.sub(r'<a [^>]*href="([^"]+)"[^>]*>[^<]*</a>', r'\1', flat)
|
||||
flat = re.sub(r'</?\w+[^>]*>', '', flat)
|
||||
flat = flat.strip()
|
||||
|
||||
instagram_regex = r'https?://(?:www\.)?instagram.com/p/[\w\-]+/?'
|
||||
vimeo_regex = r'https?://(?:www\.)?vimeo.com/(\d+)/?'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue