use new authorship implementation from mf2util

This commit is contained in:
Kyle Mahan 2016-04-25 13:58:47 -07:00
parent f6f11bf41b
commit d008d58412
2 changed files with 12 additions and 2 deletions

View file

@ -18,7 +18,7 @@ itsdangerous==0.24
Jinja2==2.8
MarkupSafe==0.23
mf2py==1.0.4
-e git+https://github.com/kylewm/mf2util.git@master#egg=mf2util-master
mf2util==0.4.0
psycopg2==2.6.1
pyasn1==0.1.9
pycparser==2.14

View file

@ -438,6 +438,15 @@ def process_xml_feed_for_new_entries(feed, content, backfill, now):
def process_html_feed_for_new_entries(feed, content, backfill, now):
mf2_cache = {}
def fetch_mf2(url):
    """Return the mf2py parse result for ``url``, memoized in ``mf2_cache``.

    The first request for a given URL calls ``mf2py.parse(url=url)`` and
    stores the result under that URL; later requests for the same URL
    return the stored object without parsing again.
    """
    if url not in mf2_cache:
        # Only parse URLs we have not seen yet.
        mf2_cache[url] = mf2py.parse(url=url)
    return mf2_cache[url]
# strip noscript tags before parsing, since we definitely aren't
# going to preserve js
content = re.sub('</?noscript[^>]*>', '', content, flags=re.IGNORECASE)
@ -449,7 +458,8 @@ def process_html_feed_for_new_entries(feed, content, backfill, now):
parsed = mf2util.interpret_feed(
mf2py.parse(doc, feed.feed),
source_url=feed.feed, base_href=base_href)
source_url=feed.feed, base_href=base_href,
fetch_mf2_func=fetch_mf2)
hfeed = parsed.get('entries', [])
for hentry in hfeed: