From d008d58412d3a2fb7a406cbdc06a54b2e3af5f29 Mon Sep 17 00:00:00 2001
From: Kyle Mahan <kyle.mahan@gmail.com>
Date: Mon, 25 Apr 2016 13:58:47 -0700
Subject: [PATCH] use new authorship implementation from mf2util

---
 requirements.txt  |  2 +-
 woodwind/tasks.py | 12 +++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 23c1397..c53d454 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,7 +18,7 @@ itsdangerous==0.24
 Jinja2==2.8
 MarkupSafe==0.23
 mf2py==1.0.4
--e git+https://github.com/kylewm/mf2util.git@master#egg=mf2util-master
+mf2util==0.4.0
 psycopg2==2.6.1
 pyasn1==0.1.9
 pycparser==2.14
diff --git a/woodwind/tasks.py b/woodwind/tasks.py
index 644e53e..1a5b540 100644
--- a/woodwind/tasks.py
+++ b/woodwind/tasks.py
@@ -438,6 +438,15 @@ def process_xml_feed_for_new_entries(feed, content, backfill, now):
 
 
 def process_html_feed_for_new_entries(feed, content, backfill, now):
+    mf2_cache = {}
+
+    def fetch_mf2(url):
+        if url in mf2_cache:
+            return mf2_cache[url]
+        p = mf2py.parse(url=url)
+        mf2_cache[url] = p
+        return p
+
     # strip noscript tags before parsing, since we definitely aren't
     # going to preserve js
     content = re.sub('</?noscript[^>]*>', '', content, flags=re.IGNORECASE)
@@ -449,7 +458,8 @@ def process_html_feed_for_new_entries(feed, content, backfill, now):
 
     parsed = mf2util.interpret_feed(
         mf2py.parse(doc, feed.feed),
-        source_url=feed.feed, base_href=base_href)
+        source_url=feed.feed, base_href=base_href,
+        fetch_mf2_func=fetch_mf2)
     hfeed = parsed.get('entries', [])
 
     for hentry in hfeed: