From 6761835fa53b69d0197993a1bb757883f0971aed Mon Sep 17 00:00:00 2001 From: Kyle Mahan Date: Wed, 22 Apr 2015 22:07:25 +0000 Subject: [PATCH] use mf2py parsed 'alternates' --- woodwind/tasks.py | 22 +++++++++++++++------- woodwind/views.py | 10 ++++------ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/woodwind/tasks.py b/woodwind/tasks.py index 6db2e61..0ead8a1 100644 --- a/woodwind/tasks.py +++ b/woodwind/tasks.py @@ -116,13 +116,17 @@ def update_feed(feed_id, content=None, is_polling=True): if not old: # set a default value for published if none is provided entry.published = entry.published or now - new_ids.append(entry.id) - for irt in entry.get_property('in-reply-to', []): - reply_pairs.append((entry.id, irt)) + in_reply_tos = entry.get_property('in-reply-to', []) feed.entries.append(entry) db.session.commit() + + new_ids.append(entry.id) + for irt in in_reply_tos: + reply_pairs.append((entry.id, irt)) + elif not is_content_equal(old, entry): entry.published = entry.published or old.published + in_reply_tos = entry.get_property('in-reply-to', []) # we're updating an old entriy, use the original # retrieved time entry.retrieved = old.retrieved @@ -130,11 +134,13 @@ def update_feed(feed_id, content=None, is_polling=True): # punt on deleting for now, learn about cascade # and stuff later # session.delete(old) - updated_ids.append(entry.id) - for irt in entry.get_property('in-reply-to', []): - reply_pairs.append((entry.id, irt)) feed.entries.append(entry) db.session.commit() + + updated_ids.append(entry.id) + for irt in in_reply_tos: + reply_pairs.append((entry.id, irt)) + else: current_app.logger.debug( 'skipping previously seen post %s', old.permalink) @@ -225,7 +231,9 @@ def notify_feed_updated(app, feed, entry_ids): """ from flask import render_template import flask.ext.login as flask_login - + current_app.logger.debug( + 'notifying feed updated for entries %r', entry_ids) + entries = Entry.query\ .filter(Entry.id.in_(entry_ids))\ .order_by(Entry.retrieved.desc(), diff --git a/woodwind/views.py b/woodwind/views.py index 8a24943..a392a3e 100644 --- a/woodwind/views.py +++ b/woodwind/views.py @@ -371,9 +371,9 @@ def find_possible_feeds(origin): }) elif content_type == 'text/html': + parsed = mf2py.parse(doc=resp.text, url=origin) # if text/html, then parse and look for h-entries - hfeed = mf2util.interpret_feed( - mf2py.Parser(doc=resp.text).to_dict(), origin) + hfeed = mf2util.interpret_feed(parsed, origin) if hfeed.get('entries'): feeds.append({ 'origin': origin, @@ -382,13 +382,11 @@ def find_possible_feeds(origin): }) # then look for link rel="alternate" - soup = bs4.BeautifulSoup(resp.text) - for link in soup.find_all('link', {'rel': 'alternate'}): + for link in parsed.get('alternates', []): if link.get('type') in xml_feed_types: - feed_url = urllib.parse.urljoin(origin, link.get('href')) feeds.append({ 'origin': origin, - 'feed': feed_url, + 'feed': link.get('url'), 'type': 'xml', }) return feeds