Add preliminary support for PubSubHubbub-enabled feeds

This commit is contained in:
Kyle Mahan 2015-02-21 22:59:26 -08:00
parent 8e3a671531
commit 6f2dc85110
5 changed files with 134 additions and 13 deletions

10
run.py Executable file
View file

@ -0,0 +1,10 @@
#!/usr/bin/env python
"""Development entry point: serve the woodwind Flask app locally."""


def main():
    """Create the woodwind application and run Flask's built-in server.

    The server is started with ``debug=True`` on port 4000, so this script
    is intended for local development rather than deployment.
    """
    # Imported inside main() so that merely importing this module does not
    # import the woodwind package.
    from woodwind.app import create_app
    app = create_app()
    app.run(debug=True, port=4000)


# Only start the server when executed as a script; importing this module
# (from a test, a shell, or another entry point) must not block in app.run().
if __name__ == '__main__':
    main()

View file

@ -1,9 +1,9 @@
from . import extensions
from .views import views
from .api import api
from .push import push
from config import Config
import flask
import logging
def create_app():
@ -12,4 +12,5 @@ def create_app():
extensions.init_app(app)
app.register_blueprint(views)
app.register_blueprint(api)
app.register_blueprint(push)
return app

View file

@ -108,6 +108,11 @@ class Feed(db.Model):
last_checked = db.Column(db.DateTime)
etag = db.Column(db.String(512))
push_hub = db.Column(db.String(512))
push_topic = db.Column(db.String(512))
push_verified = db.Column(db.Boolean)
last_pinged = db.Column(db.DateTime)
def get_feed_code(self):
    """Return this feed's URL (``self.feed``) as hex-encoded bytes."""
    encoded_url = self.feed.encode()
    return binascii.hexlify(encoded_url)

50
woodwind/push.py Normal file
View file

@ -0,0 +1,50 @@
from . import tasks
from .extensions import db
from .models import Feed
from flask import Blueprint, request, abort, current_app, make_response
import datetime
# Flask blueprint that carries the PuSH callback route(s) of this module.
push = Blueprint('push', __name__)
@push.route('/_notify/<int:feed_id>', methods=['GET', 'POST'])
def notify(feed_id):
    """Handle PuSH hub callbacks for a single feed.

    GET requests are treated as the hub verifying a subscribe or
    unsubscribe request: when the request matches the feed's stored topic
    state, the ``hub.challenge`` value is echoed back; otherwise the
    request is rejected with 404. Any other request is treated as a content
    notification: the feed's ``last_pinged`` time is recorded and an update
    task is queued.

    :param feed_id: primary key of the Feed this callback URL refers to.
    :return: the challenge as a ``text/plain`` response (verification), or
        an empty 204 response (notification).
    :raises: aborts with 404 for an unknown feed or an unverifiable
        request, and with 400 for a verification request that carries no
        ``hub.challenge``.
    """
    logger = current_app.logger
    logger.debug('received PuSH notification for feed id %d', feed_id)

    feed = Feed.query.get(feed_id)
    if not feed:
        logger.debug('could not find feed corresponding to %d', feed_id)
        abort(404)

    logger.debug('processing PuSH notification for feed %r', feed)

    if request.method == 'GET':
        # verify subscribe or unsubscribe
        mode = request.args.get('hub.mode')
        topic = request.args.get('hub.topic')
        challenge = request.args.get('hub.challenge')
        logger.debug(
            'PuSH verification. feed=%r, mode=%s, topic=%s, challenge=%s',
            feed, mode, topic, challenge)

        if challenge is None:
            # Without a challenge there is nothing to echo back, and a view
            # returning None is a server error in Flask. Reject before any
            # state is changed.
            logger.debug(
                'PuSH verification without hub.challenge for feed=%r', feed)
            abort(400)

        verified = False
        if mode == 'subscribe' and topic == feed.push_topic:
            logger.debug(
                'PuSH verify subscribe for feed=%r, topic=%s', feed, topic)
            feed.push_verified = True
            db.session.commit()
            verified = True
        elif mode == 'unsubscribe' and topic != feed.push_topic:
            # An unsubscribe is only confirmed for a topic the feed is NOT
            # currently using -- presumably so that the live subscription
            # can never be dropped by a stray unsubscribe request.
            logger.debug(
                'PuSH verify unsubscribe for feed=%r, topic=%s', feed, topic)
            verified = True

        if verified:
            # The challenge is request-controlled data: serve it as plain
            # text so a browser never interprets the echoed value as HTML.
            return make_response(
                challenge, 200,
                {'Content-Type': 'text/plain; charset=utf-8'})

        logger.debug('PuSH cannot verify %s for feed=%r, topic=%s',
                     mode, feed, topic)
        abort(404)

    # could it be? an actual push notification!?
    logger.debug('received PuSH ping for %r', feed)
    feed.last_pinged = datetime.datetime.utcnow()
    db.session.commit()
    # The actual fetch happens asynchronously in the update_feed task.
    tasks.update_feed.delay(feed.id)
    return make_response('', 204)

View file

@ -12,10 +12,12 @@ import sqlalchemy
import sqlalchemy.orm
import time
import urllib.parse
import requests
UPDATE_INTERVAL = datetime.timedelta(hours=1)
TWITTER_RE = re.compile(r'https?://(?:www\.|mobile\.)?twitter\.com/(\w+)/status(?:es)?/(\w+)')
TWITTER_RE = re.compile(
r'https?://(?:www\.|mobile\.)?twitter\.com/(\w+)/status(?:es)?/(\w+)')
app = celery.Celery('woodwind')
app.config_from_object('celeryconfig')
@ -57,17 +59,25 @@ def update_feed(feed_id):
with session_scope() as session:
feed = session.query(Feed).get(feed_id)
logger.info('Updating {}'.format(feed))
process_feed_for_new_entries(session, feed)
process_feed(session, feed)
def process_feed_for_new_entries(session, feed):
def process_feed(session, feed):
now = datetime.datetime.utcnow()
found_new = False
try:
logger.info('fetching feed: %s', feed)
response = requests.get(feed.feed)
if response.status_code // 100 != 2:
logger.warn('bad response from %s. %r: %r', feed.feed,
response, response.text)
return
check_push_subscription(session, feed, response)
if feed.type == 'xml':
result = process_xml_feed_for_new_entries(session, feed)
result = process_xml_feed_for_new_entries(session, feed, response)
elif feed.type == 'html':
result = process_html_feed_for_new_entries(session, feed)
result = process_html_feed_for_new_entries(session, feed, response)
else:
result = []
@ -101,6 +111,44 @@ def process_feed_for_new_entries(session, feed):
feed.last_updated = now
def check_push_subscription(session, feed, response, hub_timeout=30):
    """Keep the feed's PuSH subscription in sync with a fetched response.

    The hub and topic URLs are read from the ``hub`` and ``self`` entries of
    ``response.links``. When either differs from the values stored on the
    feed, or the stored subscription is not verified, the new values are
    saved with ``push_verified`` reset to False and committed. An
    unsubscribe request is then sent for the previous hub/topic (if both
    were set) and a subscribe request for the new hub/topic (if both are
    set).

    Hub requests are best-effort: a network error or timeout is logged and
    does not prevent the remaining request from being sent.

    :param session: database session used to commit the feed changes.
    :param feed: Feed whose ``push_hub``, ``push_topic`` and
        ``push_verified`` fields are read and updated.
    :param response: fetched feed response; only ``response.links`` is used.
    :param hub_timeout: seconds to wait for each hub request before giving
        up. Without a timeout a stalled hub would block the caller forever.
    """
    def build_callback_url():
        # URL the hub should call back; it ends in the feed id, following
        # the '/_notify/<feed id>' pattern.
        return '{}://{}/_notify/{}'.format(
            getattr(Config, 'PREFERRED_URL_SCHEME', 'http'),
            Config.SERVER_NAME,
            feed.id)

    def send_request(mode, hub, topic):
        logger.debug(
            'sending %s request for hub=%r, topic=%r', mode, hub, topic)
        try:
            r = requests.post(hub, data={
                'hub.mode': mode,
                'hub.topic': topic,
                'hub.callback': build_callback_url(),
                # TODO secret should only be used over HTTPS
                # 'hub.secret': secret,
            }, timeout=hub_timeout)
        except requests.RequestException:
            # e.g. the old hub has gone away: log it and carry on so the
            # subscribe to the new hub is still attempted.
            logger.exception('%s request to hub %r failed', mode, hub)
            return
        logger.debug('%s response %r', mode, r)

    old_hub = feed.push_hub
    old_topic = feed.push_topic
    hub = response.links.get('hub', {}).get('url')
    topic = response.links.get('self', {}).get('url')

    if hub != old_hub or topic != old_topic or not feed.push_verified:
        # Store the new values before contacting any hub, so the feed
        # already carries the new topic when requests go out.
        feed.push_hub = hub
        feed.push_topic = topic
        feed.push_verified = False
        session.commit()

        if old_hub and old_topic:
            send_request('unsubscribe', old_hub, old_topic)

        if hub and topic:
            send_request('subscribe', hub, topic)
def is_content_equal(e1, e2):
"""The criteria for determining if an entry that we've seen before
has been updated. If any of these fields have changed, we'll scrub the
@ -114,12 +162,11 @@ def is_content_equal(e1, e2):
and e1.properties == e2.properties)
def process_xml_feed_for_new_entries(session, feed):
def process_xml_feed_for_new_entries(session, feed, response):
logger.debug('fetching xml feed: %s', feed)
now = datetime.datetime.utcnow()
parsed = feedparser.parse(feed.feed)
parsed = feedparser.parse(get_response_content(response))
feed_props = parsed.get('feed', {})
default_author_url = feed_props.get('author_detail', {}).get('href')
default_author_name = feed_props.get('author_detail', {}).get('name')
@ -178,11 +225,10 @@ def process_xml_feed_for_new_entries(session, feed):
yield entry
def process_html_feed_for_new_entries(session, feed):
logger.info('fetching html feed: %s', feed)
def process_html_feed_for_new_entries(session, feed, response):
doc = get_response_content(response)
parsed = mf2util.interpret_feed(
mf2py.Parser(url=feed.feed).to_dict(), feed.feed)
mf2py.Parser(url=feed.feed, doc=doc).to_dict(), feed.feed)
hfeed = parsed.get('entries', [])
for hentry in hfeed:
@ -271,3 +317,12 @@ def fallback_photo(url):
"""Use favatar to find an appropriate photo for any URL"""
domain = urllib.parse.urlparse(url).netloc
return 'http://www.google.com/s2/favicons?domain=' + domain
def get_response_content(response):
    """Kartik's trick for handling responses that don't specify their
    encoding. Response.text will guess badly if they don't.

    Returns ``response.text`` when the Content-Type header declares a
    charset, and the raw bytes in ``response.content`` otherwise.

    :param response: object exposing ``headers``, ``content`` and ``text``
        (e.g. a ``requests`` response).
    """
    content_type = response.headers.get('content-type', '') or ''
    # Media-type parameter names are case-insensitive, so "Charset=UTF-8"
    # must count as a declared charset too.
    if 'charset' in content_type.lower():
        return response.text
    return response.content