add prelim support for pubsubhubbub enabled feeds
This commit is contained in:
parent
8e3a671531
commit
6f2dc85110
5 changed files with 134 additions and 13 deletions
10
run.py
Executable file
10
run.py
Executable file
|
@ -0,0 +1,10 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
from woodwind.app import create_app
|
||||||
|
app = create_app()
|
||||||
|
app.run(debug=True, port=4000)
|
||||||
|
|
||||||
|
|
||||||
|
main()
|
|
@ -1,9 +1,9 @@
|
||||||
from . import extensions
|
from . import extensions
|
||||||
from .views import views
|
from .views import views
|
||||||
from .api import api
|
from .api import api
|
||||||
|
from .push import push
|
||||||
from config import Config
|
from config import Config
|
||||||
import flask
|
import flask
|
||||||
import logging
|
|
||||||
|
|
||||||
|
|
||||||
def create_app():
|
def create_app():
|
||||||
|
@ -12,4 +12,5 @@ def create_app():
|
||||||
extensions.init_app(app)
|
extensions.init_app(app)
|
||||||
app.register_blueprint(views)
|
app.register_blueprint(views)
|
||||||
app.register_blueprint(api)
|
app.register_blueprint(api)
|
||||||
|
app.register_blueprint(push)
|
||||||
return app
|
return app
|
||||||
|
|
|
@ -108,6 +108,11 @@ class Feed(db.Model):
|
||||||
last_checked = db.Column(db.DateTime)
|
last_checked = db.Column(db.DateTime)
|
||||||
etag = db.Column(db.String(512))
|
etag = db.Column(db.String(512))
|
||||||
|
|
||||||
|
push_hub = db.Column(db.String(512))
|
||||||
|
push_topic = db.Column(db.String(512))
|
||||||
|
push_verified = db.Column(db.Boolean)
|
||||||
|
last_pinged = db.Column(db.DateTime)
|
||||||
|
|
||||||
def get_feed_code(self):
|
def get_feed_code(self):
|
||||||
return binascii.hexlify(self.feed.encode())
|
return binascii.hexlify(self.feed.encode())
|
||||||
|
|
||||||
|
|
50
woodwind/push.py
Normal file
50
woodwind/push.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
from . import tasks
|
||||||
|
from .extensions import db
|
||||||
|
from .models import Feed
|
||||||
|
from flask import Blueprint, request, abort, current_app, make_response
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
|
||||||
|
push = Blueprint('push', __name__)
|
||||||
|
|
||||||
|
|
||||||
|
@push.route('/_notify/<int:feed_id>', methods=['GET', 'POST'])
|
||||||
|
def notify(feed_id):
|
||||||
|
current_app.logger.debug(
|
||||||
|
'received PuSH notification for feed id %d', feed_id)
|
||||||
|
feed = Feed.query.get(feed_id)
|
||||||
|
if not feed:
|
||||||
|
current_app.logger.debug(
|
||||||
|
'could not find feed corresponding to %d', feed_id)
|
||||||
|
abort(404)
|
||||||
|
|
||||||
|
current_app.logger.debug('processing PuSH notification for feed %r', feed)
|
||||||
|
if request.method == 'GET':
|
||||||
|
# verify subscribe or unsusbscribe
|
||||||
|
mode = request.args.get('hub.mode')
|
||||||
|
topic = request.args.get('hub.topic')
|
||||||
|
challenge = request.args.get('hub.challenge')
|
||||||
|
current_app.logger.debug(
|
||||||
|
'PuSH verification. feed=%r, mode=%s, topic=%s, challenge=%s',
|
||||||
|
feed, mode, topic, challenge)
|
||||||
|
|
||||||
|
if mode == 'subscribe' and topic == feed.push_topic:
|
||||||
|
current_app.logger.debug(
|
||||||
|
'PuSH verify subscribe for feed=%r, topic=%s', feed, topic)
|
||||||
|
feed.push_verified = True
|
||||||
|
db.session.commit()
|
||||||
|
return challenge
|
||||||
|
elif mode == 'unsubscribe' and topic != feed.push_topic:
|
||||||
|
current_app.logger.debug(
|
||||||
|
'PuSH verify unsubscribe for feed=%r, topic=%s', feed, topic)
|
||||||
|
return challenge
|
||||||
|
current_app.logger.debug('PuSH cannot verify %s for feed=%r, topic=%s',
|
||||||
|
mode, feed, topic)
|
||||||
|
abort(404)
|
||||||
|
|
||||||
|
# could it be? an actual push notification!?
|
||||||
|
current_app.logger.debug('received PuSH ping for %r', feed)
|
||||||
|
feed.last_pinged = datetime.datetime.utcnow()
|
||||||
|
db.session.commit()
|
||||||
|
tasks.update_feed.delay(feed.id)
|
||||||
|
return make_response('', 204)
|
|
@ -12,10 +12,12 @@ import sqlalchemy
|
||||||
import sqlalchemy.orm
|
import sqlalchemy.orm
|
||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
UPDATE_INTERVAL = datetime.timedelta(hours=1)
|
UPDATE_INTERVAL = datetime.timedelta(hours=1)
|
||||||
TWITTER_RE = re.compile(r'https?://(?:www\.|mobile\.)?twitter\.com/(\w+)/status(?:es)?/(\w+)')
|
TWITTER_RE = re.compile(
|
||||||
|
r'https?://(?:www\.|mobile\.)?twitter\.com/(\w+)/status(?:es)?/(\w+)')
|
||||||
|
|
||||||
app = celery.Celery('woodwind')
|
app = celery.Celery('woodwind')
|
||||||
app.config_from_object('celeryconfig')
|
app.config_from_object('celeryconfig')
|
||||||
|
@ -57,17 +59,25 @@ def update_feed(feed_id):
|
||||||
with session_scope() as session:
|
with session_scope() as session:
|
||||||
feed = session.query(Feed).get(feed_id)
|
feed = session.query(Feed).get(feed_id)
|
||||||
logger.info('Updating {}'.format(feed))
|
logger.info('Updating {}'.format(feed))
|
||||||
process_feed_for_new_entries(session, feed)
|
process_feed(session, feed)
|
||||||
|
|
||||||
|
|
||||||
def process_feed_for_new_entries(session, feed):
|
def process_feed(session, feed):
|
||||||
now = datetime.datetime.utcnow()
|
now = datetime.datetime.utcnow()
|
||||||
found_new = False
|
found_new = False
|
||||||
try:
|
try:
|
||||||
|
logger.info('fetching feed: %s', feed)
|
||||||
|
response = requests.get(feed.feed)
|
||||||
|
if response.status_code // 100 != 2:
|
||||||
|
logger.warn('bad response from %s. %r: %r', feed.feed,
|
||||||
|
response, response.text)
|
||||||
|
return
|
||||||
|
|
||||||
|
check_push_subscription(session, feed, response)
|
||||||
if feed.type == 'xml':
|
if feed.type == 'xml':
|
||||||
result = process_xml_feed_for_new_entries(session, feed)
|
result = process_xml_feed_for_new_entries(session, feed, response)
|
||||||
elif feed.type == 'html':
|
elif feed.type == 'html':
|
||||||
result = process_html_feed_for_new_entries(session, feed)
|
result = process_html_feed_for_new_entries(session, feed, response)
|
||||||
else:
|
else:
|
||||||
result = []
|
result = []
|
||||||
|
|
||||||
|
@ -101,6 +111,44 @@ def process_feed_for_new_entries(session, feed):
|
||||||
feed.last_updated = now
|
feed.last_updated = now
|
||||||
|
|
||||||
|
|
||||||
|
def check_push_subscription(session, feed, response):
|
||||||
|
def build_callback_url():
|
||||||
|
return '{}://{}/_notify/{}'.format(
|
||||||
|
getattr(Config, 'PREFERRED_URL_SCHEME', 'http'),
|
||||||
|
Config.SERVER_NAME,
|
||||||
|
feed.id)
|
||||||
|
|
||||||
|
def send_request(mode, hub, topic):
|
||||||
|
logger.debug(
|
||||||
|
'sending %s request for hub=%r, topic=%r', mode, hub, topic)
|
||||||
|
r = requests.post(hub, data={
|
||||||
|
'hub.mode': mode,
|
||||||
|
'hub.topic': topic,
|
||||||
|
'hub.callback': build_callback_url(),
|
||||||
|
# TODO secret should only be used over HTTPS
|
||||||
|
# 'hub.secret': secret,
|
||||||
|
})
|
||||||
|
logger.debug('%s response %r', mode, r)
|
||||||
|
|
||||||
|
old_hub = feed.push_hub
|
||||||
|
old_topic = feed.push_topic
|
||||||
|
hub = response.links.get('hub', {}).get('url')
|
||||||
|
topic = response.links.get('self', {}).get('url')
|
||||||
|
|
||||||
|
if hub != old_hub or topic != old_topic or not feed.push_verified:
|
||||||
|
feed.push_hub = hub
|
||||||
|
feed.push_topic = topic
|
||||||
|
feed.push_verified = False
|
||||||
|
session.commit()
|
||||||
|
|
||||||
|
if old_hub and old_topic:
|
||||||
|
send_request('unsubscribe', old_hub, old_topic)
|
||||||
|
|
||||||
|
if hub and topic:
|
||||||
|
send_request('subscribe', hub, topic)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def is_content_equal(e1, e2):
|
def is_content_equal(e1, e2):
|
||||||
"""The criteria for determining if an entry that we've seen before
|
"""The criteria for determining if an entry that we've seen before
|
||||||
has been updated. If any of these fields have changed, we'll scrub the
|
has been updated. If any of these fields have changed, we'll scrub the
|
||||||
|
@ -114,12 +162,11 @@ def is_content_equal(e1, e2):
|
||||||
and e1.properties == e2.properties)
|
and e1.properties == e2.properties)
|
||||||
|
|
||||||
|
|
||||||
def process_xml_feed_for_new_entries(session, feed):
|
def process_xml_feed_for_new_entries(session, feed, response):
|
||||||
logger.debug('fetching xml feed: %s', feed)
|
logger.debug('fetching xml feed: %s', feed)
|
||||||
|
|
||||||
now = datetime.datetime.utcnow()
|
now = datetime.datetime.utcnow()
|
||||||
parsed = feedparser.parse(feed.feed)
|
parsed = feedparser.parse(get_response_content(response))
|
||||||
|
|
||||||
feed_props = parsed.get('feed', {})
|
feed_props = parsed.get('feed', {})
|
||||||
default_author_url = feed_props.get('author_detail', {}).get('href')
|
default_author_url = feed_props.get('author_detail', {}).get('href')
|
||||||
default_author_name = feed_props.get('author_detail', {}).get('name')
|
default_author_name = feed_props.get('author_detail', {}).get('name')
|
||||||
|
@ -178,11 +225,10 @@ def process_xml_feed_for_new_entries(session, feed):
|
||||||
yield entry
|
yield entry
|
||||||
|
|
||||||
|
|
||||||
def process_html_feed_for_new_entries(session, feed):
|
def process_html_feed_for_new_entries(session, feed, response):
|
||||||
logger.info('fetching html feed: %s', feed)
|
doc = get_response_content(response)
|
||||||
|
|
||||||
parsed = mf2util.interpret_feed(
|
parsed = mf2util.interpret_feed(
|
||||||
mf2py.Parser(url=feed.feed).to_dict(), feed.feed)
|
mf2py.Parser(url=feed.feed, doc=doc).to_dict(), feed.feed)
|
||||||
hfeed = parsed.get('entries', [])
|
hfeed = parsed.get('entries', [])
|
||||||
|
|
||||||
for hentry in hfeed:
|
for hentry in hfeed:
|
||||||
|
@ -271,3 +317,12 @@ def fallback_photo(url):
|
||||||
"""Use favatar to find an appropriate photo for any URL"""
|
"""Use favatar to find an appropriate photo for any URL"""
|
||||||
domain = urllib.parse.urlparse(url).netloc
|
domain = urllib.parse.urlparse(url).netloc
|
||||||
return 'http://www.google.com/s2/favicons?domain=' + domain
|
return 'http://www.google.com/s2/favicons?domain=' + domain
|
||||||
|
|
||||||
|
|
||||||
|
def get_response_content(response):
|
||||||
|
"""Kartik's trick for handling responses that don't specify their
|
||||||
|
encoding. Response.text will guess badly if they don't.
|
||||||
|
"""
|
||||||
|
if 'charset' not in response.headers.get('content-type', ''):
|
||||||
|
return response.content
|
||||||
|
return response.text
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue