keep track of feeds that are failing, and don't poll them so often

This commit is contained in:
Kyle Mahan 2015-08-27 22:47:41 -07:00
parent aa7f8ace80
commit 47deaa2b6b
4 changed files with 53 additions and 25 deletions

View file

@ -1,32 +1,24 @@
Flask==0.10.1
Flask-DebugToolbar==0.10.0 Flask-DebugToolbar==0.10.0
Flask-Login==0.2.11 Flask-Login==0.2.11
Flask-Micropub==0.2.2 Flask-Micropub==0.2.2
Flask-SQLAlchemy==2.0 Flask-SQLAlchemy==2.0
Flask==0.10.1
Jinja2==2.7.3 Jinja2==2.7.3
MarkupSafe==0.23 MarkupSafe==0.23
SQLAlchemy==1.0.0 SQLAlchemy==1.0.0
Werkzeug==0.10.4 Werkzeug==0.10.4
asyncio-redis==0.13.4 asyncio-redis==0.13.4
beautifulsoup4==4.3.2 beautifulsoup4==4.3.2
bleach==1.4.1 bleach==1.4.1
feedparser>=5.2.0 feedparser>=5.2.0
html5lib==0.99999 html5lib==0.99999
mf2py==0.2.7 mf2py==0.2.7
mf2util==0.2.3 mf2util==0.2.3
psycopg2==2.6 psycopg2==2.6
pyOpenSSL==0.15.1 pyOpenSSL==0.15.1
pyasn1==0.1.7 pyasn1==0.1.7
redis==2.10.3 redis==2.10.3
requests==2.7.0 requests==2.7.0
rq==0.5.2 rq==0.5.2
uWSGI==2.0.10 uWSGI==2.0.10
websockets==2.4 websockets==2.4

View file

@ -1,6 +1,6 @@
from .extensions import db from .extensions import db
from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects.postgresql import JSON
import uuid import uuid
@ -16,7 +16,7 @@ class User(db.Model):
# domain = db.Column(db.String(256)) # domain = db.Column(db.String(256))
micropub_endpoint = db.Column(db.String(512)) micropub_endpoint = db.Column(db.String(512))
access_token = db.Column(db.String(512)) access_token = db.Column(db.String(512))
settings = db.Column(JSONB) settings = db.Column(JSON)
# Flask-Login integration # Flask-Login integration
def is_authenticated(self): def is_authenticated(self):
@ -76,6 +76,9 @@ class Feed(db.Model):
push_secret = db.Column(db.String(200)) push_secret = db.Column(db.String(200))
last_pinged = db.Column(db.DateTime) last_pinged = db.Column(db.DateTime)
last_response = db.Column(db.Text)
failure_count = db.Column(db.Integer)
def get_feed_code(self): def get_feed_code(self):
return self.feed # binascii.hexlify(self.feed.encode()) return self.feed # binascii.hexlify(self.feed.encode())
@ -117,7 +120,7 @@ class Entry(db.Model):
content = db.Column(db.Text) content = db.Column(db.Text)
content_cleaned = db.Column(db.Text) content_cleaned = db.Column(db.Text)
# other properties # other properties
properties = db.Column(JSONB) properties = db.Column(JSON)
reply_context = db.relationship( reply_context = db.relationship(
'Entry', secondary='entry_to_reply_context', 'Entry', secondary='entry_to_reply_context',
primaryjoin=id == entry_to_reply_context.c.entry_id, primaryjoin=id == entry_to_reply_context.c.entry_id,

View file

@ -1,9 +1,9 @@
from contextlib import contextmanager from contextlib import contextmanager
from flask import current_app
from redis import StrictRedis from redis import StrictRedis
from woodwind import util from woodwind import util
from woodwind.models import Feed, Entry
from woodwind.extensions import db from woodwind.extensions import db
from flask import current_app from woodwind.models import Feed, Entry
import bs4 import bs4
import datetime import datetime
import feedparser import feedparser
@ -14,6 +14,7 @@ import mf2util
import re import re
import requests import requests
import rq import rq
import sys
import time import time
import urllib.parse import urllib.parse
@ -58,17 +59,32 @@ def tick():
Makes use of uWSGI timers to run every 5 minutes, without needing Makes use of uWSGI timers to run every 5 minutes, without needing
a separate process to fire ticks. a separate process to fire ticks.
""" """
def should_update(feed, now):
    """Return True when ``feed`` is due to be polled again at ``now``.

    A feed that has never been checked is always due. Otherwise the
    time since ``feed.last_checked`` must exceed an interval that grows
    with the number of recorded failures, so that feeds which keep
    failing are polled less often.

    :param feed: object exposing ``last_checked``, ``failure_count``
        and ``push_verified``.
    :param now: current time, comparable with ``feed.last_checked``.
    :return: bool, whether an update should be enqueued for the feed.
    """
    if not feed.last_checked:
        return True

    # failure_count may be None (the column declares no default), and
    # comparing None with an int raises TypeError; treat it as zero.
    failures = feed.failure_count or 0

    if failures > 8:
        update_interval = datetime.timedelta(days=1)
    elif failures > 4:
        update_interval = datetime.timedelta(hours=8)
    elif failures > 2:
        update_interval = datetime.timedelta(hours=4)
    else:
        update_interval = UPDATE_INTERVAL

    # PuSH feeds don't need to poll very frequently
    if feed.push_verified:
        update_interval = max(update_interval, UPDATE_INTERVAL_PUSH)

    return now - feed.last_checked > update_interval
with flask_app(): with flask_app():
now = datetime.datetime.utcnow() now = datetime.datetime.utcnow()
current_app.logger.info('Tick {}'.format(now)) current_app.logger.info('Tick {}'.format(now))
for feed in Feed.query.all(): for feed in Feed.query.all():
current_app.logger.debug('Feed {} last checked {}'.format( current_app.logger.debug(
feed, feed.last_checked)) 'Feed %s last checked %s', feed, feed.last_checked)
if (not feed.last_checked if should_update(feed, now):
or (not feed.push_verified
and now - feed.last_checked > UPDATE_INTERVAL)
or (feed.push_verified
and now - feed.last_checked > UPDATE_INTERVAL_PUSH)):
q.enqueue(update_feed, feed.id) q.enqueue(update_feed, feed.id)
@ -105,11 +121,27 @@ def update_feed(feed_id, content=None,
len(content)) len(content))
else: else:
current_app.logger.info('fetching feed: %s', feed) current_app.logger.info('fetching feed: %s', feed)
response = util.requests_get(feed.feed)
if response.status_code // 100 != 2: try:
current_app.logger.warn('bad response from %s. %r: %r', response = util.requests_get(feed.feed)
feed.feed, response, response.text) except:
feed.last_response = 'exception while retrieving: {}'.format(
sys.exc_info()[0])
feed.failure_count += 1
return return
if response.status_code // 100 != 2:
current_app.logger.warn(
'bad response from %s. %r: %r', feed.feed, response,
response.text)
feed.last_response = 'bad response while retrieving: {}: {}'.format(
response, response.text)
feed.failure_count += 1
return
feed.failure_count = 0
feed.last_response = 'success: {}'.format(response)
if is_polling: if is_polling:
check_push_subscription(feed, response) check_push_subscription(feed, response)
content = get_response_content(response) content = get_response_content(response)

View file

@ -48,6 +48,7 @@
<ul> <ul>
<li>Last checked: {{s.feed.last_checked | relative_time}}</li> <li>Last checked: {{s.feed.last_checked | relative_time}}</li>
<li>Last updated: {{s.feed.last_updated | relative_time}}</li> <li>Last updated: {{s.feed.last_updated | relative_time}}</li>
<li>Last response: {{s.feed.last_response | e}}</li>
<li>PuSH hub: {{s.feed.push_hub}}</li> <li>PuSH hub: {{s.feed.push_hub}}</li>
<li>PuSH topic: {{s.feed.push_topic}}</li> <li>PuSH topic: {{s.feed.push_topic}}</li>
<li>PuSH verified: {{s.feed.push_verified}}</li> <li>PuSH verified: {{s.feed.push_verified}}</li>