keep track of feeds that are failing, and don't poll them so often

This commit is contained in:
Kyle Mahan 2015-08-27 22:47:41 -07:00
parent aa7f8ace80
commit 47deaa2b6b
4 changed files with 53 additions and 25 deletions

View file

@ -1,32 +1,24 @@
Flask==0.10.1
Flask-DebugToolbar==0.10.0
Flask-Login==0.2.11
Flask-Micropub==0.2.2
Flask-SQLAlchemy==2.0
Flask==0.10.1
Jinja2==2.7.3
MarkupSafe==0.23
SQLAlchemy==1.0.0
Werkzeug==0.10.4
asyncio-redis==0.13.4
beautifulsoup4==4.3.2
bleach==1.4.1
feedparser>=5.2.0
html5lib==0.99999
mf2py==0.2.7
mf2util==0.2.3
psycopg2==2.6
pyOpenSSL==0.15.1
pyasn1==0.1.7
redis==2.10.3
requests==2.7.0
rq==0.5.2
uWSGI==2.0.10
websockets==2.4

View file

@ -1,6 +1,6 @@
from .extensions import db
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.postgresql import JSON
import uuid
@ -16,7 +16,7 @@ class User(db.Model):
# domain = db.Column(db.String(256))
micropub_endpoint = db.Column(db.String(512))
access_token = db.Column(db.String(512))
settings = db.Column(JSONB)
settings = db.Column(JSON)
# Flask-Login integration
def is_authenticated(self):
@ -76,6 +76,9 @@ class Feed(db.Model):
push_secret = db.Column(db.String(200))
last_pinged = db.Column(db.DateTime)
last_response = db.Column(db.Text)
failure_count = db.Column(db.Integer)
def get_feed_code(self):
return self.feed # binascii.hexlify(self.feed.encode())
@ -117,7 +120,7 @@ class Entry(db.Model):
content = db.Column(db.Text)
content_cleaned = db.Column(db.Text)
# other properties
properties = db.Column(JSONB)
properties = db.Column(JSON)
reply_context = db.relationship(
'Entry', secondary='entry_to_reply_context',
primaryjoin=id == entry_to_reply_context.c.entry_id,

View file

@ -1,9 +1,9 @@
from contextlib import contextmanager
from flask import current_app
from redis import StrictRedis
from woodwind import util
from woodwind.models import Feed, Entry
from woodwind.extensions import db
from flask import current_app
from woodwind.models import Feed, Entry
import bs4
import datetime
import feedparser
@ -14,6 +14,7 @@ import mf2util
import re
import requests
import rq
import sys
import time
import urllib.parse
@ -58,17 +59,32 @@ def tick():
Makes use of uWSGI timers to run every 5 minutes, without needing
a separate process to fire ticks.
"""
def should_update(feed, now):
    """Return True when ``feed`` is due to be polled again.

    A feed that has never been checked is always due. Otherwise the
    polling interval backs off with the feed's recorded failure count:
    more than 2 failures -> 4 hours, more than 4 -> 8 hours, more than
    8 -> 1 day; with fewer failures the regular ``UPDATE_INTERVAL``
    applies. PuSH-verified feeds are never polled more often than
    ``UPDATE_INTERVAL_PUSH``.

    :param feed: object exposing ``last_checked``, ``failure_count`` and
        ``push_verified``.
    :param now: current time; must be subtractable with
        ``feed.last_checked`` to yield a ``datetime.timedelta``.
    :return: True if more than the chosen interval has elapsed since
        ``feed.last_checked`` (or the feed was never checked).
    """
    if not feed.last_checked:
        return True

    # failure_count is an Integer column without a default, so feeds
    # stored before the column existed carry None. Comparing None with
    # an int raises TypeError on Python 3, so count it as zero failures.
    failures = feed.failure_count or 0

    if failures > 8:
        update_interval = datetime.timedelta(days=1)
    elif failures > 4:
        update_interval = datetime.timedelta(hours=8)
    elif failures > 2:
        update_interval = datetime.timedelta(hours=4)
    else:
        update_interval = UPDATE_INTERVAL

    # PuSH feeds don't need to poll very frequently
    if feed.push_verified:
        update_interval = max(update_interval, UPDATE_INTERVAL_PUSH)

    return now - feed.last_checked > update_interval
with flask_app():
now = datetime.datetime.utcnow()
current_app.logger.info('Tick {}'.format(now))
for feed in Feed.query.all():
current_app.logger.debug('Feed {} last checked {}'.format(
feed, feed.last_checked))
if (not feed.last_checked
or (not feed.push_verified
and now - feed.last_checked > UPDATE_INTERVAL)
or (feed.push_verified
and now - feed.last_checked > UPDATE_INTERVAL_PUSH)):
current_app.logger.debug(
'Feed %s last checked %s', feed, feed.last_checked)
if should_update(feed, now):
q.enqueue(update_feed, feed.id)
@ -105,11 +121,27 @@ def update_feed(feed_id, content=None,
len(content))
else:
current_app.logger.info('fetching feed: %s', feed)
response = util.requests_get(feed.feed)
if response.status_code // 100 != 2:
current_app.logger.warn('bad response from %s. %r: %r',
feed.feed, response, response.text)
try:
response = util.requests_get(feed.feed)
except:
feed.last_response = 'exception while retrieving: {}'.format(
sys.exc_info()[0])
feed.failure_count += 1
return
if response.status_code // 100 != 2:
current_app.logger.warn(
'bad response from %s. %r: %r', feed.feed, response,
response.text)
feed.last_response = 'bad response while retrieving: {}: {}'.format(
response, response.text)
feed.failure_count += 1
return
feed.failure_count = 0
feed.last_response = 'success: {}'.format(response)
if is_polling:
check_push_subscription(feed, response)
content = get_response_content(response)

View file

@ -48,6 +48,7 @@
<ul>
<li>Last checked: {{s.feed.last_checked | relative_time}}</li>
<li>Last updated: {{s.feed.last_updated | relative_time}}</li>
<li>Last response: {{s.feed.last_response | e}}</li>
<li>PuSH hub: {{s.feed.push_hub}}</li>
<li>PuSH topic: {{s.feed.push_topic}}</li>
<li>PuSH verified: {{s.feed.push_verified}}</li>