From 47deaa2b6bc2ef377fa5558de3d13f18d0ae44dd Mon Sep 17 00:00:00 2001 From: Kyle Mahan Date: Thu, 27 Aug 2015 22:47:41 -0700 Subject: [PATCH] keep track of feeds that are failing, and don't poll them so often --- requirements.txt | 10 +---- woodwind/models.py | 9 ++-- woodwind/tasks.py | 58 +++++++++++++++++++------ woodwind/templates/subscriptions.jinja2 | 1 + 4 files changed, 53 insertions(+), 25 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4b7dea2..f7b5d04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,32 +1,24 @@ -Flask==0.10.1 Flask-DebugToolbar==0.10.0 Flask-Login==0.2.11 Flask-Micropub==0.2.2 Flask-SQLAlchemy==2.0 - +Flask==0.10.1 Jinja2==2.7.3 MarkupSafe==0.23 SQLAlchemy==1.0.0 Werkzeug==0.10.4 - asyncio-redis==0.13.4 beautifulsoup4==4.3.2 bleach==1.4.1 - feedparser>=5.2.0 html5lib==0.99999 - mf2py==0.2.7 mf2util==0.2.3 psycopg2==2.6 - pyOpenSSL==0.15.1 pyasn1==0.1.7 - redis==2.10.3 requests==2.7.0 - rq==0.5.2 - uWSGI==2.0.10 websockets==2.4 diff --git a/woodwind/models.py b/woodwind/models.py index 1e117b1..d092a2d 100644 --- a/woodwind/models.py +++ b/woodwind/models.py @@ -1,6 +1,6 @@ from .extensions import db -from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.dialects.postgresql import JSON import uuid @@ -16,7 +16,7 @@ class User(db.Model): # domain = db.Column(db.String(256)) micropub_endpoint = db.Column(db.String(512)) access_token = db.Column(db.String(512)) - settings = db.Column(JSONB) + settings = db.Column(JSON) # Flask-Login integration def is_authenticated(self): @@ -76,6 +76,9 @@ class Feed(db.Model): push_secret = db.Column(db.String(200)) last_pinged = db.Column(db.DateTime) + last_response = db.Column(db.Text) + failure_count = db.Column(db.Integer) + def get_feed_code(self): return self.feed # binascii.hexlify(self.feed.encode()) @@ -117,7 +120,7 @@ class Entry(db.Model): content = db.Column(db.Text) content_cleaned = db.Column(db.Text) # other properties - properties = db.Column(JSONB) + properties = db.Column(JSON) reply_context = db.relationship( 'Entry', secondary='entry_to_reply_context', primaryjoin=id == entry_to_reply_context.c.entry_id, diff --git a/woodwind/tasks.py b/woodwind/tasks.py index 25e10e2..3ec2056 100644 --- a/woodwind/tasks.py +++ b/woodwind/tasks.py @@ -1,9 +1,9 @@ from contextlib import contextmanager +from flask import current_app from redis import StrictRedis from woodwind import util -from woodwind.models import Feed, Entry from woodwind.extensions import db -from flask import current_app +from woodwind.models import Feed, Entry import bs4 import datetime import feedparser @@ -14,6 +14,7 @@ import mf2util import re import requests import rq +import sys import time import urllib.parse @@ -58,17 +59,32 @@ def tick(): Makes use of uWSGI timers to run every 5 minutes, without needing a separate process to fire ticks. """ + def should_update(feed, now): + if not feed.last_checked: + return True + + if feed.failure_count > 8: + update_interval = datetime.timedelta(days=1) + elif feed.failure_count > 4: + update_interval = datetime.timedelta(hours=8) + elif feed.failure_count > 2: + update_interval = datetime.timedelta(hours=4) + else: + update_interval = UPDATE_INTERVAL + + # PuSH feeds don't need to poll very frequently + if feed.push_verified: + update_interval = max(update_interval, UPDATE_INTERVAL_PUSH) + + return now - feed.last_checked > update_interval + with flask_app(): now = datetime.datetime.utcnow() current_app.logger.info('Tick {}'.format(now)) for feed in Feed.query.all(): - current_app.logger.debug('Feed {} last checked {}'.format( - feed, feed.last_checked)) - if (not feed.last_checked - or (not feed.push_verified - and now - feed.last_checked > UPDATE_INTERVAL) - or (feed.push_verified - and now - feed.last_checked > UPDATE_INTERVAL_PUSH)): + current_app.logger.debug( + 'Feed %s last checked %s', feed, feed.last_checked) + if should_update(feed, now): q.enqueue(update_feed, feed.id) @@ -105,11 +121,27 @@ def update_feed(feed_id, content=None, len(content)) else: current_app.logger.info('fetching feed: %s', feed) - response = util.requests_get(feed.feed) - if response.status_code // 100 != 2: - current_app.logger.warn('bad response from %s. %r: %r', - feed.feed, response, response.text) + + try: + response = util.requests_get(feed.feed) + except: + feed.last_response = 'exception while retrieving: {}'.format( + sys.exc_info()[0]) + feed.failure_count += 1 return + + if response.status_code // 100 != 2: + current_app.logger.warn( + 'bad response from %s. %r: %r', feed.feed, response, + response.text) + feed.last_response = 'bad response while retrieving: {}: {}'.format( + response, response.text) + feed.failure_count += 1 + return + + feed.failure_count = 0 + feed.last_response = 'success: {}'.format(response) + if is_polling: check_push_subscription(feed, response) content = get_response_content(response) diff --git a/woodwind/templates/subscriptions.jinja2 b/woodwind/templates/subscriptions.jinja2 index e556ae5..7a83260 100644 --- a/woodwind/templates/subscriptions.jinja2 +++ b/woodwind/templates/subscriptions.jinja2 @@ -48,6 +48,7 @@