broken: trying to put all entries into one transaction and now getting all sorts of detached exceptions. rolling back
parent b3c3e47263
commit ddb75f5993
2 changed files with 52 additions and 32 deletions
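The "detached exceptions" mentioned in the commit message are presumably SQLAlchemy DetachedInstanceError, raised when an ORM object is used after the session that loaded it has been closed, rolled back, or has expired its attributes. A minimal sketch of that failure mode, using plain SQLAlchemy and a made-up Item model rather than anything from this repository:

# Minimal sketch (assumed: SQLAlchemy 1.4+, in-memory SQLite, hypothetical Item model).
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Item(Base):
    __tablename__ = 'items'
    id = Column(Integer, primary_key=True)
    title = Column(String)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

kept = []
with Session(engine) as session:   # session is closed when the block exits
    item = Item(title='hello')
    session.add(item)
    session.commit()               # expire_on_commit=True expires item's attributes
    kept.append(item)              # the object outlives its session

# The instance is now detached; refreshing the expired attribute needs a session,
# so this raises sqlalchemy.orm.exc.DetachedInstanceError.
try:
    print(kept[0].title)
except Exception as exc:
    print(type(exc).__name__, exc)

Holding ORM objects in lists across a session rollback or teardown behaves the same way.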
@@ -36,7 +36,11 @@ def configure_logging(app):
         return
 
     app.logger.setLevel(logging.DEBUG)
-    app.logger.addHandler(logging.StreamHandler(sys.stdout))
+
+    handler = logging.StreamHandler(sys.stdout)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    app.logger.addHandler(handler)
 
     recipients = app.config.get('ADMIN_EMAILS')
     if recipients:

@@ -4,6 +4,7 @@ from redis import StrictRedis
 from woodwind import util
 from woodwind.extensions import db
 from woodwind.models import Feed, Entry
+import sqlalchemy
 import bs4
 import datetime
 import feedparser
@@ -112,12 +113,12 @@ def update_feed(feed_id, content=None,
 
     with flask_app() as app:
         feed = Feed.query.get(feed_id)
-        current_app.logger.info('Updating {}'.format(feed))
+        current_app.logger.info('Updating {}'.format(str(feed)[:32]))
 
         now = datetime.datetime.utcnow()
 
-        new_ids = []
-        updated_ids = []
+        new_entries = []
+        updated_entries = []
         reply_pairs = []
 
         try:
@@ -125,7 +126,7 @@ def update_feed(feed_id, content=None,
                 current_app.logger.info('using provided content. size=%d',
                                         len(content))
             else:
-                current_app.logger.info('fetching feed: %s', feed)
+                current_app.logger.info('fetching feed: %s', str(feed)[:32])
 
                 try:
                     response = util.requests_get(feed.feed)
@@ -163,24 +164,30 @@ def update_feed(feed_id, content=None,
                 result = []
 
             for entry in result:
+                current_app.logger.debug('searching for entry with uid=%s', entry.uid)
                 old = Entry.query\
                     .filter(Entry.feed == feed)\
                     .filter(Entry.uid == entry.uid)\
                     .order_by(Entry.id.desc())\
                     .first()
+                current_app.logger.debug('done searcing: %s', 'found' if old else 'not found')
+
                 # have we seen this post before
                 if not old:
+                    current_app.logger.debug('this is a new post, saving a new entry')
                     # set a default value for published if none is provided
                     entry.published = entry.published or now
                     in_reply_tos = entry.get_property('in-reply-to', [])
                     db.session.add(entry)
                     feed.entries.append(entry)
-                    db.session.commit()
 
-                    new_ids.append(entry.id)
+                    new_entries.append(entry)
                     for irt in in_reply_tos:
-                        reply_pairs.append((entry.id, irt))
+                        reply_pairs.append((entry, irt))
+
                 elif not is_content_equal(old, entry):
+                    current_app.logger.debug('this post content has changed, updating entry')
+
                     entry.published = entry.published or old.published
                     in_reply_tos = entry.get_property('in-reply-to', [])
                     # we're updating an old entriy, use the original
@@ -190,28 +197,38 @@ def update_feed(feed_id, content=None,
                     # punt on deleting for now, learn about cascade
                     # and stuff later
                     # session.delete(old)
                     db.session.add(entry)
                     feed.entries.append(entry)
-                    db.session.commit()
 
-                    updated_ids.append(entry.id)
+                    updated_entries.append(entry)
                     for irt in in_reply_tos:
-                        reply_pairs.append((entry.id, irt))
+                        reply_pairs.append((entry, irt))
 
                 else:
                     current_app.logger.debug(
                         'skipping previously seen post %s', old.permalink)
 
-            for entry_id, in_reply_to in reply_pairs:
-                fetch_reply_context(entry_id, in_reply_to, now)
+            for entry, in_reply_to in reply_pairs:
+                fetch_reply_context(entry, in_reply_to, now)
+
+            db.session.commit()
+        except:
+            db.session.rollback()
+            raise
+
         finally:
             if is_polling:
                 feed.last_checked = now
-            if new_ids or updated_ids:
+            if new_entries or updated_entries:
                 feed.last_updated = now
             db.session.commit()
-            if new_ids:
-                notify_feed_updated(app, feed_id, new_ids)
+
+            if new_entries:
+
+                for e in new_entries:
+                    current_app.logger.debug('entry %s state: %s', e.uid, sqlalchemy.inspect(e))
+
+                notify_feed_updated(app, feed_id, new_entries)
 
 
 def check_push_subscription(feed, response):
@@ -267,6 +284,8 @@ def check_push_subscription(feed, response):
         if ((expiry and expiry - datetime.datetime.utcnow()
              <= UPDATE_INTERVAL_PUSH)
                 or hub != old_hub or topic != old_topic or not feed.push_verified):
+            current_app.logger.debug('push subscription expired or hub/topic changed')
+
             feed.push_hub = hub
             feed.push_topic = topic
             feed.push_verified = False
@@ -274,29 +293,24 @@ def check_push_subscription(feed, response):
             db.session.commit()
 
             if old_hub and old_topic and hub != old_hub and topic != old_topic:
                 current_app.logger.debug('unsubscribing hub=%s, topic=%s', old_hub, old_topic)
                 send_request('unsubscribe', old_hub, old_topic)
 
             if hub and topic:
                 current_app.logger.debug('subscribing hub=%s, topic=%s', hub, topic)
                 send_request('subscribe', hub, topic)
 
             db.session.commit()
 
 
-def notify_feed_updated(app, feed_id, entry_ids):
+def notify_feed_updated(app, feed_id, entries):
     """Render the new entries and publish them to redis
     """
     from flask import render_template
     import flask.ext.login as flask_login
-    current_app.logger.debug(
-        'notifying feed updated for entries %r', entry_ids)
+    current_app.logger.debug('notifying feed updated: %s', feed_id)
+
     feed = Feed.query.get(feed_id)
-    entries = Entry.query\
-        .filter(Entry.id.in_(entry_ids))\
-        .order_by(Entry.retrieved.desc(),
-                  Entry.published.desc())\
-        .all()
-
+
     for s in feed.subscriptions:
         with app.test_request_context():
             flask_login.login_user(s.user, remember=True)
@@ -345,7 +359,7 @@ def is_content_equal(e1, e2):
 
 
 def process_xml_feed_for_new_entries(feed, content, backfill, now):
-    current_app.logger.debug('fetching xml feed: %s', feed)
+    current_app.logger.debug('fetching xml feed: %s', str(feed)[:32])
     parsed = feedparser.parse(content, response_headers={
         'content-location': feed.feed,
     })
@@ -354,12 +368,11 @@ def process_xml_feed_for_new_entries(feed, content, backfill, now):
     default_author_name = feed_props.get('author_detail', {}).get('name')
     default_author_photo = feed_props.get('logo')
 
-    current_app.logger.debug('found {} entries'.format(len(parsed.entries)))
+    current_app.logger.debug('found %d entries', len(parsed.entries))
 
     # work from the bottom up (oldest first, usually)
     for p_entry in reversed(parsed.entries):
-        current_app.logger.debug('processing entry {}'.format(
-            str(p_entry)[:256]))
+        current_app.logger.debug('processing entry %s', str(p_entry)[:32])
         permalink = p_entry.get('link')
         uid = p_entry.get('id') or permalink
 
@@ -406,6 +419,8 @@ def process_xml_feed_for_new_entries(feed, content, backfill, now):
                 video = VIDEO_ENCLOSURE_TMPL.format(href=link.get('href'))
                 content = (content or '') + video
 
+        current_app.logger.debug('building entry')
+
         entry = Entry(
             published=published,
             updated=updated,
@@ -422,6 +437,8 @@ def process_xml_feed_for_new_entries(feed, content, backfill, now):
             author_photo=default_author_photo
             or fallback_photo(feed.origin))
 
+        current_app.logger.debug('yielding entry')
+
         yield entry
 
 
@@ -527,9 +544,8 @@ def hentry_to_entry(hentry, feed, backfill, now):
     return entry
 
 
-def fetch_reply_context(entry_id, in_reply_to, now):
+def fetch_reply_context(entry, in_reply_to, now):
     with flask_app():
-        entry = Entry.query.get(entry_id)
         context = Entry.query\
             .join(Entry.feed)\
             .filter(Entry.permalink==in_reply_to, Feed.type == 'html')\
@@ -542,10 +558,10 @@ def fetch_reply_context(entry_id, in_reply_to, now):
                 mf2py.parse(url=proxy_url(in_reply_to)), in_reply_to)
            if parsed:
                context = hentry_to_entry(parsed, None, False, now)
+               db.session.add(context)
 
        if context:
            entry.reply_context.append(context)
-           db.session.commit()
 
 
 def proxy_url(url):