bundle up the preexisting-entry queries to run them all at once instead of as 30 individual queries
trying to alleviate the frequent timeouts when fetching a bunch of tweets
parent c8f7213b39
commit 9a927e33be
1 changed file with 66 additions and 60 deletions
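The core of the change is the classic N+1-query fix: instead of issuing one SELECT per incoming entry, collect the uids up front, fetch every matching row in a single IN query, and index the results in a dict. A minimal standalone sketch of the pattern (SQLAlchemy 1.4+ assumed), using a toy model rather than woodwind's actual Entry/Feed classes:

# Toy sketch of the batched-lookup pattern this commit applies;
# not woodwind's real models or session setup.
import sqlalchemy as sa
from sqlalchemy.orm import declarative_base, Session

Base = declarative_base()

class Entry(Base):
    __tablename__ = 'entry'
    id = sa.Column(sa.Integer, primary_key=True)
    uid = sa.Column(sa.String)

engine = sa.create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([Entry(uid='a'), Entry(uid='b')])
    session.commit()

    incoming_uids = ['a', 'b', 'c']

    # before: one round-trip per uid (30 queries for 30 entries)
    #   old = session.query(Entry).filter(Entry.uid == uid).first()

    # after: one round-trip total, then O(1) lookups in memory
    old_entries = {e.uid: e
                   for e in session.query(Entry)
                   .filter(Entry.uid.in_(incoming_uids))}
    for uid in incoming_uids:
        print(uid, 'found' if old_entries.get(uid) else 'not found')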
@@ -164,14 +164,23 @@ def update_feed(feed_id, content=None,
         else:
             result = []
 
+        # realize list, only look at the first 30 entries
+        result = list(itertools.islice(result, 30))
+
+        old_entries = {}
+        all_uids = [e.uid for e in result]
+        if all_uids:
+            for entry in (Entry.query
+                          .filter(Entry.feed == feed,
+                                  Entry.uid.in_(all_uids))
+                          .order_by(Entry.id.desc())):
+                old_entries[entry.uid] = entry
+
         for entry in result:
-            current_app.logger.debug('searching for entry with uid=%s', entry.uid)
-            old = Entry.query\
-                .filter(Entry.feed == feed)\
-                .filter(Entry.uid == entry.uid)\
-                .order_by(Entry.id.desc())\
-                .first()
-            current_app.logger.debug('done searcing: %s', 'found' if old else 'not found')
+            old = old_entries.get(entry.uid)
+            current_app.logger.debug(
+                'entry for uid %s: %s', entry.uid,
+                'found' if old else 'not found')
 
             # have we seen this post before
             if not old:
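Note the new itertools.islice cap. Since the process_*_feed_for_new_entries helpers are generators (they yield Entry objects, as the later hunks show), islice realizes at most 30 of them, bounding parsing work as well as storage. A small self-contained example of that behavior:

import itertools

def gen():
    for i in range(10**9):  # produced lazily, never fully realized
        yield i

first_30 = list(itertools.islice(gen(), 30))  # consumes only 30 items
assert len(first_30) == 30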
@@ -209,9 +218,7 @@ def update_feed(feed_id, content=None,
                 current_app.logger.debug(
                     'skipping previously seen post %s', old.permalink)
 
-        for entry, in_reply_to in reply_pairs:
-            fetch_reply_context(entry, in_reply_to, now)
-
+        fetch_reply_contexts(reply_pairs, now)
         db.session.commit()
     except:
         db.session.rollback()
@@ -344,15 +351,15 @@ def is_content_equal(e1, e2):
         return content
 
     return (
-        e1.title == e2.title
-        and normalize(e1.content) == normalize(e2.content)
-        and e1.author_name == e2.author_name
-        and e1.author_url == e2.author_url
-        and e1.author_photo == e2.author_photo
-        and e1.properties == e2.properties
-        and e1.published == e2.published
-        and e1.updated == e2.updated
-        and e1.deleted == e2.deleted
+        e1.title == e2.title and
+        normalize(e1.content) == normalize(e2.content) and
+        e1.author_name == e2.author_name and
+        e1.author_url == e2.author_url and
+        e1.author_photo == e2.author_photo and
+        e1.properties == e2.properties and
+        e1.published == e2.published and
+        e1.updated == e2.updated and
+        e1.deleted == e2.deleted
     )
 
 
@@ -411,15 +418,12 @@ def process_xml_feed_for_new_entries(feed, content, backfill, now):
             if link.type == 'audio/mpeg' or link.type == 'audio/mp3':
                 audio = AUDIO_ENCLOSURE_TMPL.format(href=link.get('href'))
                 content = (content or '') + audio
-            if (link.type == 'video/x-m4v'
-                    or link.type == 'video/x-mp4'
-                    or link.type == 'video/mp4'):
+            if (link.type == 'video/x-m4v' or link.type == 'video/x-mp4' or
+                    link.type == 'video/mp4'):
                 video = VIDEO_ENCLOSURE_TMPL.format(href=link.get('href'))
                 content = (content or '') + video
 
-        current_app.logger.debug('building entry')
-
-        entry = Entry(
+        yield Entry(
             published=published,
             updated=updated,
             uid=uid,
@@ -428,16 +432,12 @@ def process_xml_feed_for_new_entries(feed, content, backfill, now):
             title=p_entry.get('title'),
             content=content,
             content_cleaned=util.clean(content),
-            author_name=p_entry.get('author_detail', {}).get('name')
-            or default_author_name,
-            author_url=p_entry.get('author_detail', {}).get('href')
-            or default_author_url,
-            author_photo=default_author_photo
-            or fallback_photo(feed.origin))
-
-        current_app.logger.debug('yielding entry')
-
-        yield entry
+            author_name=p_entry.get('author_detail', {}).get('name') or
+            default_author_name,
+            author_url=p_entry.get('author_detail', {}).get('href') or
+            default_author_url,
+            author_photo=default_author_photo or
+            fallback_photo(feed.origin))
 
 
 def process_html_feed_for_new_entries(feed, content, backfill, now):
@@ -475,8 +475,8 @@ def process_html_feed_for_new_entries(feed, content, backfill, now):
 
 def hentry_to_entry(hentry, feed, backfill, now):
     def normalize_datetime(dt):
-        if (dt and hasattr(dt, 'year') and hasattr(dt, 'month')
-                and hasattr(dt, 'day')):
+        if (dt and hasattr(dt, 'year') and hasattr(dt, 'month') and
+                hasattr(dt, 'day')):
             # make sure published is in UTC and strip the timezone
             if hasattr(dt, 'tzinfo') and dt.tzinfo:
                 return dt.astimezone(datetime.timezone.utc).replace(
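The hunk above cuts off before the argument to replace(). A common idiom for "convert to UTC and strip the timezone" passes tzinfo=None, shown here purely as an assumption about the truncated line:

import datetime

def normalize_datetime(dt):
    # convert an aware datetime to naive UTC; `tzinfo=None` is an
    # assumption, since the diff hunk ends before the replace() argument
    if hasattr(dt, 'tzinfo') and dt.tzinfo:
        return dt.astimezone(datetime.timezone.utc).replace(tzinfo=None)
    return dt

aware = datetime.datetime(
    2015, 6, 1, 12, 0,
    tzinfo=datetime.timezone(datetime.timedelta(hours=-7)))
print(normalize_datetime(aware))  # 2015-06-01 19:00:00 (naive UTC)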
@@ -570,34 +570,40 @@ def hentry_to_entry(hentry, feed, backfill, now):
     return entry
 
 
-def fetch_reply_context(entry, in_reply_to, now):
-    context = Entry.query\
-        .join(Entry.feed)\
-        .filter(Entry.permalink == in_reply_to, Feed.type == 'html')\
-        .first()
-
-    if not context:
-        current_app.logger.info('fetching in-reply-to: %s', in_reply_to)
-        try:
-            proxied_reply_url = proxy_url(in_reply_to)
-            parsed = mf2util.interpret(
-                mf2py.parse(url=proxied_reply_url), in_reply_to,
-                fetch_mf2_func=lambda url: mf2py.parse(url=url))
-            if parsed:
-                context = hentry_to_entry(parsed, None, False, now)
-        except requests.exceptions.RequestException as err:
-            current_app.logger.warn(
-                '%s fetching reply context: %s for entry: %s',
-                type(err).__name__, proxied_reply_url, entry.permalink)
+def fetch_reply_contexts(reply_pairs, now):
+    old_contexts = {}
+    in_reply_tos = [url for _, url in reply_pairs]
+    if in_reply_tos:
+        for entry in (Entry.query
+                      .join(Entry.feed)
+                      .filter(Entry.permalink.in_(in_reply_tos),
+                              Feed.type == 'html')):
+            old_contexts[entry.permalink] = entry
+
+    for entry, in_reply_to in reply_pairs:
+        context = old_contexts.get(in_reply_to)
+        if not context:
+            current_app.logger.info('fetching in-reply-to: %s', in_reply_to)
+            try:
+                proxied_reply_url = proxy_url(in_reply_to)
+                parsed = mf2util.interpret(
+                    mf2py.parse(url=proxied_reply_url), in_reply_to,
+                    fetch_mf2_func=lambda url: mf2py.parse(url=url))
+                if parsed:
+                    context = hentry_to_entry(parsed, None, False, now)
+            except requests.exceptions.RequestException as err:
+                current_app.logger.warn(
+                    '%s fetching reply context: %s for entry: %s',
+                    type(err).__name__, proxied_reply_url, entry.permalink)
 
-    if context:
-        db.session.add(context)
-        entry.reply_context.append(context)
+        if context:
+            db.session.add(context)
+            entry.reply_context.append(context)
 
 
 def proxy_url(url):
-    if ('TWITTER_AU_KEY' in current_app.config
-            and 'TWITTER_AU_SECRET' in current_app.config):
+    if ('TWITTER_AU_KEY' in current_app.config and
+            'TWITTER_AU_SECRET' in current_app.config):
         # swap out the a-u url for twitter urls
         match = TWITTER_RE.match(url)
         if match:
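The fetch_reply_contexts rewrite has the same shape as the entry-lookup change: one bulk Entry.permalink.in_(...) query for every context already in the database, then the slow mf2py/mf2util network fetch only for the misses. A toy sketch of that control flow, with plain dicts and functions standing in for the SQLAlchemy and network pieces (all names here are illustrative, not from the diff):

# generic "batch lookup, then per-item fallback" pattern
def fetch_contexts(pairs, lookup_many, fetch_one):
    # one bulk lookup for everything we might already have
    cached = lookup_many([url for _, url in pairs])
    results = []
    for item, url in pairs:
        context = cached.get(url)
        if context is None:
            context = fetch_one(url)  # slow path, only for cache misses
        if context is not None:
            results.append((item, context))
    return results

# usage: a dict stands in for the DB, a lambda for the network fetch
db = {'https://example.com/1': 'ctx1'}
out = fetch_contexts([('e1', 'https://example.com/1'),
                      ('e2', 'https://example.com/2')],
                     lambda urls: {u: db[u] for u in urls if u in db},
                     lambda url: None)
print(out)  # [('e1', 'ctx1')]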