Merge branch 'master' of github.com:kylewm/woodwind
This commit is contained in:
commit
e259d6d4a6
3 changed files with 21 additions and 8 deletions
|
@ -102,7 +102,7 @@ def update_feed(feed_id, content=None,
|
||||||
len(content))
|
len(content))
|
||||||
else:
|
else:
|
||||||
current_app.logger.info('fetching feed: %s', feed)
|
current_app.logger.info('fetching feed: %s', feed)
|
||||||
response = requests.get(feed.feed)
|
response = util.requests_get(feed.feed)
|
||||||
if response.status_code // 100 != 2:
|
if response.status_code // 100 != 2:
|
||||||
current_app.logger.warn('bad response from %s. %r: %r',
|
current_app.logger.warn('bad response from %s. %r: %r',
|
||||||
feed.feed, response, response.text)
|
feed.feed, response, response.text)
|
||||||
|
@ -443,7 +443,10 @@ def hentry_to_entry(hentry, feed, backfill, now):
|
||||||
def fetch_reply_context(entry_id, in_reply_to, now):
|
def fetch_reply_context(entry_id, in_reply_to, now):
|
||||||
with flask_app():
|
with flask_app():
|
||||||
entry = Entry.query.get(entry_id)
|
entry = Entry.query.get(entry_id)
|
||||||
context = Entry.query.filter_by(permalink=in_reply_to).first()
|
context = Entry.query\
|
||||||
|
.join(Entry.feed)\
|
||||||
|
.filter(Entry.permalink==in_reply_to, Feed.type == 'html')\
|
||||||
|
.first()
|
||||||
|
|
||||||
if not context:
|
if not context:
|
||||||
current_app.logger.info('fetching in-reply-to url: %s',
|
current_app.logger.info('fetching in-reply-to url: %s',
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import bleach
|
import bleach
|
||||||
import re
|
import re
|
||||||
|
import requests
|
||||||
|
|
||||||
bleach.ALLOWED_TAGS += [
|
bleach.ALLOWED_TAGS += [
|
||||||
'a', 'img', 'p', 'br', 'marquee', 'blink',
|
'a', 'img', 'p', 'br', 'marquee', 'blink',
|
||||||
|
@ -14,7 +15,14 @@ bleach.ALLOWED_ATTRIBUTES.update({
|
||||||
'td': ['colspan'],
|
'td': ['colspan'],
|
||||||
})
|
})
|
||||||
|
|
||||||
|
USER_AGENT = 'Woodwind (https://github.com/kylewm/woodwind)'
|
||||||
|
|
||||||
|
|
||||||
|
def requests_get(url, **kwargs):
    """Perform an HTTP GET for *url* that identifies itself as Woodwind.

    A thin wrapper around ``requests.get``: every keyword argument is
    forwarded unchanged, except that the ``User-Agent`` header is always
    set to ``USER_AGENT`` (replacing any caller-supplied ``User-Agent``
    entry with that exact key).

    The caller's ``headers`` mapping is copied rather than modified in
    place, and an explicit ``headers=None`` is treated as "no extra
    headers" instead of raising ``TypeError``.

    Returns whatever ``requests.get`` returns.
    """
    # Copy so that a headers dict shared or reused by the caller is never
    # mutated as a side effect of making the request.
    headers = dict(kwargs.pop('headers', None) or {})
    headers['User-Agent'] = USER_AGENT
    return requests.get(url, headers=headers, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def clean(text):
|
def clean(text):
|
||||||
"""Strip script tags and other possibly dangerous content
|
"""Strip script tags and other possibly dangerous content
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from . import tasks
|
from . import tasks, util
|
||||||
from .extensions import db, login_mgr, micropub
|
from .extensions import db, login_mgr, micropub
|
||||||
from .models import Feed, Entry, User, Subscription
|
from .models import Feed, Entry, User, Subscription
|
||||||
import flask.ext.login as flask_login
|
import flask.ext.login as flask_login
|
||||||
|
@ -84,7 +84,7 @@ def index():
|
||||||
flask.render_template('feed.jinja2', entries=entries, page=page,
|
flask.render_template('feed.jinja2', entries=entries, page=page,
|
||||||
ws_topic=ws_topic, solo=solo,
|
ws_topic=ws_topic, solo=solo,
|
||||||
all_tags=all_tags))
|
all_tags=all_tags))
|
||||||
resp.headers['Cache-control'] = 'no-cache'
|
resp.headers['Cache-control'] = 'max-age=0'
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
|
|
||||||
|
@ -297,7 +297,7 @@ def update_micropub_syndicate_to():
|
||||||
token = flask_login.current_user.access_token
|
token = flask_login.current_user.access_token
|
||||||
if not endpt or not token:
|
if not endpt or not token:
|
||||||
return
|
return
|
||||||
resp = requests.get(endpt, params={
|
resp = util.requests_get(endpt, params={
|
||||||
'q': 'syndicate-to',
|
'q': 'syndicate-to',
|
||||||
}, headers={
|
}, headers={
|
||||||
'Authorization': 'Bearer ' + token,
|
'Authorization': 'Bearer ' + token,
|
||||||
|
@ -367,12 +367,14 @@ def add_subscription(origin, feed_url, type, tags=None):
|
||||||
name = None
|
name = None
|
||||||
if type == 'html':
|
if type == 'html':
|
||||||
flask.current_app.logger.debug('mf2py parsing %s', feed_url)
|
flask.current_app.logger.debug('mf2py parsing %s', feed_url)
|
||||||
|
resp = util.requests_get(feed_url)
|
||||||
|
feed_text = resp.text if 'charset' in resp.headers.get('content-type', '') else resp.content
|
||||||
parsed = mf2util.interpret_feed(
|
parsed = mf2util.interpret_feed(
|
||||||
mf2py.Parser(url=feed_url).to_dict(), feed_url)
|
mf2py.parse(doc=feed_text, url=feed_url), feed_url)
|
||||||
name = parsed.get('name')
|
name = parsed.get('name')
|
||||||
elif type == 'xml':
|
elif type == 'xml':
|
||||||
flask.current_app.logger.debug('feedparser parsing %s', feed_url)
|
flask.current_app.logger.debug('feedparser parsing %s', feed_url)
|
||||||
parsed = feedparser.parse(feed_url)
|
parsed = feedparser.parse(feed_url, agent=util.USER_AGENT)
|
||||||
if parsed.feed:
|
if parsed.feed:
|
||||||
name = parsed.feed.get('title')
|
name = parsed.feed.get('title')
|
||||||
else:
|
else:
|
||||||
|
@ -399,7 +401,7 @@ def add_subscription(origin, feed_url, type, tags=None):
|
||||||
def find_possible_feeds(origin):
|
def find_possible_feeds(origin):
|
||||||
# scrape an origin source to find possible alternative feeds
|
# scrape an origin source to find possible alternative feeds
|
||||||
try:
|
try:
|
||||||
resp = requests.get(origin)
|
resp = util.requests_get(origin)
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
flask.flash('Error fetching source {}'.format(repr(e)))
|
flask.flash('Error fetching source {}'.format(repr(e)))
|
||||||
flask.current_app.logger.warn(
|
flask.current_app.logger.warn(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue