diff --git a/woodwind/api.py b/woodwind/api.py
index a5c4e62..1ddfefd 100644
--- a/woodwind/api.py
+++ b/woodwind/api.py
@@ -40,8 +40,6 @@ def publish():
     })
 
 
-
-
 @api.route('/_forward', methods=['GET', 'POST'])
 def forward_request():
     if flask.request.method == 'GET':
diff --git a/woodwind/util.py b/woodwind/util.py
index 96e9674..dc404a8 100644
--- a/woodwind/util.py
+++ b/woodwind/util.py
@@ -1,7 +1,13 @@
-import bleach
+import pickle
 import re
+
+from flask import current_app
+from redis import StrictRedis
+import bleach
 import requests
 
+redis = StrictRedis()
+
 bleach.ALLOWED_TAGS += [
     'a', 'img', 'p', 'br', 'marquee', 'blink',
     'audio', 'video', 'table', 'tbody', 'td', 'tr', 'div', 'span',
@@ -19,10 +25,48 @@ USER_AGENT = 'Woodwind (https://github.com/kylewm/woodwind)'
 
 
 def requests_get(url, **kwargs):
-    kwargs.setdefault('headers', {})['User-Agent'] = USER_AGENT
-    return requests.get(url, **kwargs)
+    """Fetch *url* with ``requests.get``, revalidating against a Redis cache.
+
+    The last successful (2xx) response for each URL is pickled into Redis
+    under ``resp:<url>`` for 24 hours. When a cached copy exists, its
+    ``Etag`` / ``Last-Modified`` values are sent as conditional request
+    headers; on ``304 Not Modified`` the cached response is returned.
+
+    :param url: the URL to fetch
+    :param kwargs: passed through to ``requests.get``; a ``headers`` dict,
+        if supplied, is updated in place
+    :return: the fresh response, or the cached one after a 304
+    """
+    cache_key = 'resp:' + url
+    lastresp = redis.get(cache_key)
+    if lastresp:
+        # NOTE(security): unpickling executes arbitrary code if the payload
+        # is attacker-controlled; this is only safe while the Redis
+        # instance is private to this application.
+        lastresp = pickle.loads(lastresp)
+
+    headers = kwargs.setdefault('headers', {})
+    headers['User-Agent'] = USER_AGENT
+
+    if lastresp:
+        if 'Etag' in lastresp.headers:
+            headers['If-None-Match'] = lastresp.headers['Etag']
+        if 'Last-Modified' in lastresp.headers:
+            headers['If-Modified-Since'] = lastresp.headers['Last-Modified']
+
+    current_app.logger.debug('fetching %s with args %s', url, kwargs)
+    resp = requests.get(url, **kwargs)
+
+    current_app.logger.debug('fetching %s got response %s', url, resp)
+    # A 304 can arrive without a cached copy when the caller supplied its
+    # own conditional headers; return the real response instead of None.
+    if resp.status_code == 304 and lastresp:
+        return lastresp
+    if resp.status_code // 100 == 2:
+        redis.setex(cache_key, 24 * 3600, pickle.dumps(resp))
+    return resp
+
 
-
 def clean(text):
     """Strip script tags and other possibly dangerous content
     """