add celery scheduler and break app up into modules
This commit is contained in:
parent
d0a9851ae6
commit
9e1e3ad03f
19 changed files with 555 additions and 3 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
*.css
|
||||||
|
*.css.map
|
13
celeryconfig.py
Normal file
13
celeryconfig.py
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
import datetime

# Celery settings for woodwind. The tasks module loads this file by name
# through ``config_from_object('celeryconfig')``.

# Redis serves as both the message broker and the result store.
BROKER_URL = 'redis://'
CELERY_RESULT_BACKEND = 'redis'

# Task payloads and results are exchanged as JSON only.
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
CELERY_ACCEPT_CONTENT = ['json']

# Periodic jobs for celery beat: one "tick" every five minutes.
# NOTE(review): the task function lives in woodwind/tasks.py; confirm that
# 'tasks.tick' is the name under which the worker actually registers it.
CELERYBEAT_SCHEDULE = {
    'tick-every-5-minutes': dict(
        task='tasks.tick',
        schedule=datetime.timedelta(minutes=5),
    ),
}
|
6
config.py
Normal file
6
config.py
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
import datetime
import os


class Config:
    """Flask settings object, loaded with ``app.config.from_object``."""

    # Session-signing key. Taken from the SECRET_KEY environment variable so
    # that deployments do not have to run with the value committed to the
    # repository; the historical literal remains as a development fallback.
    SECRET_KEY = os.environ.get('SECRET_KEY', 'super secret key')

    # Database URI consumed by Flask-SQLAlchemy.
    SQLALCHEMY_DATABASE_URI = 'sqlite:///db.sqlite'
|
3
woodwind/__init__.py
Normal file
3
woodwind/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
from .app import create_app
|
||||||
|
|
||||||
|
__all__ = ['create_app']
|
10
woodwind/__main__.py
Normal file
10
woodwind/__main__.py
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
__all__ = ['main']


def main():
    """Create the woodwind application and serve it locally on port 4000.

    Uses Flask's built-in server with ``debug=True``.
    """
    # Imported inside the function so that merely importing this module does
    # not build the application.
    from woodwind.app import create_app
    app = create_app()
    app.run(debug=True, port=4000)


# Only start the server when executed (``python -m woodwind``), not when the
# module is imported.
if __name__ == '__main__':
    main()
|
12
woodwind/app.py
Normal file
12
woodwind/app.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
from . import extensions
|
||||||
|
from .views import ui
|
||||||
|
from config import Config
|
||||||
|
import flask
|
||||||
|
|
||||||
|
|
||||||
|
def create_app():
    """Build and return the configured ``woodwind`` Flask application.

    Settings are read from ``config.Config``, the shared extensions are bound
    to the new application and the ``ui`` blueprint is registered on it.
    """
    woodwind_app = flask.Flask('woodwind')
    woodwind_app.config.from_object(Config)

    extensions.init_app(woodwind_app)
    woodwind_app.register_blueprint(ui)

    return woodwind_app
|
15
woodwind/extensions.py
Normal file
15
woodwind/extensions.py
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
from flask.ext.login import LoginManager
|
||||||
|
from flask.ext.micropub import MicropubClient
|
||||||
|
from flask.ext.sqlalchemy import SQLAlchemy
|
||||||
|
|
||||||
|
|
||||||
|
# Extension singletons shared by the whole package. They are created unbound
# here and attached to a concrete application by ``init_app`` below.
db = SQLAlchemy()
micropub = MicropubClient(client_id='redwind-reader')
login_mgr = LoginManager()

# The login view is registered on the ``ui`` blueprint, so its endpoint name
# is 'ui.login'; a bare 'login' endpoint does not exist.
login_mgr.login_view = 'ui.login'
|
||||||
|
|
||||||
|
|
||||||
|
def init_app(app):
    """Bind the shared extensions to *app*.

    The database, the micropub client and the login manager are initialised
    in that order.
    """
    for extension in (db, micropub, login_mgr):
        extension.init_app(app)
|
82
woodwind/models.py
Normal file
82
woodwind/models.py
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
import bleach
|
||||||
|
from .extensions import db
|
||||||
|
|
||||||
|
|
||||||
|
# Extend bleach's module-level whitelist so that entry content may keep links,
# images, paragraphs and line breaks (plus marquee/blink) when it is cleaned.
# NOTE(review): this mutates bleach's global defaults for the whole process
# and assumes ALLOWED_TAGS is a mutable list -- confirm against the installed
# bleach version.
bleach.ALLOWED_TAGS += ['a', 'img', 'p', 'br', 'marquee', 'blink']
bleach.ALLOWED_ATTRIBUTES['img'] = ['src', 'alt', 'title']
|
||||||
|
|
||||||
|
|
||||||
|
class User(db.Model):
    """A reader account, identified by the domain the user signed in with.

    Two users compare equal when their domains match, and ``get_id`` hands the
    domain to Flask-Login as the session identifier.
    """

    id = db.Column(db.Integer, primary_key=True)
    domain = db.Column(db.String(256))
    micropub_endpoint = db.Column(db.String(512))
    access_token = db.Column(db.String(512))

    # Flask-Login integration
    def is_authenticated(self):
        """Stored users are always considered authenticated."""
        return True

    def is_active(self):
        """Stored users are always considered active."""
        return True

    def is_anonymous(self):
        """Stored users are never anonymous."""
        return False

    def get_id(self):
        """Return the identifier Flask-Login keeps in the session."""
        return self.domain

    def __eq__(self, other):
        """Compare by domain.

        ``isinstance`` is used instead of an exact type check so that proxy
        objects reporting ``User`` as their class (such as Flask-Login's
        ``current_user``) compare equal to the user they wrap. Unrelated
        objects yield ``NotImplemented`` so Python can try the reflected
        comparison before falling back to "not equal".
        """
        if isinstance(other, User):
            return self.domain == other.domain
        return NotImplemented

    def __hash__(self):
        """Hash by domain, consistent with ``__eq__``.

        Defining ``__eq__`` alone would set ``__hash__`` to None on Python 3
        and make instances unhashable. The hash follows ``domain``, so avoid
        changing the domain while the object sits in a set or dict.
        """
        return hash(self.domain)

    def __repr__(self):
        return '<User:{}>'.format(self.domain)
|
||||||
|
|
||||||
|
|
||||||
|
class Feed(db.Model):
    """A subscription of one user to one feed."""

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey(User.id))
    user = db.relationship(User, backref='feeds')

    # display name of the feed
    name = db.Column(db.String(256))
    # URL the user subscribed to; kept so the feed URL can be re-checked
    # periodically in case it changes
    origin = db.Column(db.String(512))
    # URL of the feed document itself
    feed = db.Column(db.String(512))
    # feed format: h-feed, xml, etc.
    type = db.Column(db.String(64))
    # when the feed last produced new data
    last_updated = db.Column(db.DateTime)
    # when the feed was last checked
    last_checked = db.Column(db.DateTime)
    etag = db.Column(db.String(512))

    def __repr__(self):
        return '<Feed:{0.name},{0.feed}>'.format(self)
|
||||||
|
|
||||||
|
|
||||||
|
class Entry(db.Model):
    """A single post retrieved from a feed."""

    id = db.Column(db.Integer, primary_key=True)
    feed_id = db.Column(db.Integer, db.ForeignKey(Feed.id))
    feed = db.relationship(Feed, backref='entries')

    published = db.Column(db.DateTime)
    updated = db.Column(db.DateTime)
    retrieved = db.Column(db.DateTime)

    uid = db.Column(db.String(512))
    permalink = db.Column(db.String(512))

    author_name = db.Column(db.String(512))
    author_url = db.Column(db.String(512))
    author_photo = db.Column(db.String(512))

    title = db.Column(db.String(512))
    content = db.Column(db.Text)

    def content_cleaned(self):
        """Return the content sanitised by bleach, or None when there is none.

        Markup outside bleach's whitelist is stripped rather than escaped.
        """
        if not self.content:
            return None
        return bleach.clean(self.content, strip=True)

    def __repr__(self):
        # only the first 140 characters of the content are shown
        preview = (self.content or '')[:140]
        return '<Entry:{},{}>'.format(self.title, preview)
|
|
@ -16,7 +16,7 @@ $box-shadow: 0 0 2px $sirocco;
|
||||||
|
|
||||||
|
|
||||||
body {
|
body {
|
||||||
font: 12pt/1.5em $body-font;
|
font: 12pt $body-font;
|
||||||
background: $athens-gray;
|
background: $athens-gray;
|
||||||
}
|
}
|
||||||
|
|
173
woodwind/tasks.py
Normal file
173
woodwind/tasks.py
Normal file
|
@ -0,0 +1,173 @@
|
||||||
|
from woodwind.extensions import db
|
||||||
|
from woodwind.models import Feed, Entry
|
||||||
|
|
||||||
|
import celery
|
||||||
|
import requests
|
||||||
|
import celery.utils.log
|
||||||
|
import feedparser
|
||||||
|
import mf2py
|
||||||
|
import mf2util
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
import urllib.parse
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
# A feed is re-checked once more than this much time has passed since its
# last check.
UPDATE_INTERVAL = datetime.timedelta(hours=1)

# Celery application for the background jobs; its settings are loaded from
# the ``celeryconfig`` module.
queue = celery.Celery('woodwind')
queue.config_from_object('celeryconfig')

# Task-aware logger used throughout this module.
logger = celery.utils.log.get_task_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@queue.task
def tick():
    """Queue an ``update_feed`` job for every feed that is due for a check.

    A feed is due when it has never been checked or when its last check is
    more than ``UPDATE_INTERVAL`` ago.

    The database extension is bound with ``init_app``, so queries need an
    active Flask application context. A Celery worker does not provide one,
    so a context is pushed here unless the caller already has one.
    """
    import contextlib

    import flask
    # Imported lazily: woodwind.app imports the views module, which in turn
    # imports this module.
    from woodwind.app import create_app

    with contextlib.ExitStack() as stack:
        if not flask.has_app_context():
            stack.enter_context(create_app().app_context())

        now = datetime.datetime.utcnow()
        logger.debug('Tick %s', now)
        for feed in Feed.query.all():
            logger.debug('Feed %s last checked %s', feed, feed.last_checked)
            if (not feed.last_checked
                    or now - feed.last_checked > UPDATE_INTERVAL):
                update_feed.delay(feed.id)
|
||||||
|
|
||||||
|
|
||||||
|
@queue.task
def update_feed(feed_id):
    """Fetch one feed and store any entries not seen before.

    :param feed_id: primary key of the ``Feed`` to refresh.

    Does nothing (apart from logging a warning) when the feed no longer
    exists, e.g. because it was deleted after the job was queued.

    The database extension is bound with ``init_app``, so queries need an
    active Flask application context. A Celery worker does not provide one,
    so a context is pushed here unless the caller already has one.
    """
    import contextlib

    import flask
    # Imported lazily: woodwind.app imports the views module, which in turn
    # imports this module.
    from woodwind.app import create_app

    with contextlib.ExitStack() as stack:
        if not flask.has_app_context():
            stack.enter_context(create_app().app_context())

        feed = Feed.query.get(feed_id)
        if feed is None:
            logger.warning('Cannot update unknown feed id %s', feed_id)
            return

        logger.info('Updating %s', feed)
        new_entries = process_feed_for_new_entries(feed)
        # tolerate a None result (unsupported feed type)
        for entry in new_entries or ():
            logger.debug('Got new entry: %s', entry)
|
||||||
|
|
||||||
|
|
||||||
|
def process_feed_for_new_entries(feed):
    """Fetch *feed*, store its unseen entries and record the check.

    :param feed: the ``Feed`` to process; its ``type`` selects the parser
        ('xml' or 'html'), any other type yields no entries.
    :return: list of the entries that were newly stored (possibly empty).

    ``feed.last_checked`` is always updated, even when processing fails;
    ``feed.last_updated`` only when at least one new entry was stored.
    """
    if feed.type == 'xml':
        processor = process_xml_feed_for_new_entries
    elif feed.type == 'html':
        processor = process_html_feed_for_new_entries
    else:
        processor = None

    new_entries = []
    try:
        if processor is not None:
            # The processors are generators that save each entry as it is
            # produced. Drain them here so the bookkeeping below sees what
            # was really stored instead of an always-truthy generator object.
            for entry in processor(feed):
                new_entries.append(entry)
        return new_entries
    except Exception:
        # leave the session usable for the bookkeeping commit below
        db.session.rollback()
        raise
    finally:
        now = datetime.datetime.utcnow()
        feed.last_checked = now
        if new_entries:
            feed.last_updated = now
        db.session.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def _struct_time_to_datetime(parsed_time):
    """Turn a feedparser ``*_parsed`` time tuple into a naive datetime.

    The tuple's fields are used as they are (no local-time round trip), so
    the result carries the same clock values feedparser reported. Returns
    None when no time was parsed.
    """
    if not parsed_time:
        return None
    return datetime.datetime(*parsed_time[:6])


def process_xml_feed_for_new_entries(feed):
    """Parse an RSS/Atom feed and store the entries not yet in the database.

    :param feed: the ``Feed`` whose ``feed`` URL is fetched with feedparser.
    :return: generator yielding each new ``Entry`` right after it has been
        committed; nothing is fetched or stored until it is iterated.

    Entries are identified by their feed id, falling back to their link;
    entries with neither are skipped.
    """
    logger.debug('updating feed: %s', feed)

    now = datetime.datetime.utcnow()
    parsed = feedparser.parse(feed.feed)

    feed_props = parsed.get('feed', {})
    feed_author = feed_props.get('author_detail', {})
    default_author_url = feed_author.get('href')
    default_author_name = feed_author.get('name')
    default_author_photo = feed_props.get('logo')

    p_entries = parsed.get('entries', [])

    # .get() is used throughout: attribute access raises for missing fields
    all_uids = [uid for uid in
                (e.get('id') or e.get('link') for e in p_entries) if uid]
    preexisting = set()
    if all_uids:
        preexisting = set(row[0] for row in db.session.query(Entry.uid)
                          .filter(Entry.uid.in_(all_uids))
                          .filter(Entry.feed == feed))

    for p_entry in p_entries:
        permalink = p_entry.get('link')
        uid = p_entry.get('id') or permalink

        if not uid or uid in preexisting:
            continue

        updated = _struct_time_to_datetime(p_entry.get('updated_parsed'))
        published = _struct_time_to_datetime(p_entry.get('published_parsed'))

        title = p_entry.get('title')

        content_list = p_entry.get('content')
        if content_list:
            content = content_list[0].value
        else:
            content = p_entry.get('summary')

        # A title that merely repeats the beginning of the content (possibly
        # with a trailing ellipsis) adds nothing, so it is dropped.
        if title and content:
            title_trimmed = title.rstrip('...').rstrip('…')
            if content.startswith(title_trimmed):
                title = None

        author = p_entry.get('author_detail', {})

        entry = Entry(
            feed=feed,
            published=published,
            updated=updated,
            uid=uid,
            permalink=permalink,
            retrieved=now,
            title=title,
            content=content,
            author_name=author.get('name') or default_author_name,
            author_url=author.get('href') or default_author_url,
            author_photo=default_author_photo
            or fallback_photo(feed.origin))

        db.session.add(entry)
        db.session.commit()
        # guard against the same uid appearing twice in one document
        preexisting.add(uid)
        yield entry
|
||||||
|
|
||||||
|
|
||||||
|
def process_html_feed_for_new_entries(feed):
    """Parse an h-feed page and store the entries not yet in the database.

    :param feed: the ``Feed`` whose ``feed`` URL is parsed for microformats.
    :return: generator yielding each new ``Entry`` right after it has been
        committed.

    Entries are identified by their uid, falling back to their url; entries
    with neither are skipped.
    """
    logger.debug('updating feed: %s', feed)

    retrieved_at = datetime.datetime.utcnow()
    interpreted = mf2util.interpret_feed(
        mf2py.parse(url=feed.feed), feed.feed)
    hentries = interpreted.get('entries', [])

    candidate_uids = [h.get('uid') or h.get('url') for h in hentries]
    known_uids = {
        row[0]
        for row in db.session.query(Entry.uid)
        .filter(Entry.uid.in_(candidate_uids))
        .filter(Entry.feed == feed)
    }

    for hentry in hentries:
        permalink = hentry.get('url')
        uid = hentry.get('uid') or permalink

        if not uid or uid in known_uids:
            continue

        author = hentry.get('author', {})
        entry = Entry(
            feed=feed,
            published=hentry.get('published'),
            updated=hentry.get('updated'),
            uid=uid,
            permalink=permalink,
            retrieved=retrieved_at,
            title=hentry.get('name'),
            content=hentry.get('content'),
            author_name=author.get('name'),
            author_photo=author.get('photo')
            or fallback_photo(feed.origin),
            author_url=author.get('url'))

        db.session.add(entry)
        db.session.commit()
        logger.debug('saved entry: %s', entry.permalink)
        yield entry
|
||||||
|
|
||||||
|
|
||||||
|
def fallback_photo(url):
    """Return a Google favicon-service URL for the domain of *url*.

    Used as a stand-in author photo when a feed supplies none.

    :param url: any absolute URL; ``None`` or a value without a network
        location yields a favicon URL with an empty ``domain`` parameter.
    """
    # ``url or ''`` keeps urlparse in str mode; urlparse(None) would return a
    # bytes netloc and the concatenation below would raise TypeError.
    domain = urllib.parse.urlparse(url or '').netloc
    return 'http://www.google.com/s2/favicons?domain=' + domain
|
|
@ -5,8 +5,13 @@
|
||||||
{% for entry in entries %}
|
{% for entry in entries %}
|
||||||
<article>
|
<article>
|
||||||
<header>
|
<header>
|
||||||
<img src="{{entry.author_photo}}"/>
|
{% if entry.author_photo %}
|
||||||
{{ entry.author_name }} - {{ entry.feed.name }}
|
<img src="{{entry.author_photo}}"/>
|
||||||
|
{% endif %}
|
||||||
|
{% if entry.author_name %}
|
||||||
|
{{ entry.author_name }} -
|
||||||
|
{% endif %}
|
||||||
|
{{ entry.feed.name }}
|
||||||
</header>
|
</header>
|
||||||
{% if entry.title %}
|
{% if entry.title %}
|
||||||
<h1>{{ entry.title }}</h1>
|
<h1>{{ entry.title }}</h1>
|
18
woodwind/templates/feeds.jinja2
Normal file
18
woodwind/templates/feeds.jinja2
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
{% extends "base.jinja2" %}
|
||||||
|
{% block body %}
|
||||||
|
|
||||||
|
<main>
|
||||||
|
{% for feed in feeds %}
|
||||||
|
|
||||||
|
<form action="{{ url_for('.edit_feed', id=feed.id) }}" method="POST">
|
||||||
|
<input type="hidden" name="id" value="{{ feed.id }}"/>
|
||||||
|
<input type="text" name="name" value="{{ feed.name }}"/>
|
||||||
|
<input type="text" name="feed" value="{{ feed.feed }}"/>
|
||||||
|
<button type="submit">Save</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
|
||||||
|
{% endfor %}
|
||||||
|
</main>
|
||||||
|
|
||||||
|
{% endblock body %}
|
213
woodwind/views.py
Normal file
213
woodwind/views.py
Normal file
|
@ -0,0 +1,213 @@
|
||||||
|
from . import tasks
|
||||||
|
from .extensions import db, login_mgr, micropub
|
||||||
|
from .models import Feed, Entry, User
|
||||||
|
import flask.ext.login as flask_login
|
||||||
|
import bs4
|
||||||
|
import feedparser
|
||||||
|
import flask
|
||||||
|
import mf2py
|
||||||
|
import mf2util
|
||||||
|
import requests
|
||||||
|
import urllib
|
||||||
|
|
||||||
|
ui = flask.Blueprint('ui', __name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ui.route('/')
def index():
    """Render the reader page.

    Signed-in users see the 100 most recently published entries across their
    feeds; anonymous visitors get an empty list.
    """
    user = flask_login.current_user
    entries = []
    if user.is_authenticated():
        subscribed_ids = [f.id for f in user.feeds]
        query = Entry.query.filter(Entry.feed_id.in_(subscribed_ids))
        query = query.order_by(Entry.published.desc())
        entries = query.limit(100).all()
    return flask.render_template('feed.jinja2', entries=entries)
|
||||||
|
|
||||||
|
|
||||||
|
@ui.route('/install')
def install():
    """Recreate all tables, seed one hard-coded user and log that user in."""
    # NOTE(review): this drops every table and is reachable by any visitor
    # with a plain GET request. It looks like a development bootstrap; it
    # should be protected or removed before the app is exposed publicly.
    db.drop_all()
    db.create_all()

    seed_user = User(domain='kylewm.com')
    db.session.add(seed_user)
    db.session.commit()

    flask_login.login_user(seed_user)

    return 'Success!'
|
||||||
|
|
||||||
|
|
||||||
|
@ui.route('/feeds')
def feeds():
    """Render the feed-management page for the current user.

    Anonymous visitors get an empty list, matching ``index``; the anonymous
    user is not a mapped ``User`` and cannot be used in the query.
    """
    user = flask_login.current_user
    if user.is_authenticated():
        user_feeds = Feed.query.filter(Feed.user == user).all()
    else:
        user_feeds = []
    return flask.render_template('feeds.jinja2', feeds=user_feeds)
|
||||||
|
|
||||||
|
|
||||||
|
@ui.route('/update_feed')
def update_feed():
    """Queue a background refresh of the feed given by the ``id`` query arg.

    Responds with 400 when ``id`` is missing or not an integer; otherwise
    redirects back to the feed list (a view must return a response).
    """
    feed_id = flask.request.args.get('id', type=int)
    if feed_id is None:
        flask.abort(400)

    tasks.update_feed.delay(feed_id)
    flask.flash('Queued update for feed {}'.format(feed_id))
    return flask.redirect(flask.url_for('.feeds'))
|
||||||
|
|
||||||
|
|
||||||
|
@ui.route('/delete_feed')
def delete_feed():
    """Delete the feed given by the ``id`` query arg and return to the list.

    Responds with 404 when no such feed exists.
    """
    # NOTE(review): no ownership check is performed and the deletion is
    # triggered by GET -- confirm whether that is intended.
    feed_id = flask.request.args.get('id')
    feed = Feed.query.get(feed_id) if feed_id else None
    if feed is None:
        flask.abort(404)

    # build the message while the row is still attached to the session
    message = 'Deleted {} ({})'.format(feed.name, feed.feed)
    db.session.delete(feed)
    db.session.commit()

    flask.flash(message)
    return flask.redirect(flask.url_for('.feeds'))
|
||||||
|
|
||||||
|
|
||||||
|
@ui.route('/edit_feed', methods=['POST'])
def edit_feed():
    """Rename a feed and/or change its URL from the submitted form.

    Form fields: ``id`` (required), ``name`` and ``feed`` (each applied only
    when non-empty). Responds with 404 when no such feed exists.
    """
    # NOTE(review): no ownership check is performed -- confirm whether any
    # signed-in (or anonymous) visitor should be able to edit any feed.
    feed_id = flask.request.form.get('id')
    feed_name = flask.request.form.get('name')
    feed_url = flask.request.form.get('feed')

    feed = Feed.query.get(feed_id) if feed_id else None
    if feed is None:
        flask.abort(404)

    if feed_name:
        feed.name = feed_name
    if feed_url:
        feed.feed = feed_url

    db.session.commit()
    flask.flash('Edited {} ({})'.format(feed.name, feed.feed))
    return flask.redirect(flask.url_for('.feeds'))
|
||||||
|
|
||||||
|
|
||||||
|
@ui.route('/login')
def login():
    """Show the login form or start the micropub authorization flow.

    When a ``me`` query arg is present the visitor is sent through
    ``micropub.authorize`` (requesting the 'write' scope) and comes back via
    ``login_callback``; otherwise the login page is rendered.
    """
    # Development shortcut: sign in as the first stored user without any
    # authentication. Limited to debug mode so that a deployed instance does
    # not hand every visitor that account, and skipped when the user table
    # is empty instead of failing with IndexError.
    if flask.current_app.debug:
        first_user = User.query.first()
        if first_user is not None:
            flask_login.login_user(first_user, remember=True)

    me = flask.request.args.get('me')
    if me:
        return micropub.authorize(
            me, flask.url_for('.login_callback', _external=True),
            next_url=flask.request.args.get('next'),
            scope='write')
    return flask.render_template('login.jinja2')
|
||||||
|
|
||||||
|
|
||||||
|
@ui.route('/login-callback')
@micropub.authorized_handler
def login_callback(resp):
    """Finish the micropub login: create or update the user and sign in.

    :param resp: authorization result supplied by the micropub extension;
        this view reads its ``me``, ``error``, ``micropub_endpoint``,
        ``access_token`` and ``next_url`` attributes.

    On failure the error is flashed and the visitor is sent back to the
    login page. On success the user for the domain of ``resp.me`` is created
    if necessary, its endpoint and token are refreshed, and the visitor is
    redirected to ``resp.next_url`` or the index page.
    """
    # The module only does ``import urllib``, which does not guarantee that
    # the ``parse`` submodule is loaded.
    import urllib.parse

    if not resp.me:
        # format() instead of concatenation: the error may be None
        flask.flash('Login error: {}'.format(resp.error))
        return flask.redirect(flask.url_for('.login'))

    domain = urllib.parse.urlparse(resp.me).netloc
    user = load_user(domain)
    if not user:
        user = User()
        user.domain = domain
        db.session.add(user)

    user.micropub_endpoint = resp.micropub_endpoint
    user.access_token = resp.access_token
    db.session.commit()

    flask_login.login_user(user, remember=True)
    # NOTE(review): next_url originates from the ``next`` query arg of the
    # login view; confirm it cannot be used as an open redirect.
    return flask.redirect(resp.next_url or flask.url_for('.index'))
|
||||||
|
|
||||||
|
|
||||||
|
@login_mgr.user_loader
def load_user(domain):
    """Return the stored user for *domain*, or None when there is none.

    Registered as the login manager's user loader; the domain is the value
    ``User.get_id`` returns.
    """
    matches = User.query.filter(User.domain == domain)
    return matches.first()
|
||||||
|
|
||||||
|
|
||||||
|
@ui.route('/subscribe', methods=['GET', 'POST'])
def subscribe():
    """Show the subscribe form (GET) or subscribe to a feed (POST).

    POST form fields:

    * ``origin`` (required): the URL the user wants to follow; 400 if absent.
    * ``feed`` (optional): an explicit choice in the form ``type|url``, as
      submitted from the feed-selection page; 400 if malformed.

    Without an explicit choice the origin is scanned for feeds: none found
    flashes a message and returns to the form, several found renders the
    selection page, exactly one is subscribed to directly.
    """
    if flask.request.method != 'POST':
        return flask.render_template('subscribe.jinja2')

    origin = flask.request.form.get('origin')
    if not origin:
        flask.abort(400)

    typed_feed = flask.request.form.get('feed')
    if typed_feed:
        feed_type, separator, feed_url = typed_feed.partition('|')
        if not separator or not feed_type or not feed_url:
            # not in "type|url" form
            flask.abort(400)
    else:
        candidates = find_possible_feeds(origin)
        if not candidates:
            flask.flash('No feeds found for: ' + origin)
            return flask.redirect(flask.url_for('.subscribe'))
        if len(candidates) > 1:
            return flask.render_template(
                'select-feed.jinja2', origin=origin, feeds=candidates)
        feed_url = candidates[0]['feed']
        feed_type = candidates[0]['type']

    new_feed = add_subscription(origin, feed_url, feed_type)
    if new_feed is None:
        # add_subscription creates nothing for an unsupported feed type
        flask.flash('Could not subscribe to: ' + origin)
        return flask.redirect(flask.url_for('.subscribe'))

    flask.flash('Successfully subscribed to: {}'.format(new_feed.name))
    return flask.redirect(flask.url_for('.index'))
|
||||||
|
|
||||||
|
|
||||||
|
def add_subscription(origin, feed, type):
    """Create and store a ``Feed`` for the current user.

    :param origin: URL the user asked to follow.
    :param feed: URL of the feed document.
    :param type: 'html' (microformats h-feed) or 'xml' (RSS/Atom).
    :return: the committed ``Feed``, or None for any other type.

    The feed's name comes from the parsed document. When the document gives
    no name (or, for html feeds, one longer than 140 characters) the host
    and path of *origin* are used instead.
    """
    # The module only does ``import urllib``, which does not guarantee that
    # the ``parse`` submodule is loaded.
    import urllib.parse

    if type == 'html':
        parsed = mf2util.interpret_feed(mf2py.parse(url=feed), feed)
        name = parsed.get('name')
        if name and len(name) > 140:
            name = None
    elif type == 'xml':
        parsed = feedparser.parse(feed)
        # .get(): a feed without <title> must not raise
        name = parsed.get('feed', {}).get('title')
    else:
        return None

    if not name:
        origin_parts = urllib.parse.urlparse(origin)
        name = origin_parts.netloc + origin_parts.path

    new_feed = Feed(user=flask_login.current_user, name=name,
                    origin=origin, feed=feed, type=type)
    db.session.add(new_feed)
    db.session.commit()
    return new_feed
|
||||||
|
|
||||||
|
|
||||||
|
def find_possible_feeds(origin):
    """Fetch *origin* and list the feeds a user could subscribe to.

    :param origin: URL to inspect.
    :return: list of dicts with the keys 'origin', 'feed' and 'type'
        ('xml' or 'html'); empty when the response is neither a known XML
        feed type nor HTML.

    An XML feed response is returned as the single candidate. For an HTML
    page every ``<link rel="alternate">`` with an XML feed type is listed,
    followed by the page itself as an 'html' candidate.
    """
    # The module only does ``import urllib``, which does not guarantee that
    # the ``parse`` submodule is loaded.
    import urllib.parse

    xml_feed_types = [
        'application/rss+xml',
        'application/atom+xml',
        'application/rdf+xml',
    ]

    resp = requests.get(origin)

    # The header may be absent; media types are case-insensitive.
    content_type = resp.headers.get('content-type', '')
    content_type = content_type.split(';', 1)[0].strip().lower()

    feeds = []
    if content_type in xml_feed_types:
        feeds.append({
            'origin': origin,
            'feed': origin,
            'type': 'xml',
        })

    elif content_type == 'text/html':
        # if text/html, then parse and look for rel="alternate"
        soup = bs4.BeautifulSoup(resp.text)
        for link in soup.find_all('link', {'rel': 'alternate'}):
            href = link.get('href')
            link_type = (link.get('type') or '').strip().lower()
            if href and link_type in xml_feed_types:
                feeds.append({
                    'origin': origin,
                    # hrefs are often relative; resolve them against the
                    # URL that was actually fetched
                    'feed': urllib.parse.urljoin(resp.url, href),
                    'type': 'xml',
                })
        feeds.append({
            'origin': origin,
            'feed': origin,
            'type': 'html',
        })

    return feeds
|
Loading…
Add table
Add a link
Reference in a new issue