diff --git a/migrations/20150318-clean-content.py b/migrations/20150318-clean-content.py new file mode 100644 index 0000000..6f07c79 --- /dev/null +++ b/migrations/20150318-clean-content.py @@ -0,0 +1,27 @@ +from config import Config +import sqlalchemy +import sqlalchemy.orm +from woodwind.models import Entry +from woodwind import util + +engine = sqlalchemy.create_engine(Config.SQLALCHEMY_DATABASE_URI) +Session = sqlalchemy.orm.sessionmaker(bind=engine) + +try: + engine.execute('alter table entry add column content_cleaned text') +except: + pass + +try: + session = Session() + + for entry in session.query(Entry).all(): + print('processing', entry.id) + entry.content_cleaned = util.clean(entry.content) + + session.commit() +except: + session.rollback() + raise +finally: + session.close() diff --git a/setup.py b/setup.py index d72b8fe..d87ba2b 100644 --- a/setup.py +++ b/setup.py @@ -26,4 +26,6 @@ setup(name='Woodwind', 'rq', 'uwsgi', 'websockets', + 'pyOpenSSL', + 'pyasn1', ]) diff --git a/woodwind-dev.ini b/woodwind-dev.ini index 2f03bb7..eb9c6fe 100644 --- a/woodwind-dev.ini +++ b/woodwind-dev.ini @@ -1,8 +1,10 @@ [uwsgi] master=true processes=1 -http=:4000 +socket=/tmp/woodwind.sock +chmod-socket=666 module=woodwind.wsgi import=timers attach-daemon=rqworker attach-daemon=python -m woodwind.websocket_server +python-auto-reload=true diff --git a/woodwind/extensions.py b/woodwind/extensions.py index dff0a13..2d565a2 100644 --- a/woodwind/extensions.py +++ b/woodwind/extensions.py @@ -1,15 +1,18 @@ from flask.ext.login import LoginManager from flask.ext.micropub import MicropubClient from flask.ext.sqlalchemy import SQLAlchemy +from flask_debugtoolbar import DebugToolbarExtension db = SQLAlchemy() micropub = MicropubClient(client_id='http://reader.kylewm.com') login_mgr = LoginManager() login_mgr.login_view = 'views.index' +toolbar = DebugToolbarExtension() def init_app(app): db.init_app(app) micropub.init_app(app) login_mgr.init_app(app) + toolbar.init_app(app) diff --git a/woodwind/models.py b/woodwind/models.py index 66c6de8..13536eb 100644 --- a/woodwind/models.py +++ b/woodwind/models.py @@ -8,14 +8,6 @@ from sqlalchemy.ext.orderinglist import ordering_list from sqlalchemy.ext.associationproxy import association_proxy -bleach.ALLOWED_TAGS += ['a', 'img', 'p', 'br', 'marquee', 'blink', - 'audio', 'video', 'table', 'tbody', 'td', 'tr'] -bleach.ALLOWED_ATTRIBUTES.update({ - 'img': ['src', 'alt', 'title'], - 'audio': ['preload', 'controls', 'src'], - 'video': ['preload', 'controls', 'src'], - 'td': ['colspan'], -}) class JsonType(db.TypeDecorator): @@ -135,6 +127,7 @@ class Entry(db.Model): author_photo = db.Column(db.String(512)) title = db.Column(db.String(512)) content = db.Column(db.Text) + content_cleaned = db.Column(db.Text) # other properties properties = db.Column(JsonType) # # association with the InReplyTo objects @@ -153,12 +146,6 @@ class Entry(db.Model): super().__init__(*args, **kwargs) self._syndicated_copies = [] - def content_cleaned(self): - if self.content: - text = self.content - text = re.sub('', '', text, flags=re.DOTALL) - return bleach.clean(text, strip=True) - def get_property(self, key, default=None): if self.properties is None: return default diff --git a/woodwind/tasks.py b/woodwind/tasks.py index 4a3a26f..d9494fa 100644 --- a/woodwind/tasks.py +++ b/woodwind/tasks.py @@ -2,6 +2,7 @@ from config import Config from contextlib import contextmanager from redis import StrictRedis from woodwind.models import Feed, Entry +from woodwind import util import bs4 import datetime import feedparser @@ -290,6 +291,7 @@ def process_xml_feed_for_new_entries(session, feed, response, backfill): retrieved=retrieved, title=p_entry.get('title'), content=content, + content_cleaned=util.clean(content), author_name=p_entry.get('author_detail', {}).get('name') or default_author_name, author_url=p_entry.get('author_detail', {}).get('href') @@ -346,6 +348,7 @@ def hentry_to_entry(hentry, feed, backfill): updated=updated, title=title, content=content, + content_cleaned=util.clean(content), author_name=hentry.get('author', {}).get('name'), author_photo=hentry.get('author', {}).get('photo') or (feed and fallback_photo(feed.origin)), diff --git a/woodwind/templates/_entry.jinja2 b/woodwind/templates/_entry.jinja2 index 2a2fe63..de1cda2 100644 --- a/woodwind/templates/_entry.jinja2 +++ b/woodwind/templates/_entry.jinja2 @@ -14,7 +14,7 @@ {% endif %} {% if context.content %}
- {{ context.content_cleaned() | add_preview }} + {{ context.content_cleaned | add_preview }}
{% endif %}