diff --git a/Cargo.lock b/Cargo.lock index 0f157f3..746b7a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -129,6 +138,7 @@ dependencies = [ "gtk4", "libadwaita", "libsecret", + "regex", "reqwest", "serde", "serde_json", @@ -1113,6 +1123,35 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + [[package]] name = "reqwest" version = "0.12.28" diff --git a/Cargo.toml b/Cargo.toml index f2dc074..63a62de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ webkit6 = { version = "0.6" } gio = { version = "0.22" } glib = { version = "0.22" } reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] } +regex = "1" serde = { version = "1", features = ["derive"] } serde_json = "1" tokio = { version = "1", features = ["full"] } diff --git a/data/net.jeena.FeedTheMonkey.gschema.xml b/data/net.jeena.FeedTheMonkey.gschema.xml index 16ae75d..a69cb8a 100644 --- a/data/net.jeena.FeedTheMonkey.gschema.xml +++ b/data/net.jeena.FeedTheMonkey.gschema.xml @@ -25,5 +25,9 @@ '' Content rewrite rules, one per line: domain from to [from to …] + + true + Download and cache article images for offline reading (skipped on metered connections) + diff --git a/data/ui/preferences_dialog.blp b/data/ui/preferences_dialog.blp index 94fe343..de20a03 100644 --- a/data/ui/preferences_dialog.blp +++ b/data/ui/preferences_dialog.blp @@ -11,6 +11,15 @@ template $PreferencesDialog : Adw.Dialog { Adw.HeaderBar {} Adw.PreferencesPage { + Adw.PreferencesGroup { + title: _("Images"); + + Adw.SwitchRow cache_images_row { + title: _("Cache Images"); + subtitle: _("Download images for offline reading (skipped on metered connections)"); + } + } + Adw.PreferencesGroup { title: _("Content Filters"); description: _("One rule per line: domain find replace [find replace …]\n\nExample:\n www.imycomic.com -150x150.jpg .jpg"); diff --git a/data/ui/preferences_dialog.ui b/data/ui/preferences_dialog.ui index 6fefaad..0e78a1f 100644 --- a/data/ui/preferences_dialog.ui +++ b/data/ui/preferences_dialog.ui @@ -17,6 +17,17 @@ corresponding .blp file and regenerate this file with blueprint-compiler. + + + Images + + + Cache Images + Download images for offline reading (skipped on metered connections) + + + + Content Filters diff --git a/src/image_cache.rs b/src/image_cache.rs new file mode 100644 index 0000000..c361519 --- /dev/null +++ b/src/image_cache.rs @@ -0,0 +1,90 @@ +use std::collections::HashSet; +use std::hash::{Hash, Hasher}; +use std::path::PathBuf; + +use crate::model::Article; + +fn images_dir() -> PathBuf { + glib::user_data_dir() + .join("net.jeena.FeedTheMonkey") + .join("images") +} + +fn url_to_filename(url: &str) -> String { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + url.hash(&mut hasher); + let hash = format!("{:016x}", hasher.finish()); + // Preserve extension so WebKit can detect the content type. + let ext = url.split('?').next() + .and_then(|u| u.rsplit('.').next()) + .filter(|e| e.len() <= 5 && e.bytes().all(|b| b.is_ascii_alphanumeric())) + .unwrap_or(""); + if ext.is_empty() { hash } else { format!("{}.{}", hash, ext) } +} + +/// Download all remote images in every article and rewrite their src +/// attributes to file:// URIs so articles render offline. +pub async fn process(articles: Vec
) -> Vec
{ + let dir = images_dir(); + std::fs::create_dir_all(&dir).ok(); + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(30)) + .build() + .unwrap_or_default(); + + let re = regex::Regex::new(r#"src="(https?://[^"]+)""#).unwrap(); + + let mut out = Vec::with_capacity(articles.len()); + for mut article in articles { + let content = article.content.clone(); + let mut rewritten = content.clone(); + + for cap in re.captures_iter(&content) { + let url = &cap[1]; + let filename = url_to_filename(url); + let path = dir.join(&filename); + + if !path.exists() { + if let Ok(resp) = client.get(url).send().await { + if let Ok(bytes) = resp.bytes().await { + std::fs::write(&path, &bytes).ok(); + } + } + } + + if path.exists() { + let file_uri = format!("file://{}", path.display()); + rewritten = rewritten.replace( + &format!("src=\"{}\"", url), + &format!("src=\"{}\"", file_uri), + ); + } + } + + article.content = rewritten; + out.push(article); + } + out +} + +/// Remove cached image files that are no longer referenced by any article. +pub fn cleanup(articles: &[Article]) { + let dir = images_dir(); + let Ok(entries) = std::fs::read_dir(&dir) else { return }; + + let re = regex::Regex::new(r#"src="file://[^"]+/images/([^"]+)""#).unwrap(); + let mut referenced: HashSet = HashSet::new(); + for article in articles { + for cap in re.captures_iter(&article.content) { + referenced.insert(cap[1].to_string()); + } + } + + for entry in entries.filter_map(|e| e.ok()) { + let fname = entry.file_name().to_string_lossy().to_string(); + if !referenced.contains(&fname) { + std::fs::remove_file(entry.path()).ok(); + } + } +} diff --git a/src/main.rs b/src/main.rs index 30daa51..11ed652 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ mod api; mod app; mod cache; +mod image_cache; mod filters; mod preferences_dialog; mod article_row; diff --git a/src/preferences_dialog.rs b/src/preferences_dialog.rs index f20494a..8ae9a2d 100644 --- a/src/preferences_dialog.rs +++ b/src/preferences_dialog.rs @@ -22,6 +22,8 @@ pub mod imp { #[derive(CompositeTemplate, Default)] #[template(resource = "/net/jeena/FeedTheMonkey/ui/preferences_dialog.ui")] pub struct PreferencesDialog { + #[template_child] + pub cache_images_row: TemplateChild, #[template_child] pub filters_text_view: TemplateChild, } @@ -44,25 +46,28 @@ pub mod imp { impl ObjectImpl for PreferencesDialog { fn constructed(&self) { self.parent_constructed(); - self.load(); - // Save on every text change + let settings = gio::Settings::new("net.jeena.FeedTheMonkey"); + + // Cache images switch + self.cache_images_row.set_active(settings.boolean("cache-images")); + let s = settings.clone(); + self.cache_images_row.connect_active_notify(move |row| { + s.set_boolean("cache-images", row.is_active()).ok(); + }); + + // Content filters text view + self.filters_text_view.buffer().set_text(&settings.string("content-filters")); let obj_weak = self.obj().downgrade(); self.filters_text_view.buffer().connect_changed(move |_| { if let Some(obj) = obj_weak.upgrade() { - obj.imp().save(); + obj.imp().save_filters(); } }); } } impl PreferencesDialog { - fn load(&self) { - let settings = gio::Settings::new("net.jeena.FeedTheMonkey"); - let text = settings.string("content-filters"); - self.filters_text_view.buffer().set_text(&text); - } - - fn save(&self) { + fn save_filters(&self) { let buf = self.filters_text_view.buffer(); let text = buf.text(&buf.start_iter(), &buf.end_iter(), false); let settings = gio::Settings::new("net.jeena.FeedTheMonkey"); diff --git a/src/window.rs b/src/window.rs index 5f2f630..cc2c4ba 100644 --- a/src/window.rs +++ b/src/window.rs @@ -584,9 +584,22 @@ pub mod imp { } let saved_id = self.current_article_id.borrow().clone(); + + let settings = gio::Settings::new("net.jeena.FeedTheMonkey"); + let cache_images = settings.boolean("cache-images") + && !gio::NetworkMonitor::default().is_network_metered(); + let win_weak = self.obj().downgrade(); crate::runtime::spawn( - async move { api.fetch_unread().await }, + async move { + let articles = api.fetch_unread().await?; + let articles = if cache_images { + crate::image_cache::process(articles).await + } else { + articles + }; + Ok::<_, String>(articles) + }, move |result| { let Some(win) = win_weak.upgrade() else { return }; let imp = win.imp(); @@ -602,9 +615,10 @@ pub mod imp { store.append(&ArticleObject::new(a.clone())); } - // Save cache with updated article list. + // Save cache and clean up unreferenced images. let sel_id = saved_id.as_deref().unwrap_or(""); crate::cache::save(&articles, sel_id); + crate::image_cache::cleanup(&articles); if store.n_items() == 0 { imp.sidebar_content.set_visible_child_name("empty");