feature: cache article images for offline reading

After fetching articles, all remote images referenced in article content
are downloaded to ~/.local/share/net.jeena.FeedTheMonkey/images/ and
their src attributes rewritten to file:// URIs. Subsequent loads of the
same article (including from the cache on the next startup) display
images without a network connection.

Metered-connection awareness: image caching is skipped automatically
when GIO reports the network connection as metered, regardless of the
preference setting.

A "Cache Images" toggle in Preferences lets the user disable caching
entirely (stored in the cache-images GSettings key).

After each refresh, images no longer referenced by any article in the
current unread list are deleted from the cache directory to prevent
unbounded disk growth.
This commit is contained in:
Jeena 2026-03-21 01:19:49 +00:00
parent 183191727b
commit fda441bebd
9 changed files with 186 additions and 12 deletions

39
Cargo.lock generated
View file

@ -2,6 +2,15 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "atomic-waker"
version = "1.1.2"
@ -129,6 +138,7 @@ dependencies = [
"gtk4",
"libadwaita",
"libsecret",
"regex",
"reqwest",
"serde",
"serde_json",
@ -1113,6 +1123,35 @@ dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "reqwest"
version = "0.12.28"

View file

@ -14,6 +14,7 @@ webkit6 = { version = "0.6" }
gio = { version = "0.22" }
glib = { version = "0.22" }
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] }
regex = "1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tokio = { version = "1", features = ["full"] }

View file

@ -25,5 +25,9 @@
<default>''</default>
<summary>Content rewrite rules, one per line: domain from to [from to …]</summary>
</key>
<key name="cache-images" type="b">
<default>true</default>
<summary>Download and cache article images for offline reading (skipped on metered connections)</summary>
</key>
</schema>
</schemalist>

View file

@ -11,6 +11,15 @@ template $PreferencesDialog : Adw.Dialog {
Adw.HeaderBar {}
Adw.PreferencesPage {
Adw.PreferencesGroup {
title: _("Images");
Adw.SwitchRow cache_images_row {
title: _("Cache Images");
subtitle: _("Download images for offline reading (skipped on metered connections)");
}
}
Adw.PreferencesGroup {
title: _("Content Filters");
description: _("One rule per line: domain find replace [find replace …]\n\nExample:\n www.imycomic.com -150x150.jpg .jpg");

View file

@ -17,6 +17,17 @@ corresponding .blp file and regenerate this file with blueprint-compiler.
</child>
<child>
<object class="AdwPreferencesPage">
<child>
<object class="AdwPreferencesGroup">
<property name="title" translatable="yes">Images</property>
<child>
<object class="AdwSwitchRow" id="cache_images_row">
<property name="title" translatable="yes">Cache Images</property>
<property name="subtitle" translatable="yes">Download images for offline reading (skipped on metered connections)</property>
</object>
</child>
</object>
</child>
<child>
<object class="AdwPreferencesGroup">
<property name="title" translatable="yes">Content Filters</property>

90
src/image_cache.rs Normal file
View file

@ -0,0 +1,90 @@
use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::path::PathBuf;
use crate::model::Article;
/// Directory where downloaded article images are stored:
/// `<user data dir>/net.jeena.FeedTheMonkey/images`.
///
/// The directory is only computed here, not created.
fn images_dir() -> PathBuf {
    let mut dir = glib::user_data_dir();
    dir.push("net.jeena.FeedTheMonkey");
    dir.push("images");
    dir
}
/// Map an image URL to the file name used for it inside the cache directory.
///
/// The name is a 16-digit lowercase hex hash of the complete URL (query and
/// fragment included), followed by the URL's file extension when it has a
/// plausible one. The extension is kept so WebKit can detect the content type.
///
/// The extension is read only from the last path segment, after the query
/// string and fragment have been stripped. This means
/// `https://host/a.png#frag` keeps `.png`, and a URL without a path such as
/// `https://example.com` does not pick up `.com` from the host name.
///
/// NOTE: `DefaultHasher`'s algorithm is not guaranteed to stay the same across
/// Rust releases, so names may differ between builds of the application. The
/// only consequence is that images are downloaded again.
fn url_to_filename(url: &str) -> String {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    url.hash(&mut hasher);
    let hash = format!("{:016x}", hasher.finish());

    // Drop "?query" and "#fragment"; neither is part of the file name.
    let without_suffix = url
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or("");
    // Skip the scheme so the "//" of "https://" is not mistaken for a path.
    let after_scheme = without_suffix
        .split_once("://")
        .map_or(without_suffix, |(_, rest)| rest);

    let ext = after_scheme
        // Everything after the first '/' is the path; no '/' means no path.
        .split_once('/')
        .and_then(|(_, path)| path.rsplit('/').next())
        .and_then(|segment| segment.rsplit_once('.'))
        .map(|(_, ext)| ext)
        .filter(|e| {
            !e.is_empty() && e.len() <= 5 && e.bytes().all(|b| b.is_ascii_alphanumeric())
        });

    match ext {
        Some(ext) => format!("{hash}.{ext}"),
        None => hash,
    }
}
/// Download all remote images in every article and rewrite their src
/// attributes to file:// URIs so articles render offline.
pub async fn process(articles: Vec<Article>) -> Vec<Article> {
let dir = images_dir();
std::fs::create_dir_all(&dir).ok();
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(30))
.build()
.unwrap_or_default();
let re = regex::Regex::new(r#"src="(https?://[^"]+)""#).unwrap();
let mut out = Vec::with_capacity(articles.len());
for mut article in articles {
let content = article.content.clone();
let mut rewritten = content.clone();
for cap in re.captures_iter(&content) {
let url = &cap[1];
let filename = url_to_filename(url);
let path = dir.join(&filename);
if !path.exists() {
if let Ok(resp) = client.get(url).send().await {
if let Ok(bytes) = resp.bytes().await {
std::fs::write(&path, &bytes).ok();
}
}
}
if path.exists() {
let file_uri = format!("file://{}", path.display());
rewritten = rewritten.replace(
&format!("src=\"{}\"", url),
&format!("src=\"{}\"", file_uri),
);
}
}
article.content = rewritten;
out.push(article);
}
out
}
/// Delete every file in the image cache directory that none of `articles`
/// refers to.
///
/// A file counts as referenced when some article's content contains a
/// `src="file://…/images/<name>"` attribute with that file name. If the cache
/// directory cannot be read nothing happens, and individual deletion failures
/// are ignored.
pub fn cleanup(articles: &[Article]) {
    let cache_dir = images_dir();
    let listing = match std::fs::read_dir(&cache_dir) {
        Ok(listing) => listing,
        Err(_) => return,
    };

    let pattern = regex::Regex::new(r#"src="file://[^"]+/images/([^"]+)""#).unwrap();

    // File names that are still in use by at least one article.
    let in_use: HashSet<String> = articles
        .iter()
        .flat_map(|article| pattern.captures_iter(&article.content))
        .map(|caps| caps[1].to_string())
        .collect();

    // Unreadable directory entries are skipped.
    for file in listing.flatten() {
        let name = file.file_name().to_string_lossy().to_string();
        if in_use.contains(&name) {
            continue;
        }
        std::fs::remove_file(file.path()).ok();
    }
}

View file

@ -1,6 +1,7 @@
mod api;
mod app;
mod cache;
mod image_cache;
mod filters;
mod preferences_dialog;
mod article_row;

View file

@ -22,6 +22,8 @@ pub mod imp {
/// Template-backed state of the preferences dialog; the widgets are bound
/// from the `preferences_dialog.ui` resource named in the attribute below.
#[derive(CompositeTemplate, Default)]
#[template(resource = "/net/jeena/FeedTheMonkey/ui/preferences_dialog.ui")]
pub struct PreferencesDialog {
/// The "Cache Images" switch row (`cache_images_row` in the UI template).
#[template_child]
pub cache_images_row: TemplateChild<libadwaita::SwitchRow>,
/// Text view holding the content filter rules (`filters_text_view` in the
/// UI template).
#[template_child]
pub filters_text_view: TemplateChild<gtk4::TextView>,
}
@ -44,25 +46,28 @@ pub mod imp {
impl ObjectImpl for PreferencesDialog {
fn constructed(&self) {
self.parent_constructed();
self.load();
// Save on every text change
let settings = gio::Settings::new("net.jeena.FeedTheMonkey");
// Cache images switch
self.cache_images_row.set_active(settings.boolean("cache-images"));
let s = settings.clone();
self.cache_images_row.connect_active_notify(move |row| {
s.set_boolean("cache-images", row.is_active()).ok();
});
// Content filters text view
self.filters_text_view.buffer().set_text(&settings.string("content-filters"));
let obj_weak = self.obj().downgrade();
self.filters_text_view.buffer().connect_changed(move |_| {
if let Some(obj) = obj_weak.upgrade() {
obj.imp().save();
obj.imp().save_filters();
}
});
}
}
impl PreferencesDialog {
fn load(&self) {
let settings = gio::Settings::new("net.jeena.FeedTheMonkey");
let text = settings.string("content-filters");
self.filters_text_view.buffer().set_text(&text);
}
fn save(&self) {
fn save_filters(&self) {
let buf = self.filters_text_view.buffer();
let text = buf.text(&buf.start_iter(), &buf.end_iter(), false);
let settings = gio::Settings::new("net.jeena.FeedTheMonkey");

View file

@ -584,9 +584,22 @@ pub mod imp {
}
let saved_id = self.current_article_id.borrow().clone();
let settings = gio::Settings::new("net.jeena.FeedTheMonkey");
let cache_images = settings.boolean("cache-images")
&& !gio::NetworkMonitor::default().is_network_metered();
let win_weak = self.obj().downgrade();
crate::runtime::spawn(
async move { api.fetch_unread().await },
async move {
let articles = api.fetch_unread().await?;
let articles = if cache_images {
crate::image_cache::process(articles).await
} else {
articles
};
Ok::<_, String>(articles)
},
move |result| {
let Some(win) = win_weak.upgrade() else { return };
let imp = win.imp();
@ -602,9 +615,10 @@ pub mod imp {
store.append(&ArticleObject::new(a.clone()));
}
// Save cache with updated article list.
// Save cache and clean up unreferenced images.
let sel_id = saved_id.as_deref().unwrap_or("");
crate::cache::save(&articles, sel_id);
crate::image_cache::cleanup(&articles);
if store.n_items() == 0 {
imp.sidebar_content.set_visible_child_name("empty");