feature: cache article images for offline reading

After fetching articles, all remote images referenced in article content
are downloaded to ~/.local/share/net.jeena.FeedTheMonkey/images/ and
their src attributes rewritten to file:// URIs. Subsequent loads of the
same article (including from the cache on the next startup) display
images without a network connection.

Metered-connection awareness: image caching is skipped automatically
when GIO reports the network connection as metered, even if the
"Cache Images" preference is enabled.

A "Cache Images" toggle in Preferences lets the user disable caching
entirely (stored in the cache-images GSettings key).

After each refresh, images no longer referenced by any article in the
current unread list are deleted from the cache directory to prevent
unbounded disk growth.
This commit is contained in:
Jeena 2026-03-21 01:19:49 +00:00
parent 183191727b
commit fda441bebd
9 changed files with 186 additions and 12 deletions

90
src/image_cache.rs Normal file
View file

@ -0,0 +1,90 @@
use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::path::PathBuf;
use crate::model::Article;
/// Directory where downloaded article images are stored:
/// `<user data dir>/net.jeena.FeedTheMonkey/images`.
fn images_dir() -> PathBuf {
    let mut dir = glib::user_data_dir();
    dir.push("net.jeena.FeedTheMonkey");
    dir.push("images");
    dir
}
fn url_to_filename(url: &str) -> String {
let mut hasher = std::collections::hash_map::DefaultHasher::new();
url.hash(&mut hasher);
let hash = format!("{:016x}", hasher.finish());
// Preserve extension so WebKit can detect the content type.
let ext = url.split('?').next()
.and_then(|u| u.rsplit('.').next())
.filter(|e| e.len() <= 5 && e.bytes().all(|b| b.is_ascii_alphanumeric()))
.unwrap_or("");
if ext.is_empty() { hash } else { format!("{}.{}", hash, ext) }
}
/// Download all remote images in every article and rewrite their src
/// attributes to file:// URIs so articles render offline.
///
/// Already-cached images are not re-downloaded. A failed or non-2xx
/// download is skipped and the original remote src is kept, so the image
/// still loads whenever a connection is available.
pub async fn process(articles: Vec<Article>) -> Vec<Article> {
    let dir = images_dir();
    // Best-effort: if the directory can't be created, every write below
    // fails and articles pass through with their remote URLs intact.
    std::fs::create_dir_all(&dir).ok();
    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(30))
        .build()
        .unwrap_or_default();
    let re = regex::Regex::new(r#"src="(https?://[^"]+)""#).unwrap();
    let mut out = Vec::with_capacity(articles.len());
    for mut article in articles {
        let content = article.content.clone();
        let mut rewritten = content.clone();
        for cap in re.captures_iter(&content) {
            let url = &cap[1];
            let path = dir.join(url_to_filename(url));
            if !path.exists() {
                if let Ok(resp) = client.get(url).send().await {
                    // Only cache successful responses. Without this check a
                    // 404/500 HTML error page would be written to the cache
                    // and — because of the exists() guard above — served as
                    // the image forever, never re-fetched.
                    if resp.status().is_success() {
                        if let Ok(bytes) = resp.bytes().await {
                            std::fs::write(&path, &bytes).ok();
                        }
                    }
                }
            }
            if path.exists() {
                // NOTE(review): path.display() is not percent-encoded; a
                // cache path containing spaces would produce an invalid
                // file:// URI. Hash-based names make this unlikely here.
                let file_uri = format!("file://{}", path.display());
                rewritten = rewritten.replace(
                    &format!("src=\"{}\"", url),
                    &format!("src=\"{}\"", file_uri),
                );
            }
        }
        article.content = rewritten;
        out.push(article);
    }
    out
}
/// Remove cached image files that are no longer referenced by any article.
pub fn cleanup(articles: &[Article]) {
    let dir = images_dir();
    // Nothing to do if the cache directory doesn't exist or is unreadable.
    let entries = match std::fs::read_dir(&dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    let re = regex::Regex::new(r#"src="file://[^"]+/images/([^"]+)""#).unwrap();
    // Every cache file name still referenced from some article body.
    let referenced: HashSet<String> = articles
        .iter()
        .flat_map(|article| re.captures_iter(&article.content))
        .map(|cap| cap[1].to_string())
        .collect();
    // Delete each cached file no article points at any more.
    for entry in entries.flatten() {
        let name = entry.file_name().to_string_lossy().into_owned();
        if !referenced.contains(&name) {
            let _ = std::fs::remove_file(entry.path());
        }
    }
}