feature: cache article images for offline reading

After fetching articles, all remote images referenced in article content
are downloaded to ~/.local/share/net.jeena.FeedTheMonkey/images/ and
their src attributes rewritten to file:// URIs. Subsequent loads of the
same article (including from the cache on the next startup) display
images without a network connection.

Metered-connection awareness: image caching is skipped automatically
when GIO reports the network connection as metered, even if the
"Cache Images" preference is enabled.

A "Cache Images" toggle in Preferences lets the user disable caching
entirely (stored in the cache-images GSettings key).

After each refresh, images no longer referenced by any article in the
current unread list are deleted from the cache directory to prevent
unbounded disk growth.
This commit is contained in:
Jeena 2026-03-21 01:19:49 +00:00
parent 183191727b
commit fda441bebd
9 changed files with 186 additions and 12 deletions

90
src/image_cache.rs Normal file
View file

@ -0,0 +1,90 @@
use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::path::PathBuf;
use crate::model::Article;
/// Directory where downloaded article images are stored:
/// `<user data dir>/net.jeena.FeedTheMonkey/images`.
fn images_dir() -> PathBuf {
    let mut dir = glib::user_data_dir();
    dir.push("net.jeena.FeedTheMonkey");
    dir.push("images");
    dir
}
fn url_to_filename(url: &str) -> String {
let mut hasher = std::collections::hash_map::DefaultHasher::new();
url.hash(&mut hasher);
let hash = format!("{:016x}", hasher.finish());
// Preserve extension so WebKit can detect the content type.
let ext = url.split('?').next()
.and_then(|u| u.rsplit('.').next())
.filter(|e| e.len() <= 5 && e.bytes().all(|b| b.is_ascii_alphanumeric()))
.unwrap_or("");
if ext.is_empty() { hash } else { format!("{}.{}", hash, ext) }
}
/// Download all remote images in every article and rewrite their src
/// attributes to file:// URIs so articles render offline.
///
/// Already-cached images are not re-downloaded. A failed or non-2xx
/// download is skipped and the original remote src is kept, so the image
/// still loads whenever a connection is available.
pub async fn process(articles: Vec<Article>) -> Vec<Article> {
    let dir = images_dir();
    // Best-effort: if the directory can't be created, every write below
    // fails and articles pass through with their remote URLs intact.
    std::fs::create_dir_all(&dir).ok();
    let client = reqwest::Client::builder()
        .timeout(std::time::Duration::from_secs(30))
        .build()
        .unwrap_or_default();
    let re = regex::Regex::new(r#"src="(https?://[^"]+)""#).unwrap();
    let mut out = Vec::with_capacity(articles.len());
    for mut article in articles {
        let content = article.content.clone();
        let mut rewritten = content.clone();
        for cap in re.captures_iter(&content) {
            let url = &cap[1];
            let path = dir.join(url_to_filename(url));
            if !path.exists() {
                if let Ok(resp) = client.get(url).send().await {
                    // Only cache successful responses. Without this check a
                    // 404/500 HTML error page would be written to the cache
                    // and — because of the exists() guard above — served as
                    // the image forever, never re-fetched.
                    if resp.status().is_success() {
                        if let Ok(bytes) = resp.bytes().await {
                            std::fs::write(&path, &bytes).ok();
                        }
                    }
                }
            }
            if path.exists() {
                // NOTE(review): path.display() is not percent-encoded; a
                // cache path containing spaces would produce an invalid
                // file:// URI. Hash-based names make this unlikely here.
                let file_uri = format!("file://{}", path.display());
                rewritten = rewritten.replace(
                    &format!("src=\"{}\"", url),
                    &format!("src=\"{}\"", file_uri),
                );
            }
        }
        article.content = rewritten;
        out.push(article);
    }
    out
}
/// Remove cached image files that are no longer referenced by any article.
pub fn cleanup(articles: &[Article]) {
    let dir = images_dir();
    // Nothing to do if the cache directory doesn't exist or is unreadable.
    let entries = match std::fs::read_dir(&dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    let re = regex::Regex::new(r#"src="file://[^"]+/images/([^"]+)""#).unwrap();
    // Every cache file name still referenced from some article body.
    let referenced: HashSet<String> = articles
        .iter()
        .flat_map(|article| re.captures_iter(&article.content))
        .map(|cap| cap[1].to_string())
        .collect();
    // Delete each cached file no article points at any more.
    for entry in entries.flatten() {
        let name = entry.file_name().to_string_lossy().into_owned();
        if !referenced.contains(&name) {
            let _ = std::fs::remove_file(entry.path());
        }
    }
}