FeedTheMonkey/src/image_cache.rs
Jeena 9bed643023 window: prefetch images and queue offline read/unread actions
After a successful article refresh, all images referenced in article
content are downloaded in the background so articles can be read
offline. The prefetch only runs when the cache-images setting is
enabled and the connection is not metered.

Read/unread state changes that fail to reach the server (e.g. when
offline) are now persisted to a local queue in
~/.cache/net.jeena.FeedTheMonkey/pending_sync.json. The queue is
flushed at the start of the next successful fetch.
2026-03-21 02:45:45 +00:00

211 lines
7.2 KiB
Rust

use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::path::PathBuf;
use crate::model::Article;
/// Returns the path of the on-disk image cache:
/// `<user cache dir>/net.jeena.FeedTheMonkey/images`.
///
/// This only builds the path; it does not create the directory.
fn images_dir() -> PathBuf {
    let mut dir = glib::user_cache_dir();
    dir.push("net.jeena.FeedTheMonkey");
    dir.push("images");
    dir
}
/// Maps an image URL to the file name used for it inside the cache directory.
///
/// The name is the 16-digit lowercase hex hash of the whole URL. If the text
/// between the last `.` and the first `?` is non-empty, at most five
/// characters long and purely ASCII alphanumeric, it is appended as the file
/// extension (`<hash>.<ext>`); otherwise the bare hash is returned.
fn url_to_filename(url: &str) -> String {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    url.hash(&mut state);
    let digest = format!("{:016x}", state.finish());

    // Drop the query string, then take whatever follows the last dot.
    let before_query = url.split('?').next().unwrap_or("");
    let candidate = before_query.rsplit('.').next().unwrap_or("");

    let plausible_extension = !candidate.is_empty()
        && candidate.len() <= 5
        && candidate.bytes().all(|b| b.is_ascii_alphanumeric());

    if plausible_extension {
        format!("{}.{}", digest, candidate)
    } else {
        digest
    }
}
/// Percent-encodes `s` byte by byte.
///
/// ASCII letters, digits and `-`, `.`, `~`, `_` are copied unchanged; every
/// other byte becomes `%XX` with two uppercase hex digits.
fn percent_encode(s: &str) -> String {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'~' | b'_' => {
                encoded.push(char::from(byte));
            }
            _ => {
                encoded.push('%');
                encoded.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
            }
        }
    }
    encoded
}
/// Decodes `%XX` escapes in `s` and returns the result as a string.
///
/// An escape is decoded only when `%` is followed by exactly two ASCII hex
/// digits. Anything else (a truncated escape at the end of the input, or
/// non-hex characters such as a sign) is copied through unchanged. Decoded
/// bytes that do not form valid UTF-8 are replaced lossily.
fn percent_decode(s: &str) -> String {
    /// Value of a single ASCII hex digit, or `None` for any other byte.
    fn hex_value(b: u8) -> Option<u8> {
        match b {
            b'0'..=b'9' => Some(b - b'0'),
            b'a'..=b'f' => Some(b - b'a' + 10),
            b'A'..=b'F' => Some(b - b'A' + 10),
            _ => None,
        }
    }

    let bytes = s.as_bytes();
    let mut out = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'%' {
            // Parse the two digits by hand: `u8::from_str_radix` would also
            // accept a leading '+', turning e.g. "%+A" into byte 0x0A.
            let hi = bytes.get(i + 1).copied().and_then(hex_value);
            let lo = bytes.get(i + 2).copied().and_then(hex_value);
            if let (Some(hi), Some(lo)) = (hi, lo) {
                out.push((hi << 4) | lo);
                i += 3;
                continue;
            }
        }
        out.push(bytes[i]);
        i += 1;
    }
    String::from_utf8_lossy(&out).into_owned()
}
/// Name of the custom URI scheme that `register_scheme` installs a handler for.
const SCHEME: &str = "feedthemonkey-img";
/// Prefix of every cache URI; the percent-encoded original image URL follows it.
const SCHEME_PREFIX: &str = "feedthemonkey-img:///";
/// Builds the `feedthemonkey-img:///…` URI for a remote image URL by
/// appending the percent-encoded URL to the scheme prefix.
fn original_url_to_scheme_uri(url: &str) -> String {
    let encoded = percent_encode(url);
    let mut uri = String::with_capacity(SCHEME_PREFIX.len() + encoded.len());
    uri.push_str(SCHEME_PREFIX);
    uri.push_str(&encoded);
    uri
}
/// Register the feedthemonkey-img URI scheme handler on the WebView's context.
/// Call this in setup_webview() before load_html().
///
/// For each request the handler decodes the original image URL from the
/// request URI and maps it to a file in the image cache directory:
///
/// * If the file already exists it is served immediately.
/// * Otherwise the image is downloaded on the background runtime, written to
///   the cache, and then served from the GLib main loop. If the download
///   fails, the server answers with an HTTP error status, or the file cannot
///   be written, the request is finished with a `NotFound` error and nothing
///   is left in the cache.
pub fn register_scheme(ctx: &webkit6::WebContext) {
    ctx.register_uri_scheme(SCHEME, |request| {
        let uri = request.uri().unwrap_or_default().to_string();
        let encoded = uri.strip_prefix(SCHEME_PREFIX).unwrap_or(&uri);
        let original_url = percent_decode(encoded);
        let path = images_dir().join(url_to_filename(&original_url));

        if path.exists() {
            serve_file(request.clone(), path);
            return;
        }

        // Not in cache — download in tokio, serve back on the main thread.
        let request = request.clone();
        let (tx, rx) = tokio::sync::oneshot::channel::<bool>();
        let path_dl = path.clone();
        crate::runtime::spawn_bg(async move {
            let ok = async {
                // `error_for_status` rejects 4xx/5xx responses so that an
                // error page is never stored (and later served) as the image.
                let bytes = reqwest::get(&original_url)
                    .await?
                    .error_for_status()?
                    .bytes()
                    .await?;
                if let Some(parent) = path_dl.parent() {
                    std::fs::create_dir_all(parent).ok();
                }
                if std::fs::write(&path_dl, &bytes).is_err() {
                    // Do not leave a truncated file behind: it would be
                    // treated as a valid cache entry on every later request.
                    std::fs::remove_file(&path_dl).ok();
                }
                Ok::<_, reqwest::Error>(())
            }
            .await
            .is_ok();
            let _ = tx.send(ok);
        });

        // spawn_future_local runs on the GLib main loop so the non-Send
        // URISchemeRequest can be safely held across the await point.
        glib::spawn_future_local(async move {
            if rx.await.unwrap_or(false) && path.exists() {
                serve_file(request, path);
            } else {
                request.finish_error(&mut glib::Error::new(
                    gio::IOErrorEnum::NotFound,
                    "Image unavailable",
                ));
            }
        });
    });
}
/// Finishes `request` with the contents of the file at `path`.
///
/// The whole file is read into memory and handed to the request as an
/// in-memory stream. The MIME type is chosen from the (case-insensitive) file
/// extension, falling back to `application/octet-stream`. If the file cannot
/// be read the request is finished with a `NotFound` error instead.
fn serve_file(request: webkit6::URISchemeRequest, path: PathBuf) {
    let Ok(data) = std::fs::read(&path) else {
        request.finish_error(&mut glib::Error::new(
            gio::IOErrorEnum::NotFound,
            "Image not found",
        ));
        return;
    };

    let extension = path
        .extension()
        .and_then(|e| e.to_str())
        .map(str::to_ascii_lowercase);
    let mime = match extension.as_deref() {
        Some("jpg") | Some("jpeg") => "image/jpeg",
        Some("png") => "image/png",
        Some("gif") => "image/gif",
        Some("webp") => "image/webp",
        Some("svg") => "image/svg+xml",
        Some("avif") => "image/avif",
        _ => "application/octet-stream",
    };

    let payload = glib::Bytes::from_owned(data);
    let stream = gio::MemoryInputStream::from_bytes(&payload);
    request.finish(&stream, -1, Some(mime));
}
/// Prefetch all images referenced in the articles into the cache directory.
/// Runs entirely in the background; already-cached files are skipped.
pub async fn prefetch(articles: Vec<Article>) {
let dir = images_dir();
std::fs::create_dir_all(&dir).ok();
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(30))
.build()
.unwrap_or_default();
let re = regex::Regex::new(&format!(r#"src="{}([^"]+)""#, regex::escape(SCHEME_PREFIX))).unwrap();
for article in &articles {
for cap in re.captures_iter(&article.content) {
let original_url = percent_decode(&cap[1]);
let path = dir.join(url_to_filename(&original_url));
if !path.exists() {
if let Ok(resp) = client.get(&original_url).send().await {
if let Ok(bytes) = resp.bytes().await {
std::fs::write(&path, &bytes).ok();
}
}
}
}
}
}
/// Rewrite all remote image src attributes to feedthemonkey-img:// URIs.
/// No network requests are made here — images are downloaded lazily by the
/// URI scheme handler the first time the WebView requests them, then cached.
///
/// Every `src="http(s)://…"` occurrence in each article's content is
/// replaced in a single pass. Ampersand character references inside the
/// attribute value (`&amp;`, `&#38;`, `&#038;`, `&#x26;`) are resolved to `&`
/// before the URL is encoded, so the handler later requests the URL a browser
/// would have requested rather than one containing a literal `&amp;`.
pub fn process(articles: Vec<Article>) -> Vec<Article> {
    let re = regex::Regex::new(r#"src="(https?://[^"]+)""#)
        .expect("image src pattern is a valid regex");
    articles
        .into_iter()
        .map(|mut article| {
            let rewritten = re
                .replace_all(&article.content, |caps: &regex::Captures<'_>| {
                    let url = decode_ampersand_entities(&caps[1]);
                    format!("src=\"{}\"", original_url_to_scheme_uri(&url))
                })
                .into_owned();
            article.content = rewritten;
            article
        })
        .collect()
}

/// Replaces HTML character references for `&` in an attribute value with a
/// plain `&`, in one left-to-right pass (so nothing is decoded twice).
fn decode_ampersand_entities(url: &str) -> String {
    const AMP_ENTITIES: [&str; 4] = ["&amp;", "&#38;", "&#038;", "&#x26;"];

    let mut out = String::with_capacity(url.len());
    let mut rest = url;
    while let Some(pos) = rest.find('&') {
        out.push_str(&rest[..pos]);
        out.push('&');
        let tail = &rest[pos..];
        // Skip the whole entity when one starts here, otherwise just the '&'.
        let consumed = AMP_ENTITIES
            .iter()
            .find(|entity| tail.starts_with(**entity))
            .map_or(1, |entity| entity.len());
        rest = &tail[consumed..];
    }
    out.push_str(rest);
    out
}
/// Delete every file in the image cache directory that none of the given
/// articles refers to.
///
/// References are the `feedthemonkey-img:///` sources found in the article
/// content, mapped to their cache file names. Does nothing when the cache
/// directory cannot be read; individual removal failures are ignored.
pub fn cleanup(articles: &[Article]) {
    let Ok(entries) = std::fs::read_dir(images_dir()) else {
        return;
    };

    let pattern = format!(r#"src="{}([^"]+)""#, regex::escape(SCHEME_PREFIX));
    let re = regex::Regex::new(&pattern).unwrap();

    // File names that must survive the sweep.
    let keep: HashSet<String> = articles
        .iter()
        .flat_map(|article| re.captures_iter(&article.content))
        .map(|cap| url_to_filename(&percent_decode(&cap[1])))
        .collect();

    for entry in entries.flatten() {
        let name = entry.file_name().to_string_lossy().into_owned();
        if keep.contains(&name) {
            continue;
        }
        std::fs::remove_file(entry.path()).ok();
    }
}