use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::path::PathBuf;

use crate::model::Article;

/// Directory inside the GLib user cache dir where downloaded images are stored.
fn images_dir() -> PathBuf {
    glib::user_cache_dir()
        .join("net.jeena.FeedTheMonkey")
        .join("images")
}

/// Map a remote image URL to the file name used for it in the cache.
///
/// The name is the 16-hex-digit hash of the full URL, followed by the URL's
/// extension when one can be recognised (text after the last `.` of the part
/// before `?`, at most 5 ASCII alphanumeric characters).
///
/// NOTE(review): `DefaultHasher`'s algorithm is documented as unspecified and
/// may change between Rust releases; a toolchain upgrade could therefore map
/// the same URL to a different file name (the image would simply be fetched
/// again). Confirm this is acceptable for the cache.
fn url_to_filename(url: &str) -> String {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    url.hash(&mut hasher);
    let hash = format!("{:016x}", hasher.finish());

    let ext = url
        .split('?')
        .next()
        .and_then(|u| u.rsplit('.').next())
        .filter(|e| e.len() <= 5 && e.bytes().all(|b| b.is_ascii_alphanumeric()))
        .unwrap_or("");

    if ext.is_empty() {
        hash
    } else {
        format!("{}.{}", hash, ext)
    }
}

/// Percent-encode every byte of `s` except ASCII alphanumerics and `-`, `.`,
/// `~`, `_`. Escapes use upper-case hex (`%2F`).
fn percent_encode(s: &str) -> String {
    use std::fmt::Write;

    let mut out = String::with_capacity(s.len());
    for b in s.bytes() {
        if b.is_ascii_alphanumeric() || b"-.~_".contains(&b) {
            out.push(b as char);
        } else {
            // Formatting straight into `out` avoids a temporary String per
            // escaped byte; writing to a String cannot fail.
            let _ = write!(out, "%{:02X}", b);
        }
    }
    out
}

/// Decode `%XX` escapes in `s`.
///
/// A `%` that is not followed by exactly two hexadecimal digits is copied
/// through unchanged. The decoded bytes are converted to a `String` lossily,
/// so invalid UTF-8 sequences become U+FFFD.
fn percent_decode(s: &str) -> String {
    /// Value of a single ASCII hex digit, or `None` for any other byte.
    fn hex_val(b: u8) -> Option<u8> {
        match b {
            b'0'..=b'9' => Some(b - b'0'),
            b'a'..=b'f' => Some(b - b'a' + 10),
            b'A'..=b'F' => Some(b - b'A' + 10),
            _ => None,
        }
    }

    let bytes = s.as_bytes();
    let mut out = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'%' {
            // Parse the two digits by hand: `u8::from_str_radix` would also
            // accept a leading `+` (e.g. "%+A"), which is not a valid escape.
            let hi = bytes.get(i + 1).copied().and_then(hex_val);
            let lo = bytes.get(i + 2).copied().and_then(hex_val);
            if let (Some(hi), Some(lo)) = (hi, lo) {
                out.push((hi << 4) | lo);
                i += 3;
                continue;
            }
        }
        out.push(bytes[i]);
        i += 1;
    }
    String::from_utf8_lossy(&out).into_owned()
}

/// Name of the custom URI scheme registered with WebKit.
const SCHEME: &str = "feedthemonkey-img";
/// Prefix of every cache URI; the percent-encoded original URL follows it.
const SCHEME_PREFIX: &str = "feedthemonkey-img:///";

/// Build the `feedthemonkey-img:///…` URI that stands in for `url`.
fn original_url_to_scheme_uri(url: &str) -> String {
    format!("{}{}", SCHEME_PREFIX, percent_encode(url))
}

/// Register the feedthemonkey-img URI scheme handler on the WebView's context.
/// Call this in setup_webview() before load_html().
pub fn register_scheme(ctx: &webkit6::WebContext) { ctx.register_uri_scheme(SCHEME, |request| { let uri = request.uri().unwrap_or_default().to_string(); let encoded = uri.strip_prefix(SCHEME_PREFIX).unwrap_or(&uri); let original_url = percent_decode(encoded); let path = images_dir().join(url_to_filename(&original_url)); if path.exists() { serve_file(request.clone(), path); return; } // Not in cache — download in tokio, serve back on the main thread. let request = request.clone(); let (tx, rx) = tokio::sync::oneshot::channel::(); let path_dl = path.clone(); crate::runtime::spawn_bg(async move { let ok = async { let bytes = reqwest::get(&original_url).await?.bytes().await?; std::fs::create_dir_all(path_dl.parent().unwrap()).ok(); std::fs::write(&path_dl, &bytes).ok(); Ok::<_, reqwest::Error>(()) } .await .is_ok(); let _ = tx.send(ok); }); // spawn_future_local runs on the GLib main loop so the non-Send // URISchemeRequest can be safely held across the await point. glib::spawn_future_local(async move { if rx.await.unwrap_or(false) && path.exists() { serve_file(request, path); } else { request.finish_error(&mut glib::Error::new( gio::IOErrorEnum::NotFound, "Image unavailable", )); } }); }); } fn serve_file(request: webkit6::URISchemeRequest, path: PathBuf) { match std::fs::read(&path) { Ok(data) => { let mime = path.extension() .and_then(|e| e.to_str()) .map(|ext| match ext.to_ascii_lowercase().as_str() { "jpg" | "jpeg" => "image/jpeg", "png" => "image/png", "gif" => "image/gif", "webp" => "image/webp", "svg" => "image/svg+xml", "avif" => "image/avif", _ => "application/octet-stream", }) .unwrap_or("application/octet-stream"); let stream = gio::MemoryInputStream::from_bytes(&glib::Bytes::from_owned(data)); request.finish(&stream, -1, Some(mime)); } Err(_) => { request.finish_error(&mut glib::Error::new( gio::IOErrorEnum::NotFound, "Image not found", )); } } } /// Download all remote images in every article and rewrite their src attributes /// to feedthemonkey-img:// 
URIs so images are served through the cache handler, /// which re-downloads automatically on a cache miss. pub async fn process(articles: Vec

) -> Vec

{ let dir = images_dir(); std::fs::create_dir_all(&dir).ok(); let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(30)) .build() .unwrap_or_default(); let re = regex::Regex::new(r#"src="(https?://[^"]+)""#).unwrap(); let mut out = Vec::with_capacity(articles.len()); for mut article in articles { let content = article.content.clone(); let mut rewritten = content.clone(); for cap in re.captures_iter(&content) { let url = &cap[1]; let path = dir.join(url_to_filename(url)); if !path.exists() { if let Ok(resp) = client.get(url).send().await { if let Ok(bytes) = resp.bytes().await { std::fs::write(&path, &bytes).ok(); } } } // Always rewrite to the scheme URI so the handler can re-download // if the cache directory is ever deleted. rewritten = rewritten.replace( &format!("src=\"{}\"", url), &format!("src=\"{}\"", original_url_to_scheme_uri(url)), ); } article.content = rewritten; out.push(article); } out } /// Remove cached image files no longer referenced by any article. pub fn cleanup(articles: &[Article]) { let dir = images_dir(); let Ok(entries) = std::fs::read_dir(&dir) else { return }; let re = regex::Regex::new( &format!(r#"src="{}([^"]+)""#, regex::escape(SCHEME_PREFIX)), ) .unwrap(); let mut referenced: HashSet = HashSet::new(); for article in articles { for cap in re.captures_iter(&article.content) { referenced.insert(url_to_filename(&percent_decode(&cap[1]))); } } for entry in entries.filter_map(|e| e.ok()) { let fname = entry.file_name().to_string_lossy().to_string(); if !referenced.contains(&fname) { std::fs::remove_file(entry.path()).ok(); } } }