/// Map a remote image URL to a stable cache file name.
///
/// The name is the 64-bit `DefaultHasher` digest of the whole URL rendered as
/// 16 hex digits. When the part of the URL before `?` ends in something that
/// looks like an extension (at most five ASCII alphanumerics after the last
/// `.`), that extension is appended so the MIME type can later be derived from
/// the file name.
// NOTE(review): the fallback applied to the `ext` chain was elided in the
// reviewed patch; `unwrap_or("")` is assumed from how `ext` is used right
// after it — confirm against the full file.
fn url_to_filename(url: &str) -> String {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    url.hash(&mut hasher);
    let hash = format!("{:016x}", hasher.finish());
    let ext = url
        .split('?')
        .next()
        .and_then(|u| u.rsplit('.').next())
        .filter(|e| e.len() <= 5 && e.bytes().all(|b| b.is_ascii_alphanumeric()))
        .unwrap_or("");
    if ext.is_empty() {
        hash
    } else {
        format!("{}.{}", hash, ext)
    }
}

/// Percent-encode every byte of `s` except ASCII alphanumerics and `-._~`.
///
/// Escapes are written as `%XX` with upper-case hex digits, so the result
/// consists only of ASCII alphanumerics, `-._~` and `%`.
fn percent_encode(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(s.len());
    for b in s.bytes() {
        if b.is_ascii_alphanumeric() || matches!(b, b'-' | b'.' | b'~' | b'_') {
            out.push(char::from(b));
        } else {
            // Table lookup instead of `format!` avoids one heap allocation per
            // escaped byte.
            out.push('%');
            out.push(char::from(HEX[usize::from(b >> 4)]));
            out.push(char::from(HEX[usize::from(b & 0x0F)]));
        }
    }
    out
}

/// Decode `%XX` escapes in `s`; the inverse of [`percent_encode`].
///
/// A `%` that is not followed by exactly two hex digits (either case) is
/// copied through unchanged. Decoded bytes that do not form valid UTF-8 are
/// replaced with U+FFFD.
fn percent_decode(s: &str) -> String {
    /// Value of a single ASCII hex digit, or `None` for any other byte.
    fn hex_val(b: u8) -> Option<u8> {
        match b {
            b'0'..=b'9' => Some(b - b'0'),
            b'a'..=b'f' => Some(b - b'a' + 10),
            b'A'..=b'F' => Some(b - b'A' + 10),
            _ => None,
        }
    }

    let mut out = Vec::with_capacity(s.len());
    let mut rest = s.as_bytes();
    while let Some((&b, tail)) = rest.split_first() {
        if b == b'%' {
            if let [h, l, after @ ..] = tail {
                // Both characters must be real hex digits. Parsing the pair
                // with `u8::from_str_radix` would also accept a leading sign,
                // turning e.g. "%+A" into the byte 0x0A.
                if let (Some(hi), Some(lo)) = (hex_val(*h), hex_val(*l)) {
                    out.push((hi << 4) | lo);
                    rest = after;
                    continue;
                }
            }
        }
        out.push(b);
        rest = tail;
    }
    String::from_utf8_lossy(&out).into_owned()
}

/// Name of the custom URI scheme used for cached images.
const SCHEME: &str = "feedthemonkey-img";
/// Prefix of every cached-image URI: the scheme followed by `:///`.
const SCHEME_PREFIX: &str = "feedthemonkey-img:///";

/// Build the custom-scheme URI that stands in for the remote image `url`.
///
/// The original URL is percent-encoded and appended to [`SCHEME_PREFIX`].
fn original_url_to_scheme_uri(url: &str) -> String {
    format!("{}{}", SCHEME_PREFIX, percent_encode(url))
}
+pub fn register_scheme(ctx: &webkit6::WebContext) { + ctx.register_uri_scheme(SCHEME, |request| { + let uri = request.uri().unwrap_or_default().to_string(); + let encoded = uri.strip_prefix(SCHEME_PREFIX).unwrap_or(&uri); + let original_url = percent_decode(encoded); + let path = images_dir().join(url_to_filename(&original_url)); + + if path.exists() { + serve_file(request.clone(), path); + return; + } + + // Not in cache — download in tokio, serve back on the main thread. + let request = request.clone(); + let (tx, rx) = tokio::sync::oneshot::channel::(); + let path_dl = path.clone(); + + crate::runtime::spawn_bg(async move { + let ok = async { + let bytes = reqwest::get(&original_url).await?.bytes().await?; + std::fs::create_dir_all(path_dl.parent().unwrap()).ok(); + std::fs::write(&path_dl, &bytes).ok(); + Ok::<_, reqwest::Error>(()) + } + .await + .is_ok(); + let _ = tx.send(ok); + }); + + // spawn_future_local runs on the GLib main loop so the non-Send + // URISchemeRequest can be safely held across the await point. 
+ glib::spawn_future_local(async move { + if rx.await.unwrap_or(false) && path.exists() { + serve_file(request, path); + } else { + request.finish_error(&mut glib::Error::new( + gio::IOErrorEnum::NotFound, + "Image unavailable", + )); + } + }); + }); +} + +fn serve_file(request: webkit6::URISchemeRequest, path: PathBuf) { + match std::fs::read(&path) { + Ok(data) => { + let mime = path.extension() + .and_then(|e| e.to_str()) + .map(|ext| match ext.to_ascii_lowercase().as_str() { + "jpg" | "jpeg" => "image/jpeg", + "png" => "image/png", + "gif" => "image/gif", + "webp" => "image/webp", + "svg" => "image/svg+xml", + "avif" => "image/avif", + _ => "application/octet-stream", + }) + .unwrap_or("application/octet-stream"); + let stream = gio::MemoryInputStream::from_bytes(&glib::Bytes::from_owned(data)); + request.finish(&stream, -1, Some(mime)); + } + Err(_) => { + request.finish_error(&mut glib::Error::new( + gio::IOErrorEnum::NotFound, + "Image not found", + )); + } + } +} + +/// Download all remote images in every article and rewrite their src attributes +/// to feedthemonkey-img:// URIs so images are served through the cache handler, +/// which re-downloads automatically on a cache miss. pub async fn process(articles: Vec
) -> Vec
{ let dir = images_dir(); std::fs::create_dir_all(&dir).ok(); @@ -42,9 +155,7 @@ pub async fn process(articles: Vec
) -> Vec
{ for cap in re.captures_iter(&content) { let url = &cap[1]; - let filename = url_to_filename(url); - let path = dir.join(&filename); - + let path = dir.join(url_to_filename(url)); if !path.exists() { if let Ok(resp) = client.get(url).send().await { if let Ok(bytes) = resp.bytes().await { @@ -52,14 +163,12 @@ pub async fn process(articles: Vec
) -> Vec
{ } } } - - if path.exists() { - let file_uri = format!("file://{}", path.display()); - rewritten = rewritten.replace( - &format!("src=\"{}\"", url), - &format!("src=\"{}\"", file_uri), - ); - } + // Always rewrite to the scheme URI so the handler can re-download + // if the cache directory is ever deleted. + rewritten = rewritten.replace( + &format!("src=\"{}\"", url), + &format!("src=\"{}\"", original_url_to_scheme_uri(url)), + ); } article.content = rewritten; @@ -68,16 +177,20 @@ pub async fn process(articles: Vec
) -> Vec
{ out } -/// Remove cached image files that are no longer referenced by any article. +/// Remove cached image files no longer referenced by any article. pub fn cleanup(articles: &[Article]) { let dir = images_dir(); let Ok(entries) = std::fs::read_dir(&dir) else { return }; - let re = regex::Regex::new(r#"src="file://[^"]+/images/([^"]+)""#).unwrap(); + let re = regex::Regex::new( + &format!(r#"src="{}([^"]+)""#, regex::escape(SCHEME_PREFIX)), + ) + .unwrap(); + let mut referenced: HashSet = HashSet::new(); for article in articles { for cap in re.captures_iter(&article.content) { - referenced.insert(cap[1].to_string()); + referenced.insert(url_to_filename(&percent_decode(&cap[1]))); } } diff --git a/src/window.rs b/src/window.rs index cc2c4ba..95ba8ea 100644 --- a/src/window.rs +++ b/src/window.rs @@ -245,6 +245,9 @@ pub mod imp { fn setup_webview(&self) { let wv = &*self.web_view; + if let Some(ctx) = wv.web_context() { + crate::image_cache::register_scheme(&ctx); + } // Load content.html from GResource, inlining the CSS so WebKit // doesn't need to fetch it over a custom scheme.