image-cache: use custom URI scheme for transparent cache-miss re-download
Instead of rewriting img src to file:// URIs, rewrite to a custom feedthemonkey-img:/// scheme. A WebKit URI scheme handler is registered on the WebView's WebContext that:
- Serves the image directly from the cache directory if present.
- On a cache miss (e.g. after the user deletes ~/.cache), spawns a reqwest download in the tokio runtime, then resumes on the GLib main loop via glib::spawn_future_local and serves the freshly downloaded bytes — all transparent to the WebView.

This means deleting the cache directory never results in permanently broken images; they are silently re-fetched on first access.
This commit is contained in:
parent
8e21c80a33
commit
00700c3211
2 changed files with 133 additions and 17 deletions
|
|
@ -14,7 +14,6 @@ fn url_to_filename(url: &str) -> String {
|
||||||
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
||||||
url.hash(&mut hasher);
|
url.hash(&mut hasher);
|
||||||
let hash = format!("{:016x}", hasher.finish());
|
let hash = format!("{:016x}", hasher.finish());
|
||||||
// Preserve extension so WebKit can detect the content type.
|
|
||||||
let ext = url.split('?').next()
|
let ext = url.split('?').next()
|
||||||
.and_then(|u| u.rsplit('.').next())
|
.and_then(|u| u.rsplit('.').next())
|
||||||
.filter(|e| e.len() <= 5 && e.bytes().all(|b| b.is_ascii_alphanumeric()))
|
.filter(|e| e.len() <= 5 && e.bytes().all(|b| b.is_ascii_alphanumeric()))
|
||||||
|
|
@ -22,8 +21,122 @@ fn url_to_filename(url: &str) -> String {
|
||||||
if ext.is_empty() { hash } else { format!("{}.{}", hash, ext) }
|
if ext.is_empty() { hash } else { format!("{}.{}", hash, ext) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Download all remote images in every article and rewrite their src
|
/// Percent-encode `s` so the whole string can be embedded in a URI path.
///
/// ASCII alphanumerics and the characters `-`, `.`, `~`, `_` are copied
/// verbatim; every other byte of the UTF-8 encoding is emitted as `%XX`
/// using upper-case hexadecimal digits.
fn percent_encode(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(s.len());
    for b in s.bytes() {
        if b.is_ascii_alphanumeric() || matches!(b, b'-' | b'.' | b'~' | b'_') {
            out.push(char::from(b));
        } else {
            // Push the escape directly instead of building a temporary
            // `String` with `format!` for every escaped byte.
            out.push('%');
            out.push(char::from(HEX[usize::from(b >> 4)]));
            out.push(char::from(HEX[usize::from(b & 0x0F)]));
        }
    }
    out
}
|
||||||
|
|
||||||
|
/// Decode `%XX` escapes in `s` back into raw bytes and return the result as
/// a string.
///
/// Only a `%` followed by exactly two hexadecimal digits is treated as an
/// escape; any other `%` (truncated, or followed by non-hex characters such
/// as `+`) is copied through unchanged. Decoded bytes that do not form valid
/// UTF-8 are replaced with U+FFFD.
fn percent_decode(s: &str) -> String {
    /// Numeric value of an ASCII hex digit, or `None` for any other byte.
    fn hex_value(b: u8) -> Option<u8> {
        match b {
            b'0'..=b'9' => Some(b - b'0'),
            b'a'..=b'f' => Some(b - b'a' + 10),
            b'A'..=b'F' => Some(b - b'A' + 10),
            _ => None,
        }
    }

    let bytes = s.as_bytes();
    let mut out = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'%' {
            // Validate both digits explicitly: `u8::from_str_radix` would
            // also accept a leading '+', turning e.g. "%+4" into byte 0x04.
            let hi = bytes.get(i + 1).copied().and_then(hex_value);
            let lo = bytes.get(i + 2).copied().and_then(hex_value);
            if let (Some(hi), Some(lo)) = (hi, lo) {
                out.push((hi << 4) | lo);
                i += 3;
                continue;
            }
        }
        out.push(bytes[i]);
        i += 1;
    }
    String::from_utf8_lossy(&out).into_owned()
}
|
||||||
|
|
||||||
|
/// Name of the custom URI scheme passed to `register_uri_scheme`.
const SCHEME: &str = "feedthemonkey-img";
|
||||||
|
/// Prefix of every rewritten image URI; the percent-encoded original URL
/// follows it directly.
const SCHEME_PREFIX: &str = "feedthemonkey-img:///";
|
||||||
|
|
||||||
|
fn original_url_to_scheme_uri(url: &str) -> String {
|
||||||
|
format!("{}{}", SCHEME_PREFIX, percent_encode(url))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Register the feedthemonkey-img URI scheme handler on the WebView's context.
|
||||||
|
/// Call this in setup_webview() before load_html().
|
||||||
|
pub fn register_scheme(ctx: &webkit6::WebContext) {
|
||||||
|
ctx.register_uri_scheme(SCHEME, |request| {
|
||||||
|
let uri = request.uri().unwrap_or_default().to_string();
|
||||||
|
let encoded = uri.strip_prefix(SCHEME_PREFIX).unwrap_or(&uri);
|
||||||
|
let original_url = percent_decode(encoded);
|
||||||
|
let path = images_dir().join(url_to_filename(&original_url));
|
||||||
|
|
||||||
|
if path.exists() {
|
||||||
|
serve_file(request.clone(), path);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not in cache — download in tokio, serve back on the main thread.
|
||||||
|
let request = request.clone();
|
||||||
|
let (tx, rx) = tokio::sync::oneshot::channel::<bool>();
|
||||||
|
let path_dl = path.clone();
|
||||||
|
|
||||||
|
crate::runtime::spawn_bg(async move {
|
||||||
|
let ok = async {
|
||||||
|
let bytes = reqwest::get(&original_url).await?.bytes().await?;
|
||||||
|
std::fs::create_dir_all(path_dl.parent().unwrap()).ok();
|
||||||
|
std::fs::write(&path_dl, &bytes).ok();
|
||||||
|
Ok::<_, reqwest::Error>(())
|
||||||
|
}
|
||||||
|
.await
|
||||||
|
.is_ok();
|
||||||
|
let _ = tx.send(ok);
|
||||||
|
});
|
||||||
|
|
||||||
|
// spawn_future_local runs on the GLib main loop so the non-Send
|
||||||
|
// URISchemeRequest can be safely held across the await point.
|
||||||
|
glib::spawn_future_local(async move {
|
||||||
|
if rx.await.unwrap_or(false) && path.exists() {
|
||||||
|
serve_file(request, path);
|
||||||
|
} else {
|
||||||
|
request.finish_error(&mut glib::Error::new(
|
||||||
|
gio::IOErrorEnum::NotFound,
|
||||||
|
"Image unavailable",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serve_file(request: webkit6::URISchemeRequest, path: PathBuf) {
|
||||||
|
match std::fs::read(&path) {
|
||||||
|
Ok(data) => {
|
||||||
|
let mime = path.extension()
|
||||||
|
.and_then(|e| e.to_str())
|
||||||
|
.map(|ext| match ext.to_ascii_lowercase().as_str() {
|
||||||
|
"jpg" | "jpeg" => "image/jpeg",
|
||||||
|
"png" => "image/png",
|
||||||
|
"gif" => "image/gif",
|
||||||
|
"webp" => "image/webp",
|
||||||
|
"svg" => "image/svg+xml",
|
||||||
|
"avif" => "image/avif",
|
||||||
|
_ => "application/octet-stream",
|
||||||
|
})
|
||||||
|
.unwrap_or("application/octet-stream");
|
||||||
|
let stream = gio::MemoryInputStream::from_bytes(&glib::Bytes::from_owned(data));
|
||||||
|
request.finish(&stream, -1, Some(mime));
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
request.finish_error(&mut glib::Error::new(
|
||||||
|
gio::IOErrorEnum::NotFound,
|
||||||
|
"Image not found",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Download all remote images in every article and rewrite their src attributes
|
||||||
|
/// to feedthemonkey-img:/// URIs so images are served through the cache handler,
|
||||||
|
/// which re-downloads automatically on a cache miss.
|
||||||
pub async fn process(articles: Vec<Article>) -> Vec<Article> {
|
pub async fn process(articles: Vec<Article>) -> Vec<Article> {
|
||||||
let dir = images_dir();
|
let dir = images_dir();
|
||||||
std::fs::create_dir_all(&dir).ok();
|
std::fs::create_dir_all(&dir).ok();
|
||||||
|
|
@ -42,9 +155,7 @@ pub async fn process(articles: Vec<Article>) -> Vec<Article> {
|
||||||
|
|
||||||
for cap in re.captures_iter(&content) {
|
for cap in re.captures_iter(&content) {
|
||||||
let url = &cap[1];
|
let url = &cap[1];
|
||||||
let filename = url_to_filename(url);
|
let path = dir.join(url_to_filename(url));
|
||||||
let path = dir.join(&filename);
|
|
||||||
|
|
||||||
if !path.exists() {
|
if !path.exists() {
|
||||||
if let Ok(resp) = client.get(url).send().await {
|
if let Ok(resp) = client.get(url).send().await {
|
||||||
if let Ok(bytes) = resp.bytes().await {
|
if let Ok(bytes) = resp.bytes().await {
|
||||||
|
|
@ -52,14 +163,12 @@ pub async fn process(articles: Vec<Article>) -> Vec<Article> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Always rewrite to the scheme URI so the handler can re-download
|
||||||
if path.exists() {
|
// if the cache directory is ever deleted.
|
||||||
let file_uri = format!("file://{}", path.display());
|
rewritten = rewritten.replace(
|
||||||
rewritten = rewritten.replace(
|
&format!("src=\"{}\"", url),
|
||||||
&format!("src=\"{}\"", url),
|
&format!("src=\"{}\"", original_url_to_scheme_uri(url)),
|
||||||
&format!("src=\"{}\"", file_uri),
|
);
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
article.content = rewritten;
|
article.content = rewritten;
|
||||||
|
|
@ -68,16 +177,20 @@ pub async fn process(articles: Vec<Article>) -> Vec<Article> {
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Remove cached image files that are no longer referenced by any article.
|
/// Remove cached image files no longer referenced by any article.
|
||||||
pub fn cleanup(articles: &[Article]) {
|
pub fn cleanup(articles: &[Article]) {
|
||||||
let dir = images_dir();
|
let dir = images_dir();
|
||||||
let Ok(entries) = std::fs::read_dir(&dir) else { return };
|
let Ok(entries) = std::fs::read_dir(&dir) else { return };
|
||||||
|
|
||||||
let re = regex::Regex::new(r#"src="file://[^"]+/images/([^"]+)""#).unwrap();
|
let re = regex::Regex::new(
|
||||||
|
&format!(r#"src="{}([^"]+)""#, regex::escape(SCHEME_PREFIX)),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let mut referenced: HashSet<String> = HashSet::new();
|
let mut referenced: HashSet<String> = HashSet::new();
|
||||||
for article in articles {
|
for article in articles {
|
||||||
for cap in re.captures_iter(&article.content) {
|
for cap in re.captures_iter(&article.content) {
|
||||||
referenced.insert(cap[1].to_string());
|
referenced.insert(url_to_filename(&percent_decode(&cap[1])));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -245,6 +245,9 @@ pub mod imp {
|
||||||
|
|
||||||
fn setup_webview(&self) {
|
fn setup_webview(&self) {
|
||||||
let wv = &*self.web_view;
|
let wv = &*self.web_view;
|
||||||
|
if let Some(ctx) = wv.web_context() {
|
||||||
|
crate::image_cache::register_scheme(&ctx);
|
||||||
|
}
|
||||||
|
|
||||||
// Load content.html from GResource, inlining the CSS so WebKit
|
// Load content.html from GResource, inlining the CSS so WebKit
|
||||||
// doesn't need to fetch it over a custom scheme.
|
// doesn't need to fetch it over a custom scheme.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue