feature: cache article images for offline reading
After fetching articles, all remote images referenced in article content are downloaded to ~/.local/share/net.jeena.FeedTheMonkey/images/ and their src attributes rewritten to file:// URIs. Subsequent loads of the same article (including from the cache on the next startup) display images without a network connection. Metered-connection awareness: image caching is skipped automatically when GIO reports the network connection as metered, regardless of the preference setting. A "Cache Images" toggle in Preferences lets the user disable caching entirely (stored in the cache-images GSettings key). After each refresh, images no longer referenced by any article in the current unread list are deleted from the cache directory to prevent unbounded disk growth.
This commit is contained in:
parent
183191727b
commit
fda441bebd
9 changed files with 186 additions and 12 deletions
90
src/image_cache.rs
Normal file
90
src/image_cache.rs
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
use std::collections::HashSet;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::model::Article;
|
||||
|
||||
fn images_dir() -> PathBuf {
|
||||
glib::user_data_dir()
|
||||
.join("net.jeena.FeedTheMonkey")
|
||||
.join("images")
|
||||
}
|
||||
|
||||
/// Map a remote image URL to a stable cache filename: a 16-hex-digit hash
/// of the full URL, plus the original extension when one can be recovered.
///
/// NOTE(review): `DefaultHasher`'s algorithm is deterministic within one
/// build but not guaranteed stable across Rust releases; after a toolchain
/// upgrade, previously cached files may be re-downloaded under new names
/// (the orphaned old files are then removed by `cleanup`).
fn url_to_filename(url: &str) -> String {
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    url.hash(&mut hasher);
    let hash = format!("{:016x}", hasher.finish());

    // Preserve extension so WebKit can detect the content type.
    // Strip the query string AND the fragment first: without dropping the
    // fragment, "pic.png#top" would yield the candidate "png#top", which the
    // alphanumeric filter rejects, losing the extension entirely.
    let ext = url
        .split(|c| c == '?' || c == '#')
        .next()
        .and_then(|u| u.rsplit('.').next())
        // Sanity-check the candidate: short and purely alphanumeric, so a
        // dot-free path like "example.com/page" (candidate "com/page") is
        // rejected rather than misused as an extension.
        .filter(|e| e.len() <= 5 && e.bytes().all(|b| b.is_ascii_alphanumeric()))
        .unwrap_or("");

    if ext.is_empty() { hash } else { format!("{}.{}", hash, ext) }
}
|
||||
|
||||
/// Download all remote images in every article and rewrite their src
|
||||
/// attributes to file:// URIs so articles render offline.
|
||||
pub async fn process(articles: Vec<Article>) -> Vec<Article> {
|
||||
let dir = images_dir();
|
||||
std::fs::create_dir_all(&dir).ok();
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.build()
|
||||
.unwrap_or_default();
|
||||
|
||||
let re = regex::Regex::new(r#"src="(https?://[^"]+)""#).unwrap();
|
||||
|
||||
let mut out = Vec::with_capacity(articles.len());
|
||||
for mut article in articles {
|
||||
let content = article.content.clone();
|
||||
let mut rewritten = content.clone();
|
||||
|
||||
for cap in re.captures_iter(&content) {
|
||||
let url = &cap[1];
|
||||
let filename = url_to_filename(url);
|
||||
let path = dir.join(&filename);
|
||||
|
||||
if !path.exists() {
|
||||
if let Ok(resp) = client.get(url).send().await {
|
||||
if let Ok(bytes) = resp.bytes().await {
|
||||
std::fs::write(&path, &bytes).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if path.exists() {
|
||||
let file_uri = format!("file://{}", path.display());
|
||||
rewritten = rewritten.replace(
|
||||
&format!("src=\"{}\"", url),
|
||||
&format!("src=\"{}\"", file_uri),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
article.content = rewritten;
|
||||
out.push(article);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Remove cached image files that are no longer referenced by any article.
|
||||
pub fn cleanup(articles: &[Article]) {
|
||||
let dir = images_dir();
|
||||
let Ok(entries) = std::fs::read_dir(&dir) else { return };
|
||||
|
||||
let re = regex::Regex::new(r#"src="file://[^"]+/images/([^"]+)""#).unwrap();
|
||||
let mut referenced: HashSet<String> = HashSet::new();
|
||||
for article in articles {
|
||||
for cap in re.captures_iter(&article.content) {
|
||||
referenced.insert(cap[1].to_string());
|
||||
}
|
||||
}
|
||||
|
||||
for entry in entries.filter_map(|e| e.ok()) {
|
||||
let fname = entry.file_name().to_string_lossy().to_string();
|
||||
if !referenced.contains(&fname) {
|
||||
std::fs::remove_file(entry.path()).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue