feature: cache article images for offline reading
After fetching articles, all remote images referenced in article content are downloaded to ~/.local/share/net.jeena.FeedTheMonkey/images/ and their src attributes rewritten to file:// URIs. Subsequent loads of the same article (including from the cache on the next startup) display images without a network connection. Metered-connection awareness: image caching is skipped automatically when GIO reports the network connection as metered, regardless of the preference setting. A "Cache Images" toggle in Preferences lets the user disable caching entirely (stored in the cache-images GSettings key). After each refresh, images no longer referenced by any article in the current unread list are deleted from the cache directory to prevent unbounded disk growth.
This commit is contained in:
parent
183191727b
commit
fda441bebd
9 changed files with 186 additions and 12 deletions
39
Cargo.lock
generated
39
Cargo.lock
generated
|
|
@ -2,6 +2,15 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atomic-waker"
|
||||
version = "1.1.2"
|
||||
|
|
@ -129,6 +138,7 @@ dependencies = [
|
|||
"gtk4",
|
||||
"libadwaita",
|
||||
"libsecret",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -1113,6 +1123,35 @@ dependencies = [
|
|||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.12.28"
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ webkit6 = { version = "0.6" }
|
|||
gio = { version = "0.22" }
|
||||
glib = { version = "0.22" }
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] }
|
||||
regex = "1"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
|
|
|
|||
|
|
@ -25,5 +25,9 @@
|
|||
<default>''</default>
|
||||
<summary>Content rewrite rules, one per line: domain from to [from to …]</summary>
|
||||
</key>
|
||||
<key name="cache-images" type="b">
|
||||
<default>true</default>
|
||||
<summary>Download and cache article images for offline reading (skipped on metered connections)</summary>
|
||||
</key>
|
||||
</schema>
|
||||
</schemalist>
|
||||
|
|
|
|||
|
|
@ -11,6 +11,15 @@ template $PreferencesDialog : Adw.Dialog {
|
|||
Adw.HeaderBar {}
|
||||
|
||||
Adw.PreferencesPage {
|
||||
Adw.PreferencesGroup {
|
||||
title: _("Images");
|
||||
|
||||
Adw.SwitchRow cache_images_row {
|
||||
title: _("Cache Images");
|
||||
subtitle: _("Download images for offline reading (skipped on metered connections)");
|
||||
}
|
||||
}
|
||||
|
||||
Adw.PreferencesGroup {
|
||||
title: _("Content Filters");
|
||||
description: _("One rule per line: domain find replace [find replace …]\n\nExample:\n www.imycomic.com -150x150.jpg .jpg");
|
||||
|
|
|
|||
|
|
@ -17,6 +17,17 @@ corresponding .blp file and regenerate this file with blueprint-compiler.
|
|||
</child>
|
||||
<child>
|
||||
<object class="AdwPreferencesPage">
|
||||
<child>
|
||||
<object class="AdwPreferencesGroup">
|
||||
<property name="title" translatable="yes">Images</property>
|
||||
<child>
|
||||
<object class="AdwSwitchRow" id="cache_images_row">
|
||||
<property name="title" translatable="yes">Cache Images</property>
|
||||
<property name="subtitle" translatable="yes">Download images for offline reading (skipped on metered connections)</property>
|
||||
</object>
|
||||
</child>
|
||||
</object>
|
||||
</child>
|
||||
<child>
|
||||
<object class="AdwPreferencesGroup">
|
||||
<property name="title" translatable="yes">Content Filters</property>
|
||||
|
|
|
|||
90
src/image_cache.rs
Normal file
90
src/image_cache.rs
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
use std::collections::HashSet;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::model::Article;
|
||||
|
||||
fn images_dir() -> PathBuf {
|
||||
glib::user_data_dir()
|
||||
.join("net.jeena.FeedTheMonkey")
|
||||
.join("images")
|
||||
}
|
||||
|
||||
/// Derive the cache filename for an image URL.
///
/// The name is a 16-hex-digit hash of the complete URL (query and fragment
/// included), followed by `.ext` when the URL's last path segment ends in a
/// plausible extension (1–5 ASCII alphanumeric characters). The extension is
/// kept so WebKit can detect the content type of the cached file.
///
/// Only the path is inspected for the extension: the query string, the
/// fragment and the host name are ignored, so `https://example.io` does not
/// get a bogus `.io` suffix and `…/a.png#top` still yields `.png`.
fn url_to_filename(url: &str) -> String {
    // NOTE: the std docs do not guarantee DefaultHasher's algorithm across
    // Rust releases. A toolchain change could therefore rename cache entries;
    // the only cost is that images are downloaded again.
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    url.hash(&mut hasher);
    let hash = format!("{:016x}", hasher.finish());

    // Strip the query string and fragment before looking for an extension.
    let trimmed = url
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or("");

    // Skip `scheme://host` so a dot in the host name is never mistaken for an
    // extension separator. Input without a scheme is treated as a bare path.
    let path = match trimmed.split_once("://") {
        Some((_, rest)) => rest.split_once('/').map_or("", |(_, p)| p),
        None => trimmed,
    };
    let last_segment = path.rsplit('/').next().unwrap_or("");

    let ext = last_segment
        .rsplit_once('.')
        .map(|(_, e)| e)
        .filter(|e| {
            !e.is_empty() && e.len() <= 5 && e.bytes().all(|b| b.is_ascii_alphanumeric())
        });

    match ext {
        Some(ext) => format!("{hash}.{ext}"),
        None => hash,
    }
}
|
||||
|
||||
/// Download all remote images in every article and rewrite their src
|
||||
/// attributes to file:// URIs so articles render offline.
|
||||
pub async fn process(articles: Vec<Article>) -> Vec<Article> {
|
||||
let dir = images_dir();
|
||||
std::fs::create_dir_all(&dir).ok();
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.build()
|
||||
.unwrap_or_default();
|
||||
|
||||
let re = regex::Regex::new(r#"src="(https?://[^"]+)""#).unwrap();
|
||||
|
||||
let mut out = Vec::with_capacity(articles.len());
|
||||
for mut article in articles {
|
||||
let content = article.content.clone();
|
||||
let mut rewritten = content.clone();
|
||||
|
||||
for cap in re.captures_iter(&content) {
|
||||
let url = &cap[1];
|
||||
let filename = url_to_filename(url);
|
||||
let path = dir.join(&filename);
|
||||
|
||||
if !path.exists() {
|
||||
if let Ok(resp) = client.get(url).send().await {
|
||||
if let Ok(bytes) = resp.bytes().await {
|
||||
std::fs::write(&path, &bytes).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if path.exists() {
|
||||
let file_uri = format!("file://{}", path.display());
|
||||
rewritten = rewritten.replace(
|
||||
&format!("src=\"{}\"", url),
|
||||
&format!("src=\"{}\"", file_uri),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
article.content = rewritten;
|
||||
out.push(article);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Remove cached image files that are no longer referenced by any article.
|
||||
pub fn cleanup(articles: &[Article]) {
|
||||
let dir = images_dir();
|
||||
let Ok(entries) = std::fs::read_dir(&dir) else { return };
|
||||
|
||||
let re = regex::Regex::new(r#"src="file://[^"]+/images/([^"]+)""#).unwrap();
|
||||
let mut referenced: HashSet<String> = HashSet::new();
|
||||
for article in articles {
|
||||
for cap in re.captures_iter(&article.content) {
|
||||
referenced.insert(cap[1].to_string());
|
||||
}
|
||||
}
|
||||
|
||||
for entry in entries.filter_map(|e| e.ok()) {
|
||||
let fname = entry.file_name().to_string_lossy().to_string();
|
||||
if !referenced.contains(&fname) {
|
||||
std::fs::remove_file(entry.path()).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
mod api;
|
||||
mod app;
|
||||
mod cache;
|
||||
mod image_cache;
|
||||
mod filters;
|
||||
mod preferences_dialog;
|
||||
mod article_row;
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ pub mod imp {
|
|||
#[derive(CompositeTemplate, Default)]
|
||||
#[template(resource = "/net/jeena/FeedTheMonkey/ui/preferences_dialog.ui")]
|
||||
pub struct PreferencesDialog {
|
||||
#[template_child]
|
||||
pub cache_images_row: TemplateChild<libadwaita::SwitchRow>,
|
||||
#[template_child]
|
||||
pub filters_text_view: TemplateChild<gtk4::TextView>,
|
||||
}
|
||||
|
|
@ -44,25 +46,28 @@ pub mod imp {
|
|||
impl ObjectImpl for PreferencesDialog {
|
||||
fn constructed(&self) {
|
||||
self.parent_constructed();
|
||||
self.load();
|
||||
// Save on every text change
|
||||
let settings = gio::Settings::new("net.jeena.FeedTheMonkey");
|
||||
|
||||
// Cache images switch
|
||||
self.cache_images_row.set_active(settings.boolean("cache-images"));
|
||||
let s = settings.clone();
|
||||
self.cache_images_row.connect_active_notify(move |row| {
|
||||
s.set_boolean("cache-images", row.is_active()).ok();
|
||||
});
|
||||
|
||||
// Content filters text view
|
||||
self.filters_text_view.buffer().set_text(&settings.string("content-filters"));
|
||||
let obj_weak = self.obj().downgrade();
|
||||
self.filters_text_view.buffer().connect_changed(move |_| {
|
||||
if let Some(obj) = obj_weak.upgrade() {
|
||||
obj.imp().save();
|
||||
obj.imp().save_filters();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl PreferencesDialog {
|
||||
fn load(&self) {
|
||||
let settings = gio::Settings::new("net.jeena.FeedTheMonkey");
|
||||
let text = settings.string("content-filters");
|
||||
self.filters_text_view.buffer().set_text(&text);
|
||||
}
|
||||
|
||||
fn save(&self) {
|
||||
fn save_filters(&self) {
|
||||
let buf = self.filters_text_view.buffer();
|
||||
let text = buf.text(&buf.start_iter(), &buf.end_iter(), false);
|
||||
let settings = gio::Settings::new("net.jeena.FeedTheMonkey");
|
||||
|
|
|
|||
|
|
@ -584,9 +584,22 @@ pub mod imp {
|
|||
}
|
||||
|
||||
let saved_id = self.current_article_id.borrow().clone();
|
||||
|
||||
let settings = gio::Settings::new("net.jeena.FeedTheMonkey");
|
||||
let cache_images = settings.boolean("cache-images")
|
||||
&& !gio::NetworkMonitor::default().is_network_metered();
|
||||
|
||||
let win_weak = self.obj().downgrade();
|
||||
crate::runtime::spawn(
|
||||
async move { api.fetch_unread().await },
|
||||
async move {
|
||||
let articles = api.fetch_unread().await?;
|
||||
let articles = if cache_images {
|
||||
crate::image_cache::process(articles).await
|
||||
} else {
|
||||
articles
|
||||
};
|
||||
Ok::<_, String>(articles)
|
||||
},
|
||||
move |result| {
|
||||
let Some(win) = win_weak.upgrade() else { return };
|
||||
let imp = win.imp();
|
||||
|
|
@ -602,9 +615,10 @@ pub mod imp {
|
|||
store.append(&ArticleObject::new(a.clone()));
|
||||
}
|
||||
|
||||
// Save cache with updated article list.
|
||||
// Save cache and clean up unreferenced images.
|
||||
let sel_id = saved_id.as_deref().unwrap_or("");
|
||||
crate::cache::save(&articles, sel_id);
|
||||
crate::image_cache::cleanup(&articles);
|
||||
|
||||
if store.n_items() == 0 {
|
||||
imp.sidebar_content.set_visible_child_name("empty");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue