From aec4756c6f531ff54deba55fab5bf65d46549c12 Mon Sep 17 00:00:00 2001 From: slonkazoid Date: Tue, 24 Dec 2024 16:32:30 +0300 Subject: [PATCH] change how the cache works --- src/post/blag.rs | 36 +++++++++++++------ src/post/cache.rs | 72 ++++++++++++++++++++++++-------------- src/post/markdown_posts.rs | 50 ++++++++++++++------------ 3 files changed, 99 insertions(+), 59 deletions(-) diff --git a/src/post/blag.rs b/src/post/blag.rs index 56e6794..35821e1 100644 --- a/src/post/blag.rs +++ b/src/post/blag.rs @@ -82,7 +82,6 @@ impl Blag { name: Arc, path: impl AsRef, query_json: String, - dont_cache: bool, ) -> Result<(PostMetadata, String, (Duration, Duration), bool), PostError> { let start = Instant::now(); @@ -91,7 +90,6 @@ impl Blag { let mut cmd = tokio::process::Command::new(&*self.blag_bin) .arg(path.as_ref()) .env("BLAG_QUERY", query_json) - .env("DONT_CACHE", if dont_cache { "1" } else { "" }) .stdout(Stdio::piped()) .spawn() .map_err(|err| { @@ -234,11 +232,10 @@ impl PostManager for Blag { let mut hasher = DefaultHasher::new(); query_json.hash(&mut hasher); let query_hash = hasher.finish(); - let suggest_dont_cache = !query.is_empty(); let post = if let Some(cache) = &self.cache { if let Some(CacheValue { meta, body, .. }) = - cache.lookup(&name, mtime, query_hash).await + cache.lookup(name.clone(), mtime, query_hash).await { ReturnedPost::Rendered { meta, @@ -246,9 +243,8 @@ impl PostManager for Blag { perf: RenderStats::Cached(start.elapsed()), } } else { - let (meta, content, (parsed, rendered), dont_cache) = self - .render(name.clone(), path, query_json, suggest_dont_cache) - .await?; + let (meta, content, (parsed, rendered), dont_cache) = + self.render(name.clone(), path, query_json).await?; let body = content.into(); if !dont_cache { @@ -270,9 +266,8 @@ impl PostManager for Blag { } } } else { - let (meta, content, (parsed, rendered), ..) = self - .render(name, path, query_json, suggest_dont_cache) - .await?; + let (meta, content, (parsed, rendered), ..) = + self.render(name, path, query_json).await?; let total = start.elapsed(); ReturnedPost::Rendered { @@ -293,6 +288,27 @@ impl PostManager for Blag { Ok(post) } + async fn cleanup(&self) { + if let Some(cache) = &self.cache { + cache + .retain(|key, value| { + let mtime = std::fs::metadata( + self.root + .join(self.as_raw(&key.name).unwrap_or_else(|| unreachable!())), + ) + .ok() + .and_then(|metadata| metadata.modified().ok()) + .map(|mtime| as_secs(&mtime)); + + match mtime { + Some(mtime) => mtime <= value.mtime, + None => false, + } + }) + .await + } + } + fn is_raw(&self, name: &str) -> bool { name.ends_with(".sh") } diff --git a/src/post/cache.rs b/src/post/cache.rs index 17bd5ff..0ad9083 100644 --- a/src/post/cache.rs +++ b/src/post/cache.rs @@ -11,31 +11,37 @@ use tokio::io::AsyncReadExt; use tracing::{debug, info, instrument}; /// do not persist cache if this version number changed -pub const CACHE_VERSION: u16 = 3; +pub const CACHE_VERSION: u16 = 5; -#[derive(Serialize, Deserialize, Clone)] +#[derive(Serialize, Deserialize, Clone, Debug)] pub struct CacheValue { pub meta: PostMetadata, pub body: Arc, pub mtime: u64, - pub extra: u64, } #[derive(Serialize, Deserialize, Clone)] -pub struct FileCache(HashMap, CacheValue>, u16); +pub struct Cache(HashMap, u16); -impl Default for FileCache { +impl Default for Cache { fn default() -> Self { Self(Default::default(), CACHE_VERSION) } } -impl FileCache { - pub async fn lookup(&self, name: &str, mtime: u64, extra: u64) -> Option { - match self.0.get_async(name).await { +#[derive(Serialize, Deserialize, Hash, Eq, PartialEq, Clone, Debug)] +#[repr(C)] +pub struct CacheKey { + pub name: Arc, + pub extra: u64, +} + +impl Cache { + pub async fn lookup(&self, name: Arc, mtime: u64, extra: u64) -> Option { + match self.0.get_async(&CacheKey { name, extra }).await { Some(entry) => { let cached = entry.get(); - if extra == cached.extra && mtime <= cached.mtime { + if mtime <= cached.mtime { Some(cached.clone()) } else { let _ = entry.remove(); @@ -46,8 +52,13 @@ impl FileCache { } } - pub async fn lookup_metadata(&self, name: &str, mtime: u64) -> Option { - match self.0.get_async(name).await { + pub async fn lookup_metadata( + &self, + name: Arc, + mtime: u64, + extra: u64, + ) -> Option { + match self.0.get_async(&CacheKey { name, extra }).await { Some(entry) => { let cached = entry.get(); if mtime <= cached.mtime { @@ -68,22 +79,23 @@ impl FileCache { mtime: u64, rendered: Arc, extra: u64, - ) -> Result<(), (Arc, (PostMetadata, Arc))> { + ) -> Result<(), (CacheKey, (PostMetadata, Arc))> { + let key = CacheKey { name, extra }; + let value = CacheValue { meta: metadata, body: rendered, mtime, - extra, }; if self .0 - .update_async(&name, |_, _| value.clone()) + .update_async(&key, |_, _| value.clone()) .await .is_none() { self.0 - .insert_async(name, value) + .insert_async(key, value) .await .map_err(|x| (x.0, (x.1.meta, x.1.body))) } else { @@ -91,32 +103,38 @@ impl FileCache { } } - pub async fn remove(&self, name: &str) -> Option<(Arc, CacheValue)> { - self.0.remove_async(name).await + #[allow(unused)] + pub async fn remove(&self, name: Arc, extra: u64) -> Option<(CacheKey, CacheValue)> { + self.0.remove_async(&CacheKey { name, extra }).await } #[instrument(name = "cleanup", skip_all)] - pub async fn cleanup(&self, get_mtime: impl Fn(&str) -> Option) { + pub async fn retain(&self, predicate: impl Fn(&CacheKey, &CacheValue) -> bool) { let old_size = self.0.len(); let mut i = 0; // TODO: multithread + // not urgent as this is run concurrently anyways self.0 .retain_async(|k, v| { - if get_mtime(k).is_some_and(|mtime| mtime == v.mtime) { + if predicate(k, v) { true } else { - debug!("removing {k} from cache"); + debug!("removing {k:?} from cache"); i += 1; false } }) .await; - let new_size = self.0.len(); + let new_size = self.len(); debug!("removed {i} entries ({old_size} -> {new_size} entries)"); } + pub fn len(&self) -> usize { + self.0.len() + } + #[inline(always)] pub fn version(&self) -> u16 { self.1 @@ -124,12 +142,12 @@ impl FileCache { } pub struct CacheGuard { - inner: FileCache, + inner: Cache, config: CacheConfig, } impl CacheGuard { - pub fn new(cache: FileCache, config: CacheConfig) -> Self { + pub fn new(cache: Cache, config: CacheConfig) -> Self { Self { inner: cache, config, @@ -159,15 +177,15 @@ impl CacheGuard { } impl Deref for CacheGuard { - type Target = FileCache; + type Target = Cache; fn deref(&self) -> &Self::Target { &self.inner } } -impl AsRef for CacheGuard { - fn as_ref(&self) -> &FileCache { +impl AsRef for CacheGuard { + fn as_ref(&self) -> &Cache { &self.inner } } @@ -178,7 +196,7 @@ impl Drop for CacheGuard { } } -pub(crate) async fn load_cache(config: &CacheConfig) -> Result { +pub(crate) async fn load_cache(config: &CacheConfig) -> Result { let path = &config.file; let mut cache_file = tokio::fs::File::open(&path) .await diff --git a/src/post/markdown_posts.rs b/src/post/markdown_posts.rs index adb44f9..63d21c2 100644 --- a/src/post/markdown_posts.rs +++ b/src/post/markdown_posts.rs @@ -24,7 +24,7 @@ use crate::config::Config; use crate::markdown_render::{build_syntect, render}; use crate::systemtime_as_secs::as_secs; -use super::cache::{CacheGuard, CacheValue}; +use super::cache::{CacheGuard, CacheKey, CacheValue}; use super::{ ApplyFilters, Filter, PostError, PostManager, PostMetadata, RenderStats, ReturnedPost, }; @@ -190,7 +190,9 @@ impl PostManager for MarkdownPosts { String::from(path.file_stem().unwrap().to_string_lossy()).into(); if let Some(cache) = &self.cache - && let Some(hit) = cache.lookup_metadata(&name, mtime).await + && let Some(hit) = cache + .lookup_metadata(name.clone(), mtime, self.render_hash) + .await && hit.apply_filters(filters) { posts.push(hit); @@ -228,15 +230,12 @@ impl PostManager for MarkdownPosts { let mut file = match tokio::fs::OpenOptions::new().read(true).open(&path).await { Ok(value) => value, - Err(err) => match err.kind() { - io::ErrorKind::NotFound => { - if let Some(cache) = &self.cache { - cache.remove(&name).await; - } - return Err(PostError::NotFound(name)); + Err(err) => { + return match err.kind() { + io::ErrorKind::NotFound => Err(PostError::NotFound(name)), + _ => Err(PostError::IoError(err)), } - _ => return Err(PostError::IoError(err)), - }, + } }; let mut buffer = Vec::with_capacity(4096); @@ -257,21 +256,18 @@ impl PostManager for MarkdownPosts { let stat = match tokio::fs::metadata(&path).await { Ok(value) => value, - Err(err) => match err.kind() { - io::ErrorKind::NotFound => { - if let Some(cache) = &self.cache { - cache.remove(&name).await; - } - return Err(PostError::NotFound(name)); + Err(err) => { + return match err.kind() { + io::ErrorKind::NotFound => Err(PostError::NotFound(name)), + _ => Err(PostError::IoError(err)), } - _ => return Err(PostError::IoError(err)), - }, + } }; let mtime = as_secs(&stat.modified()?); if let Some(cache) = &self.cache && let Some(CacheValue { meta, body, .. }) = - cache.lookup(&name, mtime, self.render_hash).await + cache.lookup(name.clone(), mtime, self.render_hash).await { ReturnedPost::Rendered { meta, @@ -302,8 +298,13 @@ impl PostManager for MarkdownPosts { async fn cleanup(&self) { if let Some(cache) = &self.cache { cache - .cleanup(|name| { - std::fs::metadata( + .retain(|CacheKey { name, extra }, value| { + // nuke entries with different render options + if self.render_hash != *extra { + return false; + } + + let mtime = std::fs::metadata( self.config .dirs .posts @@ -311,7 +312,12 @@ impl PostManager for MarkdownPosts { ) .ok() .and_then(|metadata| metadata.modified().ok()) - .map(|mtime| as_secs(&mtime)) + .map(|mtime| as_secs(&mtime)); + + match mtime { + Some(mtime) => mtime <= value.mtime, + None => false, + } }) .await }