change how the cache works

slonkazoid 2024-12-24 16:32:30 +03:00
parent 72c53b0923
commit aec4756c6f
Signed by: slonk
SSH key fingerprint: SHA256:tbZfJX4IOvZ0LGWOWu5Ijo8jfMPi78TU7x1VoEeCIjM
3 changed files with 99 additions and 59 deletions

View file

@@ -82,7 +82,6 @@ impl Blag {
         name: Arc<str>,
         path: impl AsRef<Path>,
         query_json: String,
-        dont_cache: bool,
     ) -> Result<(PostMetadata, String, (Duration, Duration), bool), PostError> {
         let start = Instant::now();
@@ -91,7 +90,6 @@ impl Blag {
         let mut cmd = tokio::process::Command::new(&*self.blag_bin)
            .arg(path.as_ref())
            .env("BLAG_QUERY", query_json)
-           .env("DONT_CACHE", if dont_cache { "1" } else { "" })
            .stdout(Stdio::piped())
            .spawn()
            .map_err(|err| {
@@ -234,11 +232,10 @@ impl PostManager for Blag {
         let mut hasher = DefaultHasher::new();
         query_json.hash(&mut hasher);
         let query_hash = hasher.finish();
-        let suggest_dont_cache = !query.is_empty();

         let post = if let Some(cache) = &self.cache {
             if let Some(CacheValue { meta, body, .. }) =
-                cache.lookup(&name, mtime, query_hash).await
+                cache.lookup(name.clone(), mtime, query_hash).await
             {
                 ReturnedPost::Rendered {
                     meta,
@@ -246,9 +243,8 @@ impl PostManager for Blag {
                     perf: RenderStats::Cached(start.elapsed()),
                 }
             } else {
-                let (meta, content, (parsed, rendered), dont_cache) = self
-                    .render(name.clone(), path, query_json, suggest_dont_cache)
-                    .await?;
+                let (meta, content, (parsed, rendered), dont_cache) =
+                    self.render(name.clone(), path, query_json).await?;
                 let body = content.into();

                 if !dont_cache {
@@ -270,9 +266,8 @@ impl PostManager for Blag {
                 }
             }
         } else {
-            let (meta, content, (parsed, rendered), ..) = self
-                .render(name, path, query_json, suggest_dont_cache)
-                .await?;
+            let (meta, content, (parsed, rendered), ..) =
+                self.render(name, path, query_json).await?;

             let total = start.elapsed();
             ReturnedPost::Rendered {
@@ -293,6 +288,27 @@ impl PostManager for Blag {
         Ok(post)
     }

+    async fn cleanup(&self) {
+        if let Some(cache) = &self.cache {
+            cache
+                .retain(|key, value| {
+                    let mtime = std::fs::metadata(
+                        self.root
+                            .join(self.as_raw(&key.name).unwrap_or_else(|| unreachable!())),
+                    )
+                    .ok()
+                    .and_then(|metadata| metadata.modified().ok())
+                    .map(|mtime| as_secs(&mtime));
+
+                    match mtime {
+                        Some(mtime) => mtime <= value.mtime,
+                        None => false,
+                    }
+                })
+                .await
+        }
+    }
+
     fn is_raw(&self, name: &str) -> bool {
         name.ends_with(".sh")
     }
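The lookup-or-render path above reduces to the following shape. This is a minimal sketch only: the cache is a plain std::collections::HashMap instead of the concurrent map the crate uses, the blag subprocess is reduced to a closure, and get_rendered and its parameters are illustrative names, not the real API.

use std::collections::HashMap;
use std::sync::Arc;

#[derive(Hash, Eq, PartialEq, Clone)]
struct CacheKey {
    name: Arc<str>,
    extra: u64, // query hash
}

struct CacheValue {
    body: Arc<str>,
    mtime: u64,
}

// Illustrative reduction of the caching flow after this change.
fn get_rendered(
    cache: &mut HashMap<CacheKey, CacheValue>,
    name: Arc<str>,
    query_hash: u64,
    mtime: u64,
    render: impl FnOnce() -> (Arc<str>, bool), // returns (body, dont_cache)
) -> Arc<str> {
    let key = CacheKey { name, extra: query_hash };

    // Fresh hit: the query hash is part of the key now, so only the
    // mtime needs checking here.
    if let Some(hit) = cache.get(&key) {
        if mtime <= hit.mtime {
            return hit.body.clone();
        }
    }

    // Miss: render, then cache unless the renderer said not to. The
    // caller no longer pushes a cachability hint up front (the
    // DONT_CACHE env var is gone); dont_cache only flows back out.
    let (body, dont_cache) = render();
    if !dont_cache {
        cache.insert(key, CacheValue { body: body.clone(), mtime });
    }
    body
}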

View file

@@ -11,31 +11,37 @@ use tokio::io::AsyncReadExt;
 use tracing::{debug, info, instrument};

 /// do not persist cache if this version number changed
-pub const CACHE_VERSION: u16 = 3;
+pub const CACHE_VERSION: u16 = 5;

-#[derive(Serialize, Deserialize, Clone)]
+#[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct CacheValue {
     pub meta: PostMetadata,
     pub body: Arc<str>,
     pub mtime: u64,
-    pub extra: u64,
 }

 #[derive(Serialize, Deserialize, Clone)]
-pub struct FileCache(HashMap<Arc<str>, CacheValue>, u16);
+pub struct Cache(HashMap<CacheKey, CacheValue>, u16);

-impl Default for FileCache {
+impl Default for Cache {
     fn default() -> Self {
         Self(Default::default(), CACHE_VERSION)
     }
 }

-impl FileCache {
-    pub async fn lookup(&self, name: &str, mtime: u64, extra: u64) -> Option<CacheValue> {
-        match self.0.get_async(name).await {
+#[derive(Serialize, Deserialize, Hash, Eq, PartialEq, Clone, Debug)]
+#[repr(C)]
+pub struct CacheKey {
+    pub name: Arc<str>,
+    pub extra: u64,
+}
+
+impl Cache {
+    pub async fn lookup(&self, name: Arc<str>, mtime: u64, extra: u64) -> Option<CacheValue> {
+        match self.0.get_async(&CacheKey { name, extra }).await {
             Some(entry) => {
                 let cached = entry.get();
-                if extra == cached.extra && mtime <= cached.mtime {
+                if mtime <= cached.mtime {
                     Some(cached.clone())
                 } else {
                     let _ = entry.remove();
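The practical effect of CacheKey: the map used to be keyed by name alone, with extra (the query/render hash) checked inside the value, so two renders of the same post with different parameters evicted each other. Keyed on (name, extra), they coexist. A sketch with a plain HashMap; the sample names and values are made up:

use std::collections::HashMap;
use std::sync::Arc;

#[derive(Hash, Eq, PartialEq, Clone, Debug)]
struct CacheKey {
    name: Arc<str>,
    extra: u64,
}

struct CacheValue {
    body: Arc<str>,
    mtime: u64,
}

fn main() {
    let mut cache: HashMap<CacheKey, CacheValue> = HashMap::new();
    let name: Arc<str> = Arc::from("some-post");

    // Same post, two different query hashes: both entries survive.
    cache.insert(
        CacheKey { name: name.clone(), extra: 0 },
        CacheValue { body: "rendered without query".into(), mtime: 100 },
    );
    cache.insert(
        CacheKey { name: name.clone(), extra: 0xdeadbeef },
        CacheValue { body: "rendered with query".into(), mtime: 100 },
    );
    assert_eq!(cache.len(), 2);

    // lookup() now only compares mtime; the extra check is implicit
    // in key equality.
    let hit = cache.get(&CacheKey { name, extra: 0 });
    assert!(hit.is_some_and(|v| 100 <= v.mtime));
}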
@@ -46,8 +52,13 @@ impl FileCache {
         }
     }

-    pub async fn lookup_metadata(&self, name: &str, mtime: u64) -> Option<PostMetadata> {
-        match self.0.get_async(name).await {
+    pub async fn lookup_metadata(
+        &self,
+        name: Arc<str>,
+        mtime: u64,
+        extra: u64,
+    ) -> Option<PostMetadata> {
+        match self.0.get_async(&CacheKey { name, extra }).await {
             Some(entry) => {
                 let cached = entry.get();
                 if mtime <= cached.mtime {
@@ -68,22 +79,23 @@ impl FileCache {
         mtime: u64,
         rendered: Arc<str>,
         extra: u64,
-    ) -> Result<(), (Arc<str>, (PostMetadata, Arc<str>))> {
+    ) -> Result<(), (CacheKey, (PostMetadata, Arc<str>))> {
+        let key = CacheKey { name, extra };
         let value = CacheValue {
             meta: metadata,
             body: rendered,
             mtime,
-            extra,
         };

         if self
             .0
-            .update_async(&name, |_, _| value.clone())
+            .update_async(&key, |_, _| value.clone())
             .await
             .is_none()
         {
             self.0
-                .insert_async(name, value)
+                .insert_async(key, value)
                 .await
                 .map_err(|x| (x.0, (x.1.meta, x.1.body)))
         } else {
@@ -91,32 +103,38 @@ impl FileCache {
         }
     }

-    pub async fn remove(&self, name: &str) -> Option<(Arc<str>, CacheValue)> {
-        self.0.remove_async(name).await
+    #[allow(unused)]
+    pub async fn remove(&self, name: Arc<str>, extra: u64) -> Option<(CacheKey, CacheValue)> {
+        self.0.remove_async(&CacheKey { name, extra }).await
     }

     #[instrument(name = "cleanup", skip_all)]
-    pub async fn cleanup(&self, get_mtime: impl Fn(&str) -> Option<u64>) {
+    pub async fn retain(&self, predicate: impl Fn(&CacheKey, &CacheValue) -> bool) {
         let old_size = self.0.len();
         let mut i = 0;

         // TODO: multithread
+        // not urgent as this is run concurrently anyways
         self.0
             .retain_async(|k, v| {
-                if get_mtime(k).is_some_and(|mtime| mtime == v.mtime) {
+                if predicate(k, v) {
                     true
                 } else {
-                    debug!("removing {k} from cache");
+                    debug!("removing {k:?} from cache");
                     i += 1;
                     false
                 }
             })
             .await;

-        let new_size = self.0.len();
+        let new_size = self.len();
         debug!("removed {i} entries ({old_size} -> {new_size} entries)");
     }

+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
     #[inline(always)]
     pub fn version(&self) -> u16 {
         self.1
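With this change the cache no longer hard-codes a staleness policy: the old cleanup took a get_mtime closure and compared timestamps itself, while retain hands the whole (key, value) pair to the caller and only walks entries and counts evictions. A sketch of the inverted control flow over a plain HashMap; retain, mtime_on_disk, and the sample entries are illustrative:

use std::collections::HashMap;

#[derive(Hash, Eq, PartialEq, Debug)]
struct CacheKey {
    name: String,
    extra: u64,
}

struct CacheValue {
    mtime: u64,
}

// The cache only iterates and reports; the caller decides what stays.
fn retain(
    cache: &mut HashMap<CacheKey, CacheValue>,
    predicate: impl Fn(&CacheKey, &CacheValue) -> bool,
) {
    let old_size = cache.len();
    cache.retain(|k, v| predicate(k, v));
    println!("removed {} entries", old_size - cache.len());
}

fn main() {
    let mut cache = HashMap::new();
    cache.insert(CacheKey { name: "a".into(), extra: 0 }, CacheValue { mtime: 100 });
    cache.insert(CacheKey { name: "b".into(), extra: 0 }, CacheValue { mtime: 100 });

    // Pretend only "a" still exists on disk, last modified at t = 100.
    let mtime_on_disk = |name: &str| (name == "a").then_some(100u64);

    retain(&mut cache, |k, v| match mtime_on_disk(&k.name) {
        Some(mtime) => mtime <= v.mtime, // keep if cache is at least as new
        None => false,                   // file gone: drop the entry
    });

    assert_eq!(cache.len(), 1);
}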
@@ -124,12 +142,12 @@ impl FileCache {
 }

 pub struct CacheGuard {
-    inner: FileCache,
+    inner: Cache,
     config: CacheConfig,
 }

 impl CacheGuard {
-    pub fn new(cache: FileCache, config: CacheConfig) -> Self {
+    pub fn new(cache: Cache, config: CacheConfig) -> Self {
         Self {
             inner: cache,
             config,
@@ -159,15 +177,15 @@ impl CacheGuard {
 }

 impl Deref for CacheGuard {
-    type Target = FileCache;
+    type Target = Cache;

     fn deref(&self) -> &Self::Target {
         &self.inner
     }
 }

-impl AsRef<FileCache> for CacheGuard {
-    fn as_ref(&self) -> &FileCache {
+impl AsRef<Cache> for CacheGuard {
+    fn as_ref(&self) -> &Cache {
         &self.inner
     }
 }
@@ -178,7 +196,7 @@ impl Drop for CacheGuard {
     }
 }

-pub(crate) async fn load_cache(config: &CacheConfig) -> Result<FileCache, eyre::Report> {
+pub(crate) async fn load_cache(config: &CacheConfig) -> Result<Cache, eyre::Report> {
     let path = &config.file;
     let mut cache_file = tokio::fs::File::open(&path)
         .await

View file

@@ -24,7 +24,7 @@ use crate::config::Config;
 use crate::markdown_render::{build_syntect, render};
 use crate::systemtime_as_secs::as_secs;

-use super::cache::{CacheGuard, CacheValue};
+use super::cache::{CacheGuard, CacheKey, CacheValue};
 use super::{
     ApplyFilters, Filter, PostError, PostManager, PostMetadata, RenderStats, ReturnedPost,
 };
@@ -190,7 +190,9 @@ impl PostManager for MarkdownPosts {
                 String::from(path.file_stem().unwrap().to_string_lossy()).into();

             if let Some(cache) = &self.cache
-                && let Some(hit) = cache.lookup_metadata(&name, mtime).await
+                && let Some(hit) = cache
+                    .lookup_metadata(name.clone(), mtime, self.render_hash)
+                    .await
                 && hit.apply_filters(filters)
             {
                 posts.push(hit);
@@ -228,15 +230,12 @@ impl PostManager for MarkdownPosts {
         let mut file = match tokio::fs::OpenOptions::new().read(true).open(&path).await {
             Ok(value) => value,
-            Err(err) => match err.kind() {
-                io::ErrorKind::NotFound => {
-                    if let Some(cache) = &self.cache {
-                        cache.remove(&name).await;
-                    }
-                    return Err(PostError::NotFound(name));
+            Err(err) => {
+                return match err.kind() {
+                    io::ErrorKind::NotFound => Err(PostError::NotFound(name)),
+                    _ => Err(PostError::IoError(err)),
                 }
-                _ => return Err(PostError::IoError(err)),
-            },
+            }
         };

         let mut buffer = Vec::with_capacity(4096);
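Note what the flattened error arm drops: the NotFound path no longer evicts the cache entry inline; deleted files are now swept by cleanup()/retain() instead. The resulting shape, sketched with a synchronous open and a pared-down stand-in for the crate's PostError:

use std::fs::File;
use std::io;

// Pared-down stand-in for the real PostError.
#[derive(Debug)]
enum PostError {
    NotFound(String),
    IoError(io::Error),
}

fn open_post(name: &str, path: &str) -> Result<File, PostError> {
    match File::open(path) {
        Ok(file) => Ok(file),
        // No inline cache eviction anymore: stale entries for deleted
        // files are handled by the periodic cleanup.
        Err(err) => match err.kind() {
            io::ErrorKind::NotFound => Err(PostError::NotFound(name.to_owned())),
            _ => Err(PostError::IoError(err)),
        },
    }
}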
@@ -257,21 +256,18 @@ impl PostManager for MarkdownPosts {
         let stat = match tokio::fs::metadata(&path).await {
             Ok(value) => value,
-            Err(err) => match err.kind() {
-                io::ErrorKind::NotFound => {
-                    if let Some(cache) = &self.cache {
-                        cache.remove(&name).await;
-                    }
-                    return Err(PostError::NotFound(name));
+            Err(err) => {
+                return match err.kind() {
+                    io::ErrorKind::NotFound => Err(PostError::NotFound(name)),
+                    _ => Err(PostError::IoError(err)),
                 }
-                _ => return Err(PostError::IoError(err)),
-            },
+            }
         };

         let mtime = as_secs(&stat.modified()?);

         if let Some(cache) = &self.cache
             && let Some(CacheValue { meta, body, .. }) =
-                cache.lookup(&name, mtime, self.render_hash).await
+                cache.lookup(name.clone(), mtime, self.render_hash).await
         {
             ReturnedPost::Rendered {
                 meta,
@@ -302,8 +298,13 @@ impl PostManager for MarkdownPosts {
     async fn cleanup(&self) {
         if let Some(cache) = &self.cache {
             cache
-                .cleanup(|name| {
-                    std::fs::metadata(
+                .retain(|CacheKey { name, extra }, value| {
+                    // nuke entries with different render options
+                    if self.render_hash != *extra {
+                        return false;
+                    }
+
+                    let mtime = std::fs::metadata(
                         self.config
                             .dirs
                             .posts
@@ -311,7 +312,12 @@ impl PostManager for MarkdownPosts {
                     )
                     .ok()
                     .and_then(|metadata| metadata.modified().ok())
-                    .map(|mtime| as_secs(&mtime))
+                    .map(|mtime| as_secs(&mtime));
+
+                    match mtime {
+                        Some(mtime) => mtime <= value.mtime,
+                        None => false,
+                    }
                 })
                 .await
         }
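The markdown predicate in isolation: render-option mismatches are dropped before the mtime comparison even runs, so bumping render options invalidates every entry at the next cleanup. A minimal sketch; keep_entry and its parameters are illustrative names for the logic above:

// An entry survives only if it was rendered with the current options
// and its source file still exists and is no newer than the cache.
fn keep_entry(render_hash: u64, key_extra: u64, file_mtime: Option<u64>, cached_mtime: u64) -> bool {
    // nuke entries with different render options
    if render_hash != key_extra {
        return false;
    }
    match file_mtime {
        Some(mtime) => mtime <= cached_mtime,
        None => false, // source file deleted
    }
}

fn main() {
    assert!(keep_entry(42, 42, Some(100), 100));  // fresh
    assert!(!keep_entry(42, 7, Some(100), 100));  // stale render options
    assert!(!keep_entry(42, 42, Some(200), 100)); // file newer than cache
    assert!(!keep_entry(42, 42, None, 100));      // file gone
}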