change how the cache works

This commit is contained in:
slonkazoid 2024-12-24 16:32:30 +03:00
parent 72c53b0923
commit aec4756c6f
Signed by: slonk
SSH key fingerprint: SHA256:tbZfJX4IOvZ0LGWOWu5Ijo8jfMPi78TU7x1VoEeCIjM
3 changed files with 99 additions and 59 deletions

View file

@ -82,7 +82,6 @@ impl Blag {
name: Arc<str>,
path: impl AsRef<Path>,
query_json: String,
dont_cache: bool,
) -> Result<(PostMetadata, String, (Duration, Duration), bool), PostError> {
let start = Instant::now();
@ -91,7 +90,6 @@ impl Blag {
let mut cmd = tokio::process::Command::new(&*self.blag_bin)
.arg(path.as_ref())
.env("BLAG_QUERY", query_json)
.env("DONT_CACHE", if dont_cache { "1" } else { "" })
.stdout(Stdio::piped())
.spawn()
.map_err(|err| {
@ -234,11 +232,10 @@ impl PostManager for Blag {
let mut hasher = DefaultHasher::new();
query_json.hash(&mut hasher);
let query_hash = hasher.finish();
let suggest_dont_cache = !query.is_empty();
let post = if let Some(cache) = &self.cache {
if let Some(CacheValue { meta, body, .. }) =
cache.lookup(&name, mtime, query_hash).await
cache.lookup(name.clone(), mtime, query_hash).await
{
ReturnedPost::Rendered {
meta,
@ -246,9 +243,8 @@ impl PostManager for Blag {
perf: RenderStats::Cached(start.elapsed()),
}
} else {
let (meta, content, (parsed, rendered), dont_cache) = self
.render(name.clone(), path, query_json, suggest_dont_cache)
.await?;
let (meta, content, (parsed, rendered), dont_cache) =
self.render(name.clone(), path, query_json).await?;
let body = content.into();
if !dont_cache {
@ -270,9 +266,8 @@ impl PostManager for Blag {
}
}
} else {
let (meta, content, (parsed, rendered), ..) = self
.render(name, path, query_json, suggest_dont_cache)
.await?;
let (meta, content, (parsed, rendered), ..) =
self.render(name, path, query_json).await?;
let total = start.elapsed();
ReturnedPost::Rendered {
@ -293,6 +288,27 @@ impl PostManager for Blag {
Ok(post)
}
/// Prunes cache entries whose backing file is missing or newer than the cached copy.
///
/// Does nothing when no cache is configured. For every entry the file path is
/// built from `self.root` joined with `as_raw(key.name)`, and the entry is kept
/// only if the file's modification time (converted with `as_secs`) could be
/// read and is not later than the `mtime` stored with the cached value.
async fn cleanup(&self) {
    let Some(cache) = &self.cache else {
        return;
    };

    cache
        .retain(|key, value| {
            // NOTE(review): `as_raw` is treated as infallible for cached names;
            // that invariant is not visible from this hunk — confirm.
            let raw_name = self.as_raw(&key.name).unwrap_or_else(|| unreachable!());

            // Any failure to stat the file or read its mtime evicts the entry.
            std::fs::metadata(self.root.join(raw_name))
                .ok()
                .and_then(|metadata| metadata.modified().ok())
                .map(|modified| as_secs(&modified))
                .is_some_and(|on_disk| on_disk <= value.mtime)
        })
        .await
}
/// Reports whether `name` ends with the `.sh` suffix.
fn is_raw(&self, name: &str) -> bool {
    name.strip_suffix(".sh").is_some()
}

View file

@ -11,31 +11,37 @@ use tokio::io::AsyncReadExt;
use tracing::{debug, info, instrument};
/// do not persist cache if this version number changed
pub const CACHE_VERSION: u16 = 3;
pub const CACHE_VERSION: u16 = 5;
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct CacheValue {
pub meta: PostMetadata,
pub body: Arc<str>,
pub mtime: u64,
pub extra: u64,
}
#[derive(Serialize, Deserialize, Clone)]
pub struct FileCache(HashMap<Arc<str>, CacheValue>, u16);
pub struct Cache(HashMap<CacheKey, CacheValue>, u16);
impl Default for FileCache {
impl Default for Cache {
fn default() -> Self {
Self(Default::default(), CACHE_VERSION)
}
}
impl FileCache {
pub async fn lookup(&self, name: &str, mtime: u64, extra: u64) -> Option<CacheValue> {
match self.0.get_async(name).await {
/// Key under which a rendered post is stored in the cache map.
///
/// Two entries are distinct when either the post name or the `extra`
/// discriminator differs, so the same post can be cached once per `extra` value.
// NOTE(review): field order is visible to the serde derives and `#[repr(C)]`;
// reordering fields may change the persisted cache format — confirm before touching.
#[derive(Serialize, Deserialize, Hash, Eq, PartialEq, Clone, Debug)]
#[repr(C)]
pub struct CacheKey {
    /// Name of the post this entry belongs to.
    pub name: Arc<str>,
    /// Additional discriminator supplied by the caller; the call sites visible
    /// in this change pass a query hash or a render-options hash.
    pub extra: u64,
}
impl Cache {
pub async fn lookup(&self, name: Arc<str>, mtime: u64, extra: u64) -> Option<CacheValue> {
match self.0.get_async(&CacheKey { name, extra }).await {
Some(entry) => {
let cached = entry.get();
if extra == cached.extra && mtime <= cached.mtime {
if mtime <= cached.mtime {
Some(cached.clone())
} else {
let _ = entry.remove();
@ -46,8 +52,13 @@ impl FileCache {
}
}
pub async fn lookup_metadata(&self, name: &str, mtime: u64) -> Option<PostMetadata> {
match self.0.get_async(name).await {
pub async fn lookup_metadata(
&self,
name: Arc<str>,
mtime: u64,
extra: u64,
) -> Option<PostMetadata> {
match self.0.get_async(&CacheKey { name, extra }).await {
Some(entry) => {
let cached = entry.get();
if mtime <= cached.mtime {
@ -68,22 +79,23 @@ impl FileCache {
mtime: u64,
rendered: Arc<str>,
extra: u64,
) -> Result<(), (Arc<str>, (PostMetadata, Arc<str>))> {
) -> Result<(), (CacheKey, (PostMetadata, Arc<str>))> {
let key = CacheKey { name, extra };
let value = CacheValue {
meta: metadata,
body: rendered,
mtime,
extra,
};
if self
.0
.update_async(&name, |_, _| value.clone())
.update_async(&key, |_, _| value.clone())
.await
.is_none()
{
self.0
.insert_async(name, value)
.insert_async(key, value)
.await
.map_err(|x| (x.0, (x.1.meta, x.1.body)))
} else {
@ -91,32 +103,38 @@ impl FileCache {
}
}
pub async fn remove(&self, name: &str) -> Option<(Arc<str>, CacheValue)> {
self.0.remove_async(name).await
#[allow(unused)]
pub async fn remove(&self, name: Arc<str>, extra: u64) -> Option<(CacheKey, CacheValue)> {
self.0.remove_async(&CacheKey { name, extra }).await
}
#[instrument(name = "cleanup", skip_all)]
pub async fn cleanup(&self, get_mtime: impl Fn(&str) -> Option<u64>) {
pub async fn retain(&self, predicate: impl Fn(&CacheKey, &CacheValue) -> bool) {
let old_size = self.0.len();
let mut i = 0;
// TODO: multithread
// not urgent as this is run concurrently anyways
self.0
.retain_async(|k, v| {
if get_mtime(k).is_some_and(|mtime| mtime == v.mtime) {
if predicate(k, v) {
true
} else {
debug!("removing {k} from cache");
debug!("removing {k:?} from cache");
i += 1;
false
}
})
.await;
let new_size = self.0.len();
let new_size = self.len();
debug!("removed {i} entries ({old_size} -> {new_size} entries)");
}
pub fn len(&self) -> usize {
self.0.len()
}
#[inline(always)]
pub fn version(&self) -> u16 {
self.1
@ -124,12 +142,12 @@ impl FileCache {
}
pub struct CacheGuard {
inner: FileCache,
inner: Cache,
config: CacheConfig,
}
impl CacheGuard {
pub fn new(cache: FileCache, config: CacheConfig) -> Self {
pub fn new(cache: Cache, config: CacheConfig) -> Self {
Self {
inner: cache,
config,
@ -159,15 +177,15 @@ impl CacheGuard {
}
impl Deref for CacheGuard {
type Target = FileCache;
type Target = Cache;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
impl AsRef<FileCache> for CacheGuard {
fn as_ref(&self) -> &FileCache {
impl AsRef<Cache> for CacheGuard {
fn as_ref(&self) -> &Cache {
&self.inner
}
}
@ -178,7 +196,7 @@ impl Drop for CacheGuard {
}
}
pub(crate) async fn load_cache(config: &CacheConfig) -> Result<FileCache, eyre::Report> {
pub(crate) async fn load_cache(config: &CacheConfig) -> Result<Cache, eyre::Report> {
let path = &config.file;
let mut cache_file = tokio::fs::File::open(&path)
.await

View file

@ -24,7 +24,7 @@ use crate::config::Config;
use crate::markdown_render::{build_syntect, render};
use crate::systemtime_as_secs::as_secs;
use super::cache::{CacheGuard, CacheValue};
use super::cache::{CacheGuard, CacheKey, CacheValue};
use super::{
ApplyFilters, Filter, PostError, PostManager, PostMetadata, RenderStats, ReturnedPost,
};
@ -190,7 +190,9 @@ impl PostManager for MarkdownPosts {
String::from(path.file_stem().unwrap().to_string_lossy()).into();
if let Some(cache) = &self.cache
&& let Some(hit) = cache.lookup_metadata(&name, mtime).await
&& let Some(hit) = cache
.lookup_metadata(name.clone(), mtime, self.render_hash)
.await
&& hit.apply_filters(filters)
{
posts.push(hit);
@ -228,15 +230,12 @@ impl PostManager for MarkdownPosts {
let mut file = match tokio::fs::OpenOptions::new().read(true).open(&path).await {
Ok(value) => value,
Err(err) => match err.kind() {
io::ErrorKind::NotFound => {
if let Some(cache) = &self.cache {
cache.remove(&name).await;
Err(err) => {
return match err.kind() {
io::ErrorKind::NotFound => Err(PostError::NotFound(name)),
_ => Err(PostError::IoError(err)),
}
return Err(PostError::NotFound(name));
}
_ => return Err(PostError::IoError(err)),
},
};
let mut buffer = Vec::with_capacity(4096);
@ -257,21 +256,18 @@ impl PostManager for MarkdownPosts {
let stat = match tokio::fs::metadata(&path).await {
Ok(value) => value,
Err(err) => match err.kind() {
io::ErrorKind::NotFound => {
if let Some(cache) = &self.cache {
cache.remove(&name).await;
Err(err) => {
return match err.kind() {
io::ErrorKind::NotFound => Err(PostError::NotFound(name)),
_ => Err(PostError::IoError(err)),
}
return Err(PostError::NotFound(name));
}
_ => return Err(PostError::IoError(err)),
},
};
let mtime = as_secs(&stat.modified()?);
if let Some(cache) = &self.cache
&& let Some(CacheValue { meta, body, .. }) =
cache.lookup(&name, mtime, self.render_hash).await
cache.lookup(name.clone(), mtime, self.render_hash).await
{
ReturnedPost::Rendered {
meta,
@ -302,8 +298,13 @@ impl PostManager for MarkdownPosts {
async fn cleanup(&self) {
if let Some(cache) = &self.cache {
cache
.cleanup(|name| {
std::fs::metadata(
.retain(|CacheKey { name, extra }, value| {
// nuke entries with different render options
if self.render_hash != *extra {
return false;
}
let mtime = std::fs::metadata(
self.config
.dirs
.posts
@ -311,7 +312,12 @@ impl PostManager for MarkdownPosts {
)
.ok()
.and_then(|metadata| metadata.modified().ok())
.map(|mtime| as_secs(&mtime))
.map(|mtime| as_secs(&mtime));
match mtime {
Some(mtime) => mtime <= value.mtime,
None => false,
}
})
.await
}