diff --git a/README.md b/README.md index c103b67..ef4c07e 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ blazingly fast markdown blog software written in rust memory safe - [ ] general cleanup of code - [ ] better error reporting and error pages - [ ] better tracing -- [ ] cache cleanup task +- [x] cache cleanup task - [ ] ^ replace HashMap with HashCache once i implement [this](https://github.com/wvwwvwwv/scalable-concurrent-containers/issues/139) - [x] (de)compress cache with zstd on startup/shutdown - [ ] make date parsing less strict @@ -45,7 +45,10 @@ markdown_access = true # allow users to see the raw markdown of a post [cache] # cache settings enable = true # save metadata and rendered posts into RAM # highly recommended, only turn off if absolutely necessary -persistence = false # save the cache to on shutdown and load on startup +cleanup = true # clean cache, highly recommended +#cleanup_interval = 86400000 # clean the cache regularly instead of just at startup + # uncomment to enable +persistence = true # save the cache to on shutdown and load on startup file = "cache" # file to save the cache to compress = true # compress the cache file compression_level = 3 # zstd compression level, 3 is recommended diff --git a/src/config.rs b/src/config.rs index 677f341..3fc3b34 100644 --- a/src/config.rs +++ b/src/config.rs @@ -29,6 +29,8 @@ pub struct RenderConfig { #[serde(default)] pub struct CacheConfig { pub enable: bool, + pub cleanup: bool, + pub cleanup_interval: Option<u64>, pub persistence: bool, pub file: PathBuf, pub compress: bool, @@ -78,7 +80,9 @@ impl Default for CacheConfig { fn default() -> Self { Self { enable: true, - persistence: false, + cleanup: true, + cleanup_interval: None, + persistence: true, file: "cache".into(), compress: true, compression_level: 3, diff --git a/src/main.rs b/src/main.rs index 1f190f7..6ff6a19 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -#![feature(let_chains)] +#![feature(let_chains, if_let_guard)] 
mod config; mod error; @@ -7,6 +7,7 @@ mod hash_arc_store; mod markdown_render; mod post; mod ranged_i128_visitor; +mod systemtime_as_secs; use std::future::IntoFuture; use std::io::Read; @@ -25,13 +26,13 @@ use color_eyre::eyre::{self, Context}; use thiserror::Error; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::TcpListener; -use tokio::signal; use tokio::task::JoinSet; +use tokio::{select, signal}; use tokio_util::sync::CancellationToken; use tower_http::services::ServeDir; use tower_http::trace::TraceLayer; use tracing::level_filters::LevelFilter; -use tracing::{error, info, info_span, warn, Span}; +use tracing::{debug, error, info, info_span, warn, Span}; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; use crate::config::Config; @@ -160,7 +161,7 @@ async fn main() -> eyre::Result<()> { .context("couldn't load configuration")?; let mut tasks = JoinSet::new(); - let mut cancellation_tokens = Vec::new(); + let cancellation_token = CancellationToken::new(); let posts = if config.cache.enable { if config.cache.persistence @@ -228,6 +229,27 @@ async fn main() -> eyre::Result<()> { let state = Arc::new(AppState { config, posts }); + if state.config.cache.enable && state.config.cache.cleanup { + if let Some(t) = state.config.cache.cleanup_interval { + let state = Arc::clone(&state); + let token = cancellation_token.child_token(); + debug!("setting up cleanup task"); + tasks.spawn(async move { + let mut interval = tokio::time::interval(Duration::from_millis(t)); + loop { + select! 
{ _ = token.cancelled() => break, _ = interval.tick() => { state.posts.cleanup().await } } } }); } else { state.posts.cleanup().await; } } let app = Router::new() .route("/", get(index)) .route( @@ -285,8 +307,7 @@ async fn main() -> eyre::Result<()> { #[cfg(not(unix))] // TODO: kill all windows server users let sigterm = std::future::pending::<()>(); - let axum_token = CancellationToken::new(); - cancellation_tokens.push(axum_token.clone()); + let axum_token = cancellation_token.child_token(); let mut server = axum::serve( listener, @@ -309,9 +330,7 @@ async fn main() -> eyre::Result<()> { let cleanup = async move { // stop tasks - for token in cancellation_tokens { - token.cancel(); - } + cancellation_token.cancel(); server.await.context("failed to serve app")?; while let Some(task) = tasks.join_next().await { task.context("failed to join task")?; @@ -320,6 +339,8 @@ async fn main() -> eyre::Result<()> { // write cache to file let AppState { config, posts } = Arc::<AppState>::try_unwrap(state).unwrap_or_else(|state| { warn!("couldn't unwrap Arc over AppState, more than one strong reference exists for Arc<AppState>. 
cloning instead"); + // TODO: only do this when persistence is enabled + // first check config from inside the arc, then try unwrap AppState::clone(state.as_ref()) }); if config.cache.enable diff --git a/src/post/cache.rs index d46669e..be9f2f1 100644 --- a/src/post/cache.rs +++ b/src/post/cache.rs @@ -2,6 +2,7 @@ use std::hash::{DefaultHasher, Hash, Hasher}; use scc::HashMap; use serde::{Deserialize, Serialize}; +use tracing::instrument; use crate::config::RenderConfig; use crate::post::PostMetadata; @@ -95,4 +96,25 @@ impl Cache { pub async fn remove(&self, name: &str) -> Option<(String, CacheValue)> { self.0.remove_async(name).await } + + #[instrument(name = "cleanup", skip_all)] + pub async fn cleanup(&self, get_mtime: impl Fn(&str) -> Option<u64>) { + let old_size = self.0.len(); + let mut i = 0; + + self.0 + .retain_async(|k, v| { + if get_mtime(k).is_some_and(|mtime| mtime == v.mtime) { + true + } else { + tracing::debug!("removing {k} from cache"); + i += 1; + false + } + }) + .await; + + let new_size = self.0.len(); + tracing::debug!("removed {i} entries ({old_size} -> {new_size} entries)"); + } } diff --git a/src/post/mod.rs index a08c428..c5f6dfc 100644 --- a/src/post/mod.rs +++ b/src/post/mod.rs @@ -15,6 +15,7 @@ use tracing::warn; use crate::config::RenderConfig; use crate::markdown_render::render; use crate::post::cache::Cache; +use crate::systemtime_as_secs::as_secs; use crate::PostError; #[derive(Deserialize)] @@ -134,10 +135,7 @@ impl PostManager { .insert( name.to_string(), metadata.clone(), - modified - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_secs(), + as_secs(&modified), post.clone(), &self.config, ) @@ -157,11 +155,8 @@ impl PostManager { let stat = fs::metadata(&path).await?; if stat.is_file() && path.extension().is_some_and(|ext| ext == "md") { - let mtime = stat - .modified()? 
- .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_secs(); + let mtime = as_secs(&stat.modified()?); + // TODO. this? let name = path .clone() .file_stem() @@ -202,11 +197,7 @@ impl PostManager { _ => return Err(PostError::IoError(err)), }, }; - let mtime = stat - .modified()? - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_secs(); + let mtime = as_secs(&stat.modified()?); if let Some(cache) = self.cache.as_ref() && let Some(hit) = cache.lookup(name, mtime, &self.config).await @@ -229,4 +220,17 @@ impl PostManager { pub fn into_cache(self) -> Option<Cache> { self.cache } + + pub async fn cleanup(&self) { + if let Some(cache) = self.cache.as_ref() { + cache + .cleanup(|name| { + std::fs::metadata(self.dir.join(name.to_owned() + ".md")) + .ok() + .and_then(|metadata| metadata.modified().ok()) + .map(|mtime| as_secs(&mtime)) + }) + .await + } + } }