cache cleanup for all! we can enable persistence now

This commit is contained in:
slonkazoid 2024-04-30 11:41:35 +03:00
parent b9f6d98d49
commit 6a92c1713d
Signed by: slonk
SSH key fingerprint: SHA256:tbZfJX4IOvZ0LGWOWu5Ijo8jfMPi78TU7x1VoEeCIjM
5 changed files with 80 additions and 26 deletions

View file

@ -18,7 +18,7 @@ blazingly fast markdown blog software written in rust memory safe
- [ ] general cleanup of code
- [ ] better error reporting and error pages
- [ ] better tracing
- [ ] cache cleanup task
- [x] cache cleanup task
- [ ] ^ replace HashMap with HashCache once i implement [this](https://github.com/wvwwvwwv/scalable-concurrent-containers/issues/139)
- [x] (de)compress cache with zstd on startup/shutdown
- [ ] make date parsing less strict
@ -45,7 +45,10 @@ markdown_access = true # allow users to see the raw markdown of a post
[cache] # cache settings
enable = true # save metadata and rendered posts into RAM
# highly recommended, only turn off if absolutely necessary
persistence = false # save the cache to a file on shutdown and load it on startup
cleanup = true # clean cache, highly recommended
#cleanup_interval = 86400000 # clean the cache regularly (interval in milliseconds) instead of just at startup
# uncomment to enable
persistence = true # save the cache to a file on shutdown and load it on startup
file = "cache" # file to save the cache to
compress = true # compress the cache file
compression_level = 3 # zstd compression level, 3 is recommended

View file

@ -29,6 +29,8 @@ pub struct RenderConfig {
#[serde(default)]
pub struct CacheConfig {
pub enable: bool,
pub cleanup: bool,
pub cleanup_interval: Option<u64>,
pub persistence: bool,
pub file: PathBuf,
pub compress: bool,
@ -78,7 +80,9 @@ impl Default for CacheConfig {
fn default() -> Self {
Self {
enable: true,
persistence: false,
cleanup: true,
cleanup_interval: None,
persistence: true,
file: "cache".into(),
compress: true,
compression_level: 3,

View file

@ -1,4 +1,4 @@
#![feature(let_chains)]
#![feature(let_chains, if_let_guard)]
mod config;
mod error;
@ -7,6 +7,7 @@ mod hash_arc_store;
mod markdown_render;
mod post;
mod ranged_i128_visitor;
mod systemtime_as_secs;
use std::future::IntoFuture;
use std::io::Read;
@ -25,13 +26,13 @@ use color_eyre::eyre::{self, Context};
use thiserror::Error;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpListener;
use tokio::signal;
use tokio::task::JoinSet;
use tokio::{select, signal};
use tokio_util::sync::CancellationToken;
use tower_http::services::ServeDir;
use tower_http::trace::TraceLayer;
use tracing::level_filters::LevelFilter;
use tracing::{error, info, info_span, warn, Span};
use tracing::{debug, error, info, info_span, warn, Span};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
use crate::config::Config;
@ -160,7 +161,7 @@ async fn main() -> eyre::Result<()> {
.context("couldn't load configuration")?;
let mut tasks = JoinSet::new();
let mut cancellation_tokens = Vec::new();
let cancellation_token = CancellationToken::new();
let posts = if config.cache.enable {
if config.cache.persistence
@ -228,6 +229,27 @@ async fn main() -> eyre::Result<()> {
let state = Arc::new(AppState { config, posts });
if state.config.cache.enable && state.config.cache.cleanup {
if let Some(t) = state.config.cache.cleanup_interval {
let state = Arc::clone(&state);
let token = cancellation_token.child_token();
debug!("setting up cleanup task");
tasks.spawn(async move {
let mut interval = tokio::time::interval(Duration::from_millis(t));
loop {
select! {
_ = token.cancelled() => break,
_ = interval.tick() => {
state.posts.cleanup().await
}
}
}
});
} else {
state.posts.cleanup().await;
}
}
let app = Router::new()
.route("/", get(index))
.route(
@ -285,8 +307,7 @@ async fn main() -> eyre::Result<()> {
#[cfg(not(unix))] // TODO: kill all windows server users
let sigterm = std::future::pending::<()>();
let axum_token = CancellationToken::new();
cancellation_tokens.push(axum_token.clone());
let axum_token = cancellation_token.child_token();
let mut server = axum::serve(
listener,
@ -309,9 +330,7 @@ async fn main() -> eyre::Result<()> {
let cleanup = async move {
// stop tasks
for token in cancellation_tokens {
token.cancel();
}
cancellation_token.cancel();
server.await.context("failed to serve app")?;
while let Some(task) = tasks.join_next().await {
task.context("failed to join task")?;
@ -320,6 +339,8 @@ async fn main() -> eyre::Result<()> {
// write cache to file
let AppState { config, posts } = Arc::<AppState>::try_unwrap(state).unwrap_or_else(|state| {
warn!("couldn't unwrap Arc over AppState, more than one strong reference exists for Arc. cloning instead");
// TODO: only do this when persistence is enabled
// first check config from inside the arc, then try unwrap
AppState::clone(state.as_ref())
});
if config.cache.enable

View file

@ -2,6 +2,7 @@ use std::hash::{DefaultHasher, Hash, Hasher};
use scc::HashMap;
use serde::{Deserialize, Serialize};
use tracing::instrument;
use crate::config::RenderConfig;
use crate::post::PostMetadata;
@ -95,4 +96,25 @@ impl Cache {
/// Removes the entry stored under `name` from the underlying map.
///
/// Returns the removed key together with its cached value, or `None`
/// when no entry with that name was present.
pub async fn remove(&self, name: &str) -> Option<(String, CacheValue)> {
self.0.remove_async(name).await
}
/// Drops every cache entry whose stored `mtime` no longer matches the value
/// reported by `get_mtime` for the entry's key.
///
/// `get_mtime` is called once per entry with the entry's key. An entry is
/// kept only when the callback returns `Some(mtime)` equal to the cached
/// `mtime`; a `None` result or a differing value removes the entry.
/// The number of removed entries and the size before/after are logged at
/// debug level.
#[instrument(name = "cleanup", skip_all)]
pub async fn cleanup(&self, get_mtime: impl Fn(&str) -> Option<u64>) {
    let old_size = self.0.len();
    // counts the entries rejected by the retain predicate below
    let mut i = 0;

    self.0
        .retain_async(|k, v| {
            let keep = matches!(get_mtime(k), Some(mtime) if mtime == v.mtime);
            if !keep {
                tracing::debug!("removing {k} from cache");
                i += 1;
            }
            keep
        })
        .await;

    let new_size = self.0.len();
    tracing::debug!("removed {i} entries ({old_size} -> {new_size} entries)");
}
}

View file

@ -15,6 +15,7 @@ use tracing::warn;
use crate::config::RenderConfig;
use crate::markdown_render::render;
use crate::post::cache::Cache;
use crate::systemtime_as_secs::as_secs;
use crate::PostError;
#[derive(Deserialize)]
@ -134,10 +135,7 @@ impl PostManager {
.insert(
name.to_string(),
metadata.clone(),
modified
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_secs(),
as_secs(&modified),
post.clone(),
&self.config,
)
@ -157,11 +155,8 @@ impl PostManager {
let stat = fs::metadata(&path).await?;
if stat.is_file() && path.extension().is_some_and(|ext| ext == "md") {
let mtime = stat
.modified()?
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_secs();
let mtime = as_secs(&stat.modified()?);
// TODO. this?
let name = path
.clone()
.file_stem()
@ -202,11 +197,7 @@ impl PostManager {
_ => return Err(PostError::IoError(err)),
},
};
let mtime = stat
.modified()?
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_secs();
let mtime = as_secs(&stat.modified()?);
if let Some(cache) = self.cache.as_ref()
&& let Some(hit) = cache.lookup(name, mtime, &self.config).await
@ -229,4 +220,17 @@ impl PostManager {
/// Consumes the manager and hands back its cache, if it has one.
pub fn into_cache(self) -> Option<Cache> {
self.cache
}
/// Runs a cleanup pass over the cache, if this manager has one.
///
/// For each cached name, the modification time of `<dir>/<name>.md` is
/// looked up and passed to the cache's own `cleanup`. When the metadata or
/// the modification time cannot be read, `None` is reported for that name.
/// Does nothing when no cache is configured.
pub async fn cleanup(&self) {
    let Some(cache) = self.cache.as_ref() else {
        return;
    };

    cache
        .cleanup(|name| {
            // NOTE(review): `std::fs::metadata` is a blocking call made from
            // async code — confirm this is acceptable for the runtime in use.
            let path = self.dir.join(name.to_owned() + ".md");
            let modified = std::fs::metadata(path).ok()?.modified().ok()?;
            Some(as_secs(&modified))
        })
        .await;
}
}