decrease performance

This commit is contained in:
slonkazoid 2024-12-15 23:06:58 +03:00
parent c44b1a082e
commit 6b5c0beeaa
Signed by: slonk
SSH key fingerprint: SHA256:tbZfJX4IOvZ0LGWOWu5Ijo8jfMPi78TU7x1VoEeCIjM
6 changed files with 220 additions and 172 deletions

6
Cargo.lock generated
View file

@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "addr2line"
@ -119,9 +119,9 @@ dependencies = [
[[package]]
name = "async-trait"
version = "0.1.81"
version = "0.1.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd"
dependencies = [
"proc-macro2",
"quote",

View file

@ -21,7 +21,7 @@ use tracing::{info, info_span, Span};
use crate::config::{Config, StyleConfig};
use crate::error::{AppError, AppResult};
use crate::post::{MarkdownPosts, PostManager, PostMetadata, RenderStats, ReturnedPost};
use crate::post::{Filter, PostManager, PostMetadata, RenderStats, ReturnedPost};
use crate::serve_dir_included::handle;
const STATIC: Dir<'static> = include_dir!("$CARGO_MANIFEST_DIR/static");
@ -43,8 +43,8 @@ const BINGUS_INFO: BingusInfo = BingusInfo {
#[non_exhaustive]
pub struct AppState {
pub config: Arc<Config>,
pub posts: Arc<MarkdownPosts<Arc<Config>>>,
pub reg: Arc<RwLock<Handlebars<'static>>>,
pub posts: Arc<dyn PostManager + Send + Sync>,
pub templates: Arc<RwLock<Handlebars<'static>>>,
}
#[derive(Serialize)]
@ -120,14 +120,17 @@ fn join_tags_for_meta(tags: &Map<String, serde_json::Value>, delim: &str) -> Str
s
}
async fn index<'a>(
async fn index(
State(AppState {
config, posts, reg, ..
config,
posts,
templates: reg,
..
}): State<AppState>,
Query(query): Query<QueryParams>,
) -> AppResult<impl IntoResponse> {
let posts = posts
.get_max_n_post_metadata_with_optional_tag_sorted(query.num_posts, query.tag.as_ref())
.get_max_n_post_metadata_with_optional_tag_sorted(query.num_posts, query.tag.as_deref())
.await?;
let tags = collect_tags(&posts);
@ -157,7 +160,7 @@ async fn all_posts(
Query(query): Query<QueryParams>,
) -> AppResult<Json<Vec<PostMetadata>>> {
let posts = posts
.get_max_n_post_metadata_with_optional_tag_sorted(query.num_posts, query.tag.as_ref())
.get_max_n_post_metadata_with_optional_tag_sorted(query.num_posts, query.tag.as_deref())
.await?;
Ok(Json(posts))
@ -172,11 +175,13 @@ async fn rss(
}
let posts = posts
.get_all_posts(|metadata, _| {
.get_all_posts(
query
.tag
.as_ref().is_none_or(|tag| metadata.tags.contains(tag))
})
.as_ref()
.and(Some(Filter::Tags(query.tag.as_deref().as_slice())))
.as_slice(),
)
.await?;
let mut channel = ChannelBuilder::default();
@ -223,7 +228,10 @@ async fn rss(
async fn post(
State(AppState {
config, posts, reg, ..
config,
posts,
templates: reg,
..
}): State<AppState>,
Path(name): Path<String>,
) -> AppResult<impl IntoResponse> {

View file

@ -31,6 +31,7 @@ use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::{util::SubscriberInitExt, EnvFilter};
use crate::app::AppState;
use crate::post::cache::{load_cache, CacheGuard, CACHE_VERSION};
use crate::post::{MarkdownPosts, PostManager};
use crate::templates::new_registry;
use crate::templates::watcher::watch_templates;
@ -87,7 +88,31 @@ async fn main() -> eyre::Result<()> {
.instrument(info_span!("custom_template_watcher")),
);
let posts = Arc::new(MarkdownPosts::new(Arc::clone(&config)).await?);
let cache = if config.cache.enable {
if config.cache.persistence && tokio::fs::try_exists(&config.cache.file).await? {
info!("loading cache from file");
let mut cache = load_cache(&config.cache).await.unwrap_or_else(|err| {
error!("failed to load cache: {}", err);
info!("using empty cache");
Default::default()
});
if cache.version() < CACHE_VERSION {
warn!("cache version changed, clearing cache");
cache = Default::default();
};
Some(cache)
} else {
Some(Default::default())
}
} else {
None
}
.map(|cache| CacheGuard::new(cache, config.cache.clone()))
.map(Arc::new);
let posts = Arc::new(MarkdownPosts::new(Arc::clone(&config), cache.clone()).await?);
if config.cache.enable && config.cache.cleanup {
if let Some(millis) = config.cache.cleanup_interval {
@ -112,8 +137,8 @@ async fn main() -> eyre::Result<()> {
let state = AppState {
config: Arc::clone(&config),
posts: Arc::clone(&posts),
reg: Arc::clone(&reg),
posts: posts as Arc<dyn PostManager + Send + Sync>,
templates: Arc::clone(&reg),
};
let app = app::new(&config).with_state(state.clone());

View file

@ -1,13 +1,13 @@
use std::hash::{DefaultHasher, Hash, Hasher};
use std::io::Read;
use std::io::{Read, Write};
use std::ops::Deref;
use crate::config::{Config, RenderConfig};
use crate::config::CacheConfig;
use crate::post::PostMetadata;
use color_eyre::eyre::{self, Context};
use scc::HashMap;
use serde::{Deserialize, Serialize};
use tokio::io::AsyncReadExt;
use tracing::{debug, instrument};
use tracing::{debug, info, instrument};
/// do not persist cache if this version number changed
pub const CACHE_VERSION: u16 = 2;
@ -17,34 +17,24 @@ pub struct CacheValue {
pub metadata: PostMetadata,
pub rendered: String,
pub mtime: u64,
config_hash: u64,
extra: u64,
}
#[derive(Serialize, Deserialize, Clone)]
pub struct Cache(HashMap<String, CacheValue>, u16);
pub struct FileCache(HashMap<String, CacheValue>, u16);
impl Default for Cache {
impl Default for FileCache {
    /// Builds an empty cache stamped with the current [`CACHE_VERSION`].
    fn default() -> Self {
        let entries = Default::default();
        Self(entries, CACHE_VERSION)
    }
}
impl Cache {
pub async fn lookup(
&self,
name: &str,
mtime: u64,
config: &RenderConfig,
) -> Option<CacheValue> {
impl FileCache {
pub async fn lookup(&self, name: &str, mtime: u64, extra: u64) -> Option<CacheValue> {
match self.0.get_async(name).await {
Some(entry) => {
let cached = entry.get();
if mtime <= cached.mtime && {
let mut hasher = DefaultHasher::new();
config.hash(&mut hasher);
hasher.finish()
} == cached.config_hash
{
if extra == cached.extra && mtime <= cached.mtime {
Some(cached.clone())
} else {
let _ = entry.remove();
@ -76,17 +66,13 @@ impl Cache {
metadata: PostMetadata,
mtime: u64,
rendered: String,
config: &RenderConfig,
extra: u64,
) -> Result<(), (String, (PostMetadata, String))> {
let mut hasher = DefaultHasher::new();
config.hash(&mut hasher);
let hash = hasher.finish();
let value = CacheValue {
metadata,
rendered,
mtime,
config_hash: hash,
extra,
};
if self
@ -136,12 +122,67 @@ impl Cache {
}
}
pub(crate) async fn load_cache(config: &Config) -> Result<Cache, eyre::Report> {
let path = &config.cache.file;
/// Owns a [`FileCache`] together with the [`CacheConfig`] that says where and
/// how the cache is written to disk.
pub struct CacheGuard {
    inner: FileCache,
    config: CacheConfig,
}

impl CacheGuard {
    /// Wraps `cache` so that it is paired with the `config` used to persist it.
    pub fn new(cache: FileCache, config: CacheConfig) -> Self {
        Self {
            inner: cache,
            config,
        }
    }

    /// Serializes the cache and writes it to `config.file`, zstd-compressed
    /// when `config.compress` is set.
    ///
    /// Does nothing when `config.persistence` is disabled: a cache that is
    /// never loaded from disk must not be written to disk either.
    ///
    /// # Errors
    ///
    /// Returns an error if serialization fails, the file cannot be created,
    /// the encoder cannot be set up, or the write itself fails.
    fn try_drop(&mut self) -> Result<(), eyre::Report> {
        // An in-memory-only cache has nothing to persist.
        if !self.config.persistence {
            return Ok(());
        }

        // write cache to file
        let path = &self.config.file;
        // Serialize before creating the file so a serialization failure does
        // not leave a truncated cache file behind.
        let serialized = bitcode::serialize(&self.inner).context("failed to serialize cache")?;
        let mut cache_file = std::fs::File::create(path)
            .with_context(|| format!("failed to open cache at {}", path.display()))?;
        let compression_level = self.config.compression_level;
        if self.config.compress {
            std::io::Write::write_all(
                &mut zstd::stream::write::Encoder::new(cache_file, compression_level)?
                    .auto_finish(),
                &serialized,
            )
        } else {
            cache_file.write_all(&serialized)
        }
        .context("failed to write cache to file")?;
        info!("wrote cache to {}", path.display());
        Ok(())
    }
}
/// Exposes the wrapped [`FileCache`] so its methods can be called directly on
/// the guard.
impl Deref for CacheGuard {
    type Target = FileCache;

    fn deref(&self) -> &FileCache {
        let Self { inner, .. } = self;
        inner
    }
}
/// Borrows the wrapped [`FileCache`].
impl AsRef<FileCache> for CacheGuard {
    fn as_ref(&self) -> &FileCache {
        let cache: &FileCache = &self.inner;
        cache
    }
}
/// Saves the cache when the guard goes out of scope.
impl Drop for CacheGuard {
    /// Runs `try_drop` and surfaces any failure.
    ///
    /// # Panics
    ///
    /// Panics if saving fails, unless the thread is already unwinding from
    /// another panic.
    fn drop(&mut self) {
        if let Err(err) = self.try_drop() {
            if std::thread::panicking() {
                // Already unwinding: a second panic would abort the process,
                // so only report the failure.
                eprintln!("failed to save cache: {err:?}");
            } else {
                panic!("cache to save successfully: {err:?}");
            }
        }
    }
}
pub(crate) async fn load_cache(config: &CacheConfig) -> Result<FileCache, eyre::Report> {
let path = &config.file;
let mut cache_file = tokio::fs::File::open(&path)
.await
.context("failed to open cache file")?;
let serialized = if config.cache.compress {
let serialized = if config.compress {
let cache_file = cache_file.into_std().await;
tokio::task::spawn_blocking(move || {
let mut buf = Vec::with_capacity(4096);

View file

@ -1,11 +1,13 @@
use std::collections::BTreeSet;
use std::io::{self, Write};
use std::ops::Deref;
use std::hash::{DefaultHasher, Hash, Hasher};
use std::io;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;
use std::time::SystemTime;
use axum::async_trait;
use axum::http::HeaderValue;
use chrono::{DateTime, Utc};
use color_eyre::eyre::{self, Context};
@ -14,14 +16,17 @@ use fronma::parser::{parse, ParsedData};
use serde::Deserialize;
use tokio::fs;
use tokio::io::AsyncReadExt;
use tracing::{error, info, warn};
use tracing::warn;
use crate::config::Config;
use crate::markdown_render::{build_syntect, render};
use crate::post::cache::{load_cache, Cache, CACHE_VERSION};
use crate::post::{PostError, PostManager, PostMetadata, RenderStats, ReturnedPost};
use crate::systemtime_as_secs::as_secs;
use super::cache::CacheGuard;
use super::{
ApplyFilters, Filter, PostError, PostManager, PostMetadata, RenderStats, ReturnedPost,
};
#[derive(Deserialize)]
struct FrontMatter {
pub title: String,
@ -57,48 +62,30 @@ impl FrontMatter {
}
}
}
pub struct MarkdownPosts<C>
where
C: Deref<Target = Config>,
{
cache: Option<Cache>,
config: C,
/// Markdown-backed post store; implements `PostManager`.
pub struct MarkdownPosts {
/// Shared cache guard supplied by the caller of `new`; when `None`, cache
/// lookups and inserts are skipped.
cache: Option<Arc<CacheGuard>>,
/// Shared application configuration (posts directory, render settings, ...).
config: Arc<Config>,
/// Hash of `config.render`, computed once in `new` and passed to the cache
/// on every insert and lookup.
render_hash: u64,
/// Syntax highlighting adapter built from `config.render` and handed to
/// `render`.
syntect: SyntectAdapter,
}
impl<C> MarkdownPosts<C>
where
C: Deref<Target = Config>,
{
pub async fn new(config: C) -> eyre::Result<MarkdownPosts<C>> {
impl MarkdownPosts {
pub async fn new(
config: Arc<Config>,
cache: Option<Arc<CacheGuard>>,
) -> eyre::Result<MarkdownPosts> {
let syntect =
build_syntect(&config.render).context("failed to create syntax highlighting engine")?;
let cache = if config.cache.enable {
if config.cache.persistence && tokio::fs::try_exists(&config.cache.file).await? {
info!("loading cache from file");
let mut cache = load_cache(&config).await.unwrap_or_else(|err| {
error!("failed to load cache: {}", err);
info!("using empty cache");
Default::default()
});
if cache.version() < CACHE_VERSION {
warn!("cache version changed, clearing cache");
cache = Default::default();
};
Some(cache)
} else {
Some(Default::default())
}
} else {
None
};
let mut hasher = DefaultHasher::new();
config.render.hash(&mut hasher);
let render_hash = hasher.finish();
Ok(Self {
cache,
config,
render_hash,
syntect,
})
}
@ -131,14 +118,14 @@ where
let post = render(body, Some(&self.syntect));
let rendering = before_render.elapsed();
if let Some(cache) = self.cache.as_ref() {
if let Some(cache) = &self.cache {
cache
.insert(
name.to_string(),
metadata.clone(),
as_secs(&modified),
post.clone(),
&self.config.render,
self.render_hash,
)
.await
.unwrap_or_else(|err| warn!("failed to insert {:?} into cache", err.0))
@ -146,55 +133,44 @@ where
Ok((metadata, post, (parsing, rendering)))
}
}
fn cache(&self) -> Option<&Cache> {
self.cache.as_ref()
}
#[async_trait]
impl PostManager for MarkdownPosts {
async fn get_all_posts(
&self,
filters: &[Filter<'_>],
) -> Result<Vec<(PostMetadata, String, RenderStats)>, PostError> {
let mut posts = Vec::new();
fn try_drop(&mut self) -> Result<(), eyre::Report> {
// write cache to file
let config = &self.config.cache;
if config.enable
&& config.persistence
&& let Some(cache) = self.cache()
let mut read_dir = fs::read_dir(&self.config.dirs.posts).await?;
while let Some(entry) = read_dir.next_entry().await? {
let path = entry.path();
let stat = fs::metadata(&path).await?;
if stat.is_file() && path.extension().is_some_and(|ext| ext == "md") {
let name = path
.clone()
.file_stem()
.unwrap()
.to_string_lossy()
.to_string();
let post = self.get_post(&name).await?;
if let ReturnedPost::Rendered(meta, content, stats) = post
&& meta.apply_filters(filters)
{
let path = &config.file;
let serialized = bitcode::serialize(cache).context("failed to serialize cache")?;
let mut cache_file = std::fs::File::create(path)
.with_context(|| format!("failed to open cache at {}", path.display()))?;
let compression_level = config.compression_level;
if config.compress {
std::io::Write::write_all(
&mut zstd::stream::write::Encoder::new(cache_file, compression_level)?
.auto_finish(),
&serialized,
)
} else {
cache_file.write_all(&serialized)
posts.push((meta, content, stats));
}
.context("failed to write cache to file")?;
info!("wrote cache to {}", path.display());
}
Ok(())
}
}
impl<C> Drop for MarkdownPosts<C>
where
C: Deref<Target = Config>,
{
fn drop(&mut self) {
self.try_drop().unwrap()
Ok(posts)
}
}
impl<C> PostManager for MarkdownPosts<C>
where
C: Deref<Target = Config>,
{
async fn get_all_post_metadata(
&self,
filter: impl Fn(&PostMetadata) -> bool,
filters: &[Filter<'_>],
) -> Result<Vec<PostMetadata>, PostError> {
let mut posts = Vec::new();
@ -207,15 +183,15 @@ where
let mtime = as_secs(&stat.modified()?);
let name = String::from(path.file_stem().unwrap().to_string_lossy());
if let Some(cache) = self.cache.as_ref()
if let Some(cache) = &self.cache
&& let Some(hit) = cache.lookup_metadata(&name, mtime).await
&& filter(&hit)
&& hit.apply_filters(filters)
{
posts.push(hit);
} else {
match self.parse_and_render(name, path).await {
Ok((metadata, ..)) => {
if filter(&metadata) {
if metadata.apply_filters(filters) {
posts.push(metadata);
}
}
@ -235,37 +211,6 @@ where
Ok(posts)
}
async fn get_all_posts(
&self,
filter: impl Fn(&PostMetadata, &str) -> bool,
) -> Result<Vec<(PostMetadata, String, RenderStats)>, PostError> {
let mut posts = Vec::new();
let mut read_dir = fs::read_dir(&self.config.dirs.posts).await?;
while let Some(entry) = read_dir.next_entry().await? {
let path = entry.path();
let stat = fs::metadata(&path).await?;
if stat.is_file() && path.extension().is_some_and(|ext| ext == "md") {
let name = path
.clone()
.file_stem()
.unwrap()
.to_string_lossy()
.to_string();
let post = self.get_post(&name).await?;
if let ReturnedPost::Rendered(meta, content, stats) = post
&& filter(&meta, &content)
{
posts.push((meta, content, stats));
}
}
}
Ok(posts)
}
async fn get_post(&self, name: &str) -> Result<ReturnedPost, PostError> {
if self.config.markdown_access && name.ends_with(".md") {
let path = self.config.dirs.posts.join(name);
@ -274,7 +219,7 @@ where
Ok(value) => value,
Err(err) => match err.kind() {
io::ErrorKind::NotFound => {
if let Some(cache) = self.cache.as_ref() {
if let Some(cache) = &self.cache {
cache.remove(name).await;
}
return Err(PostError::NotFound(name.to_string()));
@ -299,7 +244,7 @@ where
Ok(value) => value,
Err(err) => match err.kind() {
io::ErrorKind::NotFound => {
if let Some(cache) = self.cache.as_ref() {
if let Some(cache) = &self.cache {
cache.remove(name).await;
}
return Err(PostError::NotFound(name.to_string()));
@ -309,8 +254,8 @@ where
};
let mtime = as_secs(&stat.modified()?);
if let Some(cache) = self.cache.as_ref()
&& let Some(hit) = cache.lookup(name, mtime, &self.config.render).await
if let Some(cache) = &self.cache
&& let Some(hit) = cache.lookup(name, mtime, self.render_hash).await
{
Ok(ReturnedPost::Rendered(
hit.metadata,
@ -330,7 +275,7 @@ where
}
async fn cleanup(&self) {
if let Some(cache) = self.cache.as_ref() {
if let Some(cache) = &self.cache {
cache
.cleanup(|name| {
std::fs::metadata(self.config.dirs.posts.join(name.to_owned() + ".md"))

View file

@ -3,7 +3,7 @@ pub mod markdown_posts;
use std::time::Duration;
use axum::http::HeaderValue;
use axum::{async_trait, http::HeaderValue};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
@ -37,29 +37,58 @@ pub enum ReturnedPost {
Raw(Vec<u8>, HeaderValue),
}
/// A predicate over [`PostMetadata`] used to narrow down post listings.
pub enum Filter<'a> {
    /// Matches posts that carry at least one of the listed tags.
    Tags(&'a [&'a str]),
}

impl Filter<'_> {
    /// Returns `true` when `meta` satisfies this filter.
    ///
    /// For [`Filter::Tags`] this means the post has at least one tag in
    /// common with the list; an empty list therefore matches nothing.
    pub fn apply(&self, meta: &PostMetadata) -> bool {
        match self {
            Self::Tags(wanted) => meta
                .tags
                .iter()
                .any(|have| wanted.iter().any(|want| have == want)),
        }
    }
}
/// Something that can be checked against a list of [`Filter`]s.
pub trait ApplyFilters {
    /// Returns `true` when `self` passes every filter in `filters`.
    fn apply_filters(&self, filters: &[Filter<'_>]) -> bool;
}

impl ApplyFilters for PostMetadata {
    /// A post passes only if no filter rejects it; an empty filter list
    /// accepts everything.
    fn apply_filters(&self, filters: &[Filter<'_>]) -> bool {
        filters.iter().all(|filter| filter.apply(self))
    }
}
#[async_trait]
pub trait PostManager {
async fn get_all_post_metadata(
&self,
filter: impl Fn(&PostMetadata) -> bool,
filters: &[Filter<'_>],
) -> Result<Vec<PostMetadata>, PostError> {
self.get_all_posts(|m, _| filter(m))
self.get_all_posts(filters)
.await
.map(|vec| vec.into_iter().map(|(meta, ..)| meta).collect())
}
async fn get_all_posts(
&self,
filter: impl Fn(&PostMetadata, &str) -> bool,
filters: &[Filter<'_>],
) -> Result<Vec<(PostMetadata, String, RenderStats)>, PostError>;
async fn get_max_n_post_metadata_with_optional_tag_sorted(
&self,
n: Option<usize>,
tag: Option<&String>,
tag: Option<&str>,
) -> Result<Vec<PostMetadata>, PostError> {
let mut posts = self
.get_all_post_metadata(|metadata| tag.is_none_or(|tag| metadata.tags.contains(tag)))
.await?;
let filters = tag.and(Some(Filter::Tags(tag.as_slice())));
let mut posts = self.get_all_post_metadata(filters.as_slice()).await?;
// we still want some semblance of order if created_at is None so sort by mtime as well
posts.sort_unstable_by_key(|metadata| metadata.modified_at.unwrap_or_default());
posts.sort_by_key(|metadata| metadata.created_at.unwrap_or_default());