From 6b5c0beeaa525e35301d65434e4891b99f7a3edd Mon Sep 17 00:00:00 2001 From: slonkazoid Date: Sun, 15 Dec 2024 23:06:58 +0300 Subject: [PATCH] decrease performance --- Cargo.lock | 6 +- src/app.rs | 30 +++--- src/main.rs | 31 ++++++- src/post/cache.rs | 99 ++++++++++++++------ src/post/markdown_posts.rs | 181 +++++++++++++------------------------ src/post/mod.rs | 45 +++++++-- 6 files changed, 220 insertions(+), 172 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3406f64..ef150b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -119,9 +119,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.81" +version = "0.1.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", diff --git a/src/app.rs b/src/app.rs index ee948bf..d8ae6e8 100644 --- a/src/app.rs +++ b/src/app.rs @@ -21,7 +21,7 @@ use tracing::{info, info_span, Span}; use crate::config::{Config, StyleConfig}; use crate::error::{AppError, AppResult}; -use crate::post::{MarkdownPosts, PostManager, PostMetadata, RenderStats, ReturnedPost}; +use crate::post::{Filter, PostManager, PostMetadata, RenderStats, ReturnedPost}; use crate::serve_dir_included::handle; const STATIC: Dir<'static> = include_dir!("$CARGO_MANIFEST_DIR/static"); @@ -43,8 +43,8 @@ const BINGUS_INFO: BingusInfo = BingusInfo { #[non_exhaustive] pub struct AppState { pub config: Arc, - pub posts: Arc>>, - pub reg: Arc>>, + pub posts: Arc, + pub templates: Arc>>, } #[derive(Serialize)] @@ -120,14 +120,17 @@ fn join_tags_for_meta(tags: &Map, delim: &str) -> Str s } -async fn index<'a>( +async fn index( State(AppState { - config, posts, reg, .. 
+ config, + posts, + templates: reg, + .. }): State, Query(query): Query, ) -> AppResult { let posts = posts - .get_max_n_post_metadata_with_optional_tag_sorted(query.num_posts, query.tag.as_ref()) + .get_max_n_post_metadata_with_optional_tag_sorted(query.num_posts, query.tag.as_deref()) .await?; let tags = collect_tags(&posts); @@ -157,7 +160,7 @@ async fn all_posts( Query(query): Query, ) -> AppResult>> { let posts = posts - .get_max_n_post_metadata_with_optional_tag_sorted(query.num_posts, query.tag.as_ref()) + .get_max_n_post_metadata_with_optional_tag_sorted(query.num_posts, query.tag.as_deref()) .await?; Ok(Json(posts)) @@ -172,11 +175,13 @@ async fn rss( } let posts = posts - .get_all_posts(|metadata, _| { + .get_all_posts( query .tag - .as_ref().is_none_or(|tag| metadata.tags.contains(tag)) - }) + .as_ref() + .and(Some(Filter::Tags(query.tag.as_deref().as_slice()))) + .as_slice(), + ) .await?; let mut channel = ChannelBuilder::default(); @@ -223,7 +228,10 @@ async fn rss( async fn post( State(AppState { - config, posts, reg, .. + config, + posts, + templates: reg, + .. }): State, Path(name): Path, ) -> AppResult { diff --git a/src/main.rs b/src/main.rs index e9627aa..f671d93 100644 --- a/src/main.rs +++ b/src/main.rs @@ -31,6 +31,7 @@ use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::{util::SubscriberInitExt, EnvFilter}; use crate::app::AppState; +use crate::post::cache::{load_cache, CacheGuard, CACHE_VERSION}; use crate::post::{MarkdownPosts, PostManager}; use crate::templates::new_registry; use crate::templates::watcher::watch_templates; @@ -87,7 +88,31 @@ async fn main() -> eyre::Result<()> { .instrument(info_span!("custom_template_watcher")), ); - let posts = Arc::new(MarkdownPosts::new(Arc::clone(&config)).await?); + let cache = if config.cache.enable { + if config.cache.persistence && tokio::fs::try_exists(&config.cache.file).await? 
{ + info!("loading cache from file"); + let mut cache = load_cache(&config.cache).await.unwrap_or_else(|err| { + error!("failed to load cache: {}", err); + info!("using empty cache"); + Default::default() + }); + + if cache.version() < CACHE_VERSION { + warn!("cache version changed, clearing cache"); + cache = Default::default(); + }; + + Some(cache) + } else { + Some(Default::default()) + } + } else { + None + } + .map(|cache| CacheGuard::new(cache, config.cache.clone())) + .map(Arc::new); + + let posts = Arc::new(MarkdownPosts::new(Arc::clone(&config), cache.clone()).await?); if config.cache.enable && config.cache.cleanup { if let Some(millis) = config.cache.cleanup_interval { @@ -112,8 +137,8 @@ async fn main() -> eyre::Result<()> { let state = AppState { config: Arc::clone(&config), - posts: Arc::clone(&posts), - reg: Arc::clone(&reg), + posts: posts as Arc, + templates: Arc::clone(&reg), }; let app = app::new(&config).with_state(state.clone()); diff --git a/src/post/cache.rs b/src/post/cache.rs index cd3e837..250a073 100644 --- a/src/post/cache.rs +++ b/src/post/cache.rs @@ -1,13 +1,13 @@ -use std::hash::{DefaultHasher, Hash, Hasher}; -use std::io::Read; +use std::io::{Read, Write}; +use std::ops::Deref; -use crate::config::{Config, RenderConfig}; +use crate::config::CacheConfig; use crate::post::PostMetadata; use color_eyre::eyre::{self, Context}; use scc::HashMap; use serde::{Deserialize, Serialize}; use tokio::io::AsyncReadExt; -use tracing::{debug, instrument}; +use tracing::{debug, info, instrument}; /// do not persist cache if this version number changed pub const CACHE_VERSION: u16 = 2; @@ -17,34 +17,24 @@ pub struct CacheValue { pub metadata: PostMetadata, pub rendered: String, pub mtime: u64, - config_hash: u64, + extra: u64, } #[derive(Serialize, Deserialize, Clone)] -pub struct Cache(HashMap, u16); +pub struct FileCache(HashMap, u16); -impl Default for Cache { +impl Default for FileCache { fn default() -> Self { Self(Default::default(), CACHE_VERSION) } } 
-impl Cache { - pub async fn lookup( - &self, - name: &str, - mtime: u64, - config: &RenderConfig, - ) -> Option { +impl FileCache { + pub async fn lookup(&self, name: &str, mtime: u64, extra: u64) -> Option { match self.0.get_async(name).await { Some(entry) => { let cached = entry.get(); - if mtime <= cached.mtime && { - let mut hasher = DefaultHasher::new(); - config.hash(&mut hasher); - hasher.finish() - } == cached.config_hash - { + if extra == cached.extra && mtime <= cached.mtime { Some(cached.clone()) } else { let _ = entry.remove(); @@ -76,17 +66,13 @@ impl Cache { metadata: PostMetadata, mtime: u64, rendered: String, - config: &RenderConfig, + extra: u64, ) -> Result<(), (String, (PostMetadata, String))> { - let mut hasher = DefaultHasher::new(); - config.hash(&mut hasher); - let hash = hasher.finish(); - let value = CacheValue { metadata, rendered, mtime, - config_hash: hash, + extra, }; if self @@ -136,12 +122,67 @@ impl Cache { } } -pub(crate) async fn load_cache(config: &Config) -> Result { - let path = &config.cache.file; +pub struct CacheGuard { + inner: FileCache, + config: CacheConfig, +} + +impl CacheGuard { + pub fn new(cache: FileCache, config: CacheConfig) -> Self { + Self { + inner: cache, + config, + } + } + + fn try_drop(&mut self) -> Result<(), eyre::Report> { + // write cache to file + let path = &self.config.file; + let serialized = bitcode::serialize(&self.inner).context("failed to serialize cache")?; + let mut cache_file = std::fs::File::create(path) + .with_context(|| format!("failed to open cache at {}", path.display()))?; + let compression_level = self.config.compression_level; + if self.config.compress { + std::io::Write::write_all( + &mut zstd::stream::write::Encoder::new(cache_file, compression_level)? 
+ .auto_finish(), + &serialized, + ) + } else { + cache_file.write_all(&serialized) + } + .context("failed to write cache to file")?; + info!("wrote cache to {}", path.display()); + Ok(()) + } +} + +impl Deref for CacheGuard { + type Target = FileCache; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl AsRef for CacheGuard { + fn as_ref(&self) -> &FileCache { + &self.inner + } +} + +impl Drop for CacheGuard { + fn drop(&mut self) { + self.try_drop().expect("cache to save successfully") + } +} + +pub(crate) async fn load_cache(config: &CacheConfig) -> Result { + let path = &config.file; let mut cache_file = tokio::fs::File::open(&path) .await .context("failed to open cache file")?; - let serialized = if config.cache.compress { + let serialized = if config.compress { let cache_file = cache_file.into_std().await; tokio::task::spawn_blocking(move || { let mut buf = Vec::with_capacity(4096); diff --git a/src/post/markdown_posts.rs b/src/post/markdown_posts.rs index 5ef0290..50547e6 100644 --- a/src/post/markdown_posts.rs +++ b/src/post/markdown_posts.rs @@ -1,11 +1,13 @@ use std::collections::BTreeSet; -use std::io::{self, Write}; -use std::ops::Deref; +use std::hash::{DefaultHasher, Hash, Hasher}; +use std::io; use std::path::Path; +use std::sync::Arc; use std::time::Duration; use std::time::Instant; use std::time::SystemTime; +use axum::async_trait; use axum::http::HeaderValue; use chrono::{DateTime, Utc}; use color_eyre::eyre::{self, Context}; @@ -14,14 +16,17 @@ use fronma::parser::{parse, ParsedData}; use serde::Deserialize; use tokio::fs; use tokio::io::AsyncReadExt; -use tracing::{error, info, warn}; +use tracing::warn; use crate::config::Config; use crate::markdown_render::{build_syntect, render}; -use crate::post::cache::{load_cache, Cache, CACHE_VERSION}; -use crate::post::{PostError, PostManager, PostMetadata, RenderStats, ReturnedPost}; use crate::systemtime_as_secs::as_secs; +use super::cache::CacheGuard; +use super::{ + ApplyFilters, 
Filter, PostError, PostManager, PostMetadata, RenderStats, ReturnedPost, +}; + #[derive(Deserialize)] struct FrontMatter { pub title: String, @@ -57,48 +62,30 @@ impl FrontMatter { } } } -pub struct MarkdownPosts -where - C: Deref, -{ - cache: Option, - config: C, + +pub struct MarkdownPosts { + cache: Option>, + config: Arc, + render_hash: u64, syntect: SyntectAdapter, } -impl MarkdownPosts -where - C: Deref, -{ - pub async fn new(config: C) -> eyre::Result> { +impl MarkdownPosts { + pub async fn new( + config: Arc, + cache: Option>, + ) -> eyre::Result { let syntect = build_syntect(&config.render).context("failed to create syntax highlighting engine")?; - let cache = if config.cache.enable { - if config.cache.persistence && tokio::fs::try_exists(&config.cache.file).await? { - info!("loading cache from file"); - let mut cache = load_cache(&config).await.unwrap_or_else(|err| { - error!("failed to load cache: {}", err); - info!("using empty cache"); - Default::default() - }); - - if cache.version() < CACHE_VERSION { - warn!("cache version changed, clearing cache"); - cache = Default::default(); - }; - - Some(cache) - } else { - Some(Default::default()) - } - } else { - None - }; + let mut hasher = DefaultHasher::new(); + config.render.hash(&mut hasher); + let render_hash = hasher.finish(); Ok(Self { cache, config, + render_hash, syntect, }) } @@ -131,14 +118,14 @@ where let post = render(body, Some(&self.syntect)); let rendering = before_render.elapsed(); - if let Some(cache) = self.cache.as_ref() { + if let Some(cache) = &self.cache { cache .insert( name.to_string(), metadata.clone(), as_secs(&modified), post.clone(), - &self.config.render, + self.render_hash, ) .await .unwrap_or_else(|err| warn!("failed to insert {:?} into cache", err.0)) @@ -146,55 +133,44 @@ where Ok((metadata, post, (parsing, rendering))) } +} - fn cache(&self) -> Option<&Cache> { - self.cache.as_ref() - } +#[async_trait] +impl PostManager for MarkdownPosts { + async fn get_all_posts( + &self, 
+ filters: &[Filter<'_>], + ) -> Result, PostError> { + let mut posts = Vec::new(); - fn try_drop(&mut self) -> Result<(), eyre::Report> { - // write cache to file - let config = &self.config.cache; - if config.enable - && config.persistence - && let Some(cache) = self.cache() - { - let path = &config.file; - let serialized = bitcode::serialize(cache).context("failed to serialize cache")?; - let mut cache_file = std::fs::File::create(path) - .with_context(|| format!("failed to open cache at {}", path.display()))?; - let compression_level = config.compression_level; - if config.compress { - std::io::Write::write_all( - &mut zstd::stream::write::Encoder::new(cache_file, compression_level)? - .auto_finish(), - &serialized, - ) - } else { - cache_file.write_all(&serialized) + let mut read_dir = fs::read_dir(&self.config.dirs.posts).await?; + while let Some(entry) = read_dir.next_entry().await? { + let path = entry.path(); + let stat = fs::metadata(&path).await?; + + if stat.is_file() && path.extension().is_some_and(|ext| ext == "md") { + let name = path + .clone() + .file_stem() + .unwrap() + .to_string_lossy() + .to_string(); + + let post = self.get_post(&name).await?; + if let ReturnedPost::Rendered(meta, content, stats) = post + && meta.apply_filters(filters) + { + posts.push((meta, content, stats)); + } } - .context("failed to write cache to file")?; - info!("wrote cache to {}", path.display()); } - Ok(()) - } -} -impl Drop for MarkdownPosts -where - C: Deref, -{ - fn drop(&mut self) { - self.try_drop().unwrap() + Ok(posts) } -} -impl PostManager for MarkdownPosts -where - C: Deref, -{ async fn get_all_post_metadata( &self, - filter: impl Fn(&PostMetadata) -> bool, + filters: &[Filter<'_>], ) -> Result, PostError> { let mut posts = Vec::new(); @@ -207,15 +183,15 @@ where let mtime = as_secs(&stat.modified()?); let name = String::from(path.file_stem().unwrap().to_string_lossy()); - if let Some(cache) = self.cache.as_ref() + if let Some(cache) = &self.cache && let 
Some(hit) = cache.lookup_metadata(&name, mtime).await - && filter(&hit) + && hit.apply_filters(filters) { posts.push(hit); } else { match self.parse_and_render(name, path).await { Ok((metadata, ..)) => { - if filter(&metadata) { + if metadata.apply_filters(filters) { posts.push(metadata); } } @@ -235,37 +211,6 @@ where Ok(posts) } - async fn get_all_posts( - &self, - filter: impl Fn(&PostMetadata, &str) -> bool, - ) -> Result, PostError> { - let mut posts = Vec::new(); - - let mut read_dir = fs::read_dir(&self.config.dirs.posts).await?; - while let Some(entry) = read_dir.next_entry().await? { - let path = entry.path(); - let stat = fs::metadata(&path).await?; - - if stat.is_file() && path.extension().is_some_and(|ext| ext == "md") { - let name = path - .clone() - .file_stem() - .unwrap() - .to_string_lossy() - .to_string(); - - let post = self.get_post(&name).await?; - if let ReturnedPost::Rendered(meta, content, stats) = post - && filter(&meta, &content) - { - posts.push((meta, content, stats)); - } - } - } - - Ok(posts) - } - async fn get_post(&self, name: &str) -> Result { if self.config.markdown_access && name.ends_with(".md") { let path = self.config.dirs.posts.join(name); @@ -274,7 +219,7 @@ where Ok(value) => value, Err(err) => match err.kind() { io::ErrorKind::NotFound => { - if let Some(cache) = self.cache.as_ref() { + if let Some(cache) = &self.cache { cache.remove(name).await; } return Err(PostError::NotFound(name.to_string())); @@ -299,7 +244,7 @@ where Ok(value) => value, Err(err) => match err.kind() { io::ErrorKind::NotFound => { - if let Some(cache) = self.cache.as_ref() { + if let Some(cache) = &self.cache { cache.remove(name).await; } return Err(PostError::NotFound(name.to_string())); @@ -309,8 +254,8 @@ where }; let mtime = as_secs(&stat.modified()?); - if let Some(cache) = self.cache.as_ref() - && let Some(hit) = cache.lookup(name, mtime, &self.config.render).await + if let Some(cache) = &self.cache + && let Some(hit) = cache.lookup(name, mtime, 
self.render_hash).await { Ok(ReturnedPost::Rendered( hit.metadata, @@ -330,7 +275,7 @@ where } async fn cleanup(&self) { - if let Some(cache) = self.cache.as_ref() { + if let Some(cache) = &self.cache { cache .cleanup(|name| { std::fs::metadata(self.config.dirs.posts.join(name.to_owned() + ".md")) diff --git a/src/post/mod.rs b/src/post/mod.rs index 4ba51d4..c501746 100644 --- a/src/post/mod.rs +++ b/src/post/mod.rs @@ -3,7 +3,7 @@ pub mod markdown_posts; use std::time::Duration; -use axum::http::HeaderValue; +use axum::{async_trait, http::HeaderValue}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; @@ -37,29 +37,58 @@ pub enum ReturnedPost { Raw(Vec, HeaderValue), } +pub enum Filter<'a> { + Tags(&'a [&'a str]), +} + +impl<'a> Filter<'a> { + pub fn apply(&self, meta: &PostMetadata) -> bool { + match self { + Filter::Tags(tags) => tags + .iter() + .any(|tag| meta.tags.iter().any(|meta_tag| meta_tag == tag)), + } + } +} + +pub trait ApplyFilters { + fn apply_filters(&self, filters: &[Filter<'_>]) -> bool; +} + +impl ApplyFilters for PostMetadata { + fn apply_filters(&self, filters: &[Filter<'_>]) -> bool { + for filter in filters { + if !filter.apply(self) { + return false; + } + } + true + } +} + +#[async_trait] pub trait PostManager { async fn get_all_post_metadata( &self, - filter: impl Fn(&PostMetadata) -> bool, + filters: &[Filter<'_>], ) -> Result, PostError> { - self.get_all_posts(|m, _| filter(m)) + self.get_all_posts(filters) .await .map(|vec| vec.into_iter().map(|(meta, ..)| meta).collect()) } async fn get_all_posts( &self, - filter: impl Fn(&PostMetadata, &str) -> bool, + filters: &[Filter<'_>], ) -> Result, PostError>; async fn get_max_n_post_metadata_with_optional_tag_sorted( &self, n: Option, - tag: Option<&String>, + tag: Option<&str>, ) -> Result, PostError> { - let mut posts = self - .get_all_post_metadata(|metadata| tag.is_none_or(|tag| metadata.tags.contains(tag))) - .await?; + let filters = 
tag.and(Some(Filter::Tags(tag.as_slice()))); + let mut posts = self.get_all_post_metadata(filters.as_slice()).await?; // we still want some semblance of order if created_at is None so sort by mtime as well posts.sort_unstable_by_key(|metadata| metadata.modified_at.unwrap_or_default()); posts.sort_by_key(|metadata| metadata.created_at.unwrap_or_default());