riley_cms_core/lib.rs
1//! # riley-cms-core
2//!
3//! Core library for riley_cms - a minimal, self-hosted headless CMS.
4//!
5//! This crate provides the domain logic for riley_cms without any HTTP or CLI concerns.
6//! It can be embedded in other Rust applications or used standalone.
7//!
8//! ## Features
9//!
10//! - **Content Management**: Parse and query posts and series from a Git-based content directory
11//! - **S3/R2 Storage**: Upload and list assets from S3-compatible storage
12//! - **In-Memory Caching**: Fast content access with cache refresh on demand
13//! - **Visibility Control**: Support for drafts, scheduled posts, and live content
14//!
15//! ## Quick Start
16//!
17//! ```ignore
18//! use riley_cms_core::{RileyCms, resolve_config, ListOptions};
19//!
20//! #[tokio::main]
21//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
22//! // Load config from standard locations
23//! let config = resolve_config(None)?;
24//!
25//! // Create the RileyCms instance
26//! let riley_cms = RileyCms::from_config(config).await?;
27//!
28//! // List all live posts
29//! let posts = riley_cms.list_posts(&ListOptions::default()).await?;
30//! for post in posts.items {
31//! println!("{}: {}", post.slug, post.title);
32//! }
33//!
34//! // Get a specific post
35//! if let Some(post) = riley_cms.get_post("my-post").await? {
36//! println!("Content: {}", post.content);
37//! }
38//!
39//! Ok(())
40//! }
41//! ```
42//!
43//! ## Content Structure
44//!
45//! riley_cms expects content in this structure:
46//!
47//! ```text
48//! content/
49//! ├── my-post/
50//! │ ├── config.toml
51//! │ └── content.mdx
52//! └── my-series/
53//! ├── series.toml
54//! ├── part-one/
55//! │ ├── config.toml
56//! │ └── content.mdx
57//! └── part-two/
58//! ├── config.toml
59//! └── content.mdx
60//! ```
61//!
62//! ## Visibility Model
63//!
64//! Content visibility is controlled by the `goes_live_at` field:
65//!
66//! - `None` → Draft (only visible with `include_drafts`)
67//! - `Some(past_date)` → Live (always visible)
68//! - `Some(future_date)` → Scheduled (only visible with `include_scheduled`)
69
70mod config;
71mod content;
72mod error;
73pub mod git;
74mod security;
75mod storage;
76mod types;
77
78pub use config::{Config, GitConfig, RileyCmsConfig, resolve_config};
79pub use content::ContentCache;
80pub use error::{Error, Result};
81pub use git::{BodyStream, GitBackend, GitCgiCompletion, GitCgiHeaders, GitCgiStreamResponse};
82pub use storage::Storage;
83pub use types::*;
84
85use chrono::Utc;
86use hmac::{Hmac, Mac};
87use sha2::Sha256;
88use std::net::ToSocketAddrs;
89use std::path::Path;
90use std::sync::Arc;
91use tokio::sync::RwLock;
92
93/// Main entry point for riley_cms functionality.
94///
95/// `RileyCms` provides access to all CMS operations: listing posts and series,
96/// retrieving individual content, managing assets, and cache control.
97///
98/// # Example
99///
100/// ```ignore
101/// use riley_cms_core::{RileyCms, RileyCmsConfig, ListOptions};
102///
103/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
104/// let config: RileyCmsConfig = toml::from_str(r#"
105/// [content]
106/// repo_path = "/data/content"
107/// [storage]
108/// bucket = "my-assets"
109/// public_url_base = "https://assets.example.com"
110/// "#)?;
111///
112/// let riley_cms = RileyCms::from_config(config).await?;
113/// let posts = riley_cms.list_posts(&ListOptions::default()).await?;
114/// # Ok(())
115/// # }
116/// ```
117pub struct RileyCms {
118 config: RileyCmsConfig,
119 cache: Arc<RwLock<ContentCache>>,
120 storage: Storage,
121}
122
123impl RileyCms {
124 /// Create a new RileyCms instance from configuration.
125 ///
126 /// This loads content from disk into an in-memory cache and initializes
127 /// the S3 storage client.
128 ///
129 /// # Errors
130 ///
131 /// Returns an error if content cannot be loaded or S3 configuration is invalid.
132 pub async fn from_config(config: RileyCmsConfig) -> Result<Self> {
133 let storage = Storage::new(&config.storage).await?;
134
135 // Clone content config to move into the blocking task closure
136 let content_config = config.content.clone();
137
138 // Offload blocking filesystem I/O to a dedicated thread pool
139 let cache = tokio::task::spawn_blocking(move || ContentCache::load(&content_config))
140 .await
141 .map_err(|e| Error::Io(std::io::Error::other(e)))??;
142
143 Ok(Self {
144 config,
145 cache: Arc::new(RwLock::new(cache)),
146 storage,
147 })
148 }
149
150 /// List posts with filtering and pagination.
151 ///
152 /// By default, only live posts (with `goes_live_at` in the past) are returned.
153 /// Use [`ListOptions`] to include drafts or scheduled posts.
154 ///
155 /// Posts are sorted by `goes_live_at` descending (newest first).
156 pub async fn list_posts(&self, opts: &ListOptions) -> Result<ListResult<PostSummary>> {
157 let cache = self.cache.read().await;
158 cache.list_posts(opts)
159 }
160
161 /// Get a single post by its slug.
162 ///
163 /// Returns `None` if no post with the given slug exists.
164 /// Note: This returns the post regardless of visibility status.
165 pub async fn get_post(&self, slug: &str) -> Result<Option<Post>> {
166 let cache = self.cache.read().await;
167 cache.get_post(slug)
168 }
169
170 /// List series with filtering and pagination.
171 ///
172 /// By default, only live series are returned.
173 /// Series are sorted by `goes_live_at` descending.
174 pub async fn list_series(&self, opts: &ListOptions) -> Result<ListResult<SeriesSummary>> {
175 let cache = self.cache.read().await;
176 cache.list_series(opts)
177 }
178
179 /// Get a single series by its slug, including all posts.
180 ///
181 /// Posts within the series are sorted by their `order` field,
182 /// with alphabetical fallback for ties or missing values.
183 pub async fn get_series(&self, slug: &str) -> Result<Option<Series>> {
184 let cache = self.cache.read().await;
185 cache.get_series(slug)
186 }
187
188 /// Validate content structure and return any errors.
189 ///
190 /// Checks for common issues like empty titles, missing content, etc.
191 pub async fn validate_content(&self) -> Result<Vec<ValidationError>> {
192 let cache = self.cache.read().await;
193 Ok(cache.validate())
194 }
195
196 /// List assets in the S3/R2 storage bucket with pagination.
197 ///
198 /// Uses cursor-based pagination via S3 continuation tokens.
199 /// Defaults to 100 assets per page, capped at 1000.
200 pub async fn list_assets(&self, opts: &AssetListOptions) -> Result<AssetListResult> {
201 self.storage.list_assets(opts).await
202 }
203
204 /// Upload a file to the storage bucket.
205 ///
206 /// # Arguments
207 ///
208 /// * `path` - Local file path to upload
209 /// * `dest` - Optional destination path in bucket (defaults to filename)
210 pub async fn upload_asset(&self, path: &Path, dest: Option<&str>) -> Result<Asset> {
211 self.storage.upload_asset(path, dest).await
212 }
213
214 /// Refresh the content cache from disk.
215 ///
216 /// Call this after content has been updated (e.g., after a git push)
217 /// to reload the in-memory cache.
218 pub async fn refresh(&self) -> Result<()> {
219 // Clone the config to move into the blocking task closure
220 let content_config = self.config.content.clone();
221
222 // Offload blocking filesystem I/O to a dedicated thread pool
223 let new_cache = tokio::task::spawn_blocking(move || ContentCache::load(&content_config))
224 .await
225 .map_err(|e| Error::Io(std::io::Error::other(e)))??;
226
227 let mut cache = self.cache.write().await;
228 *cache = new_cache;
229 Ok(())
230 }
231
232 /// Get an ETag representing the current content state.
233 ///
234 /// This is a hash of all content, suitable for HTTP caching headers.
235 /// The ETag changes when any content is modified.
236 pub async fn content_etag(&self) -> String {
237 let cache = self.cache.read().await;
238 cache.etag()
239 }
240
241 /// Fire webhooks after content update.
242 ///
243 /// Each webhook is validated and sent atomically: DNS is resolved once,
244 /// checked against private/internal IP ranges, and the connection is pinned
245 /// to the validated IP (preventing DNS rebinding/TOCTOU attacks).
246 ///
247 /// If a `secret` is configured in `[webhooks]`, signs each request body with
248 /// HMAC-SHA256 and includes the hex signature in the `X-Riley-Cms-Signature` header.
249 /// Retries up to 3 times with exponential backoff on network errors or 5xx responses.
250 pub async fn fire_webhooks(&self) {
251 if let Some(ref webhooks) = self.config.webhooks {
252 // Resolve webhook secret once (if configured)
253 let secret = if let Some(ref secret_config) = webhooks.secret {
254 match secret_config.resolve() {
255 Ok(v) if v.is_empty() => {
256 tracing::error!(
257 "Webhook secret resolves to empty string. Skipping webhook delivery."
258 );
259 return;
260 }
261 Ok(v) => Some(v),
262 Err(e) => {
263 tracing::error!(
264 "Failed to resolve webhook secret: {}. Skipping webhook delivery.",
265 e
266 );
267 return;
268 }
269 }
270 } else {
271 None
272 };
273
274 for url in &webhooks.on_content_update {
275 let url = url.clone();
276 let secret = secret.clone();
277 tokio::spawn(async move {
278 send_webhook(&url, secret.as_deref()).await;
279 });
280 }
281 }
282 }
283
284 /// Get a reference to the config.
285 pub fn config(&self) -> &RileyCmsConfig {
286 &self.config
287 }
288}
289
290/// Maximum number of retry attempts for webhook delivery.
291const WEBHOOK_MAX_RETRIES: u32 = 3;
292
293/// Send a single webhook with SSRF protection, optional HMAC signing, and retry.
294///
295/// Resolves DNS once, validates all IPs against private ranges, then pins the
296/// connection to the validated IP using `reqwest::ClientBuilder::resolve()`.
297/// This prevents DNS rebinding (TOCTOU) attacks where DNS changes between
298/// validation and the actual connection.
299///
300/// Retries on network errors or 5xx responses. Does not retry on 4xx (client errors)
301/// since those indicate a problem with the receiver's configuration, not a transient issue.
302async fn send_webhook(url: &str, secret: Option<&str>) {
303 // 1. Parse URL and validate scheme
304 let parsed = match reqwest::Url::parse(url) {
305 Ok(u) => u,
306 Err(e) => {
307 tracing::warn!("Skipping webhook {}: invalid URL: {}", url, e);
308 return;
309 }
310 };
311
312 let scheme = parsed.scheme();
313 if scheme != "http" && scheme != "https" {
314 tracing::warn!("Skipping webhook {}: unsupported scheme: {}", url, scheme);
315 return;
316 }
317
318 let host = match parsed.host_str() {
319 Some(h) => h.to_string(),
320 None => {
321 tracing::warn!("Skipping webhook {}: missing host", url);
322 return;
323 }
324 };
325 let port = parsed.port_or_known_default().unwrap_or(443);
326
327 // 2. Resolve DNS once and validate all IPs
328 let addr_str = format!("{}:{}", host, port);
329 let addrs: Vec<std::net::SocketAddr> = match addr_str.to_socket_addrs() {
330 Ok(a) => a.collect(),
331 Err(e) => {
332 tracing::warn!("Skipping webhook {}: DNS resolution failed: {}", url, e);
333 return;
334 }
335 };
336
337 // 3. Find a safe (non-private) IP address to connect to
338 let safe_addr = match addrs.into_iter().find(|a| security::is_safe_ip(&a.ip())) {
339 Some(a) => a,
340 None => {
341 tracing::warn!(
342 "Skipping webhook {}: all resolved IPs are private/internal",
343 url
344 );
345 return;
346 }
347 };
348
349 // 4. Build client pinned to the validated IP (prevents DNS rebinding)
350 // Redirects disabled to prevent SSRF bypass via 302 to internal IPs.
351 let client = reqwest::Client::builder()
352 .resolve(&host, safe_addr)
353 .redirect(reqwest::redirect::Policy::none())
354 .timeout(std::time::Duration::from_secs(10))
355 .build()
356 .unwrap_or_else(|_| reqwest::Client::new());
357
358 // Include a timestamp in the payload to prevent replay attacks.
359 // Each webhook delivery gets a unique signature since the body changes.
360 let body = serde_json::json!({
361 "event": "content_update",
362 "timestamp": Utc::now().timestamp()
363 })
364 .to_string();
365
366 // Compute HMAC signature if secret is configured
367 let signature = match secret {
368 Some(s) => {
369 let mut mac = match Hmac::<Sha256>::new_from_slice(s.as_bytes()) {
370 Ok(m) => m,
371 Err(e) => {
372 tracing::error!("Invalid webhook secret key: {}. Skipping webhook.", e);
373 return;
374 }
375 };
376 mac.update(body.as_bytes());
377 Some(hex::encode(mac.finalize().into_bytes()))
378 }
379 None => None,
380 };
381
382 for attempt in 0..WEBHOOK_MAX_RETRIES {
383 let mut request = client
384 .post(url)
385 .header("Content-Type", "application/json")
386 .body(body.clone());
387
388 if let Some(ref sig) = signature {
389 request = request.header("X-Riley-Cms-Signature", format!("sha256={}", sig));
390 }
391
392 match request.send().await {
393 Ok(response) if response.status().is_success() => return,
394 Ok(response) if response.status().is_client_error() => {
395 tracing::warn!(
396 "Webhook {} returned {} (not retrying)",
397 url,
398 response.status()
399 );
400 return;
401 }
402 Ok(response) => {
403 tracing::warn!(
404 "Webhook {} returned {} (attempt {}/{})",
405 url,
406 response.status(),
407 attempt + 1,
408 WEBHOOK_MAX_RETRIES
409 );
410 }
411 Err(e) => {
412 tracing::warn!(
413 "Webhook {} failed: {} (attempt {}/{})",
414 url,
415 e,
416 attempt + 1,
417 WEBHOOK_MAX_RETRIES
418 );
419 }
420 }
421
422 // Exponential backoff: 1s, 2s, 4s
423 if attempt < WEBHOOK_MAX_RETRIES - 1 {
424 tokio::time::sleep(std::time::Duration::from_secs(1 << attempt)).await;
425 }
426 }
427
428 tracing::error!(
429 "Webhook {} failed after {} attempts",
430 url,
431 WEBHOOK_MAX_RETRIES
432 );
433}