frontmatter_gen/
lib.rs

1#![doc = include_str!("../README.md")]
2#![doc(
3    html_favicon_url = "https://kura.pro/frontmatter-gen/images/favicon.ico",
4    html_logo_url = "https://kura.pro/frontmatter-gen/images/logos/frontmatter-gen.svg",
5    html_root_url = "https://docs.rs/frontmatter-gen"
6)]
7
8//! # Frontmatter Gen
9//!
10//! `frontmatter-gen` is a fast, secure, and memory-efficient library for working with
11//! frontmatter in multiple formats (YAML, TOML, and JSON).
12//!
13//! ## Overview
14//!
15//! This library provides robust handling of frontmatter with the following key features:
16//!
17//! - **Zero-copy parsing** for optimal memory efficiency
18//! - **Type-safe operations** with comprehensive error handling
19//! - **Multiple format support** (YAML, TOML, JSON)
20//! - **Secure processing** with input validation and size limits
21//! - **Async support** with the `ssg` feature flag
22//!
23//! ## Security Features
24//!
25//! - Input validation to prevent malicious content
26//! - Size limits to prevent denial of service attacks
27//! - Safe string handling to prevent memory corruption
28//! - Secure path handling for file operations
29//!
30//! ## Quick Start
31//!
32//! ```rust
33//! use frontmatter_gen::{extract, Format, Frontmatter, Result};
34//!
35//! let content = r#"---
36//! title: Test Post
37//! date: 2025-09-09
38//! ---
39//! Content here"#;
40//!
41//! let result = extract(content);
42//! assert!(result.is_ok());
43//! let (frontmatter, content) = result.unwrap();
44//! assert_eq!(
45//!     frontmatter.get("title").and_then(|v| v.as_str()),
46//!     Some("Test Post")
47//! );
48//! assert_eq!(content.trim(), "Content here");
49//! # Ok::<(), frontmatter_gen::Error>(())
50//! ```
51//!
52//! ## Feature Flags
53//!
54//! - `default`: Core frontmatter functionality
55//! - `cli`: Command-line interface support
56//! - `ssg`: Static Site Generator functionality (includes CLI)
57//!
58//! ## Error Handling
59//!
60//! All operations return a `Result` type with detailed error information:
61//!
62//! ```rust
63//! use frontmatter_gen::{extract, Error};
64//!
65//! fn process_content(content: &str) -> Result<(), Error> {
66//!     let (frontmatter, _) = extract(content)?;
67//!
68//!     // Validate required fields
69//!     if !frontmatter.contains_key("title") {
70//!         return Err(Error::ValidationError(
71//!             "Missing required field: title".to_string()
72//!         ));
73//!     }
74//!
75//!     Ok(())
76//! }
77//! ```
78
79use std::num::NonZeroUsize;
80
81// Re-export core types and traits
82pub use crate::{
83    config::Config,
84    error::Error,
85    extractor::{detect_format, extract_raw_frontmatter},
86    parser::{parse, to_string},
87    types::{Format, Frontmatter, Value},
88};
89
90// Module declarations
91#[cfg(feature = "cli")]
92pub mod cli;
93pub mod config;
94pub mod engine;
95pub mod error;
96pub mod extractor;
97pub mod parser;
98#[cfg(feature = "ssg")]
99pub mod ssg;
100pub mod types;
101pub mod utils;
102
// Turns a `usize` expression into a `NonZeroUsize`, panicking with
// "Value must be non-zero" when the expression evaluates to zero.
//
// The expansion is a plain `if let`/`else` expression so that it can be used
// both in `const` initialisers and in ordinary runtime code. The caller must
// have `NonZeroUsize` in scope.
macro_rules! non_zero_usize {
    ($value:expr) => {
        if let Some(non_zero) = NonZeroUsize::new($value) {
            non_zero
        } else {
            panic!("Value must be non-zero")
        }
    };
}
111
112/// Maximum size allowed for frontmatter content (1MB)
113pub const MAX_FRONTMATTER_SIZE: NonZeroUsize =
114    non_zero_usize!(1024 * 1024);
115
116/// Maximum allowed nesting depth for structured data
117pub const MAX_NESTING_DEPTH: NonZeroUsize = non_zero_usize!(32);
118
119/// A specialized Result type for frontmatter operations.
120///
121/// This type alias provides a consistent error type throughout the crate
122/// and simplifies error handling for library users.
123pub type Result<T> = std::result::Result<T, Error>;
124
125/// Prelude module for convenient imports.
126///
127/// This module provides the most commonly used types and traits.
128/// Import all contents with `use frontmatter_gen::prelude::*`.
129pub mod prelude {
130    pub use crate::{
131        extract, to_format, Config, Error, Format, Frontmatter, Result,
132        Value,
133    };
134}
135
/// Limits and switches that govern how input is checked before parsing.
///
/// The type is `Copy`, so it can be passed around freely. Values are usually
/// obtained from `ParseOptions::default()` or `ParseOptions::from_env()`.
#[derive(Clone, Copy, Debug)]
pub struct ParseOptions {
    /// Largest content size, in bytes, that is accepted.
    pub max_size: NonZeroUsize,
    /// Deepest level of nesting that is accepted.
    pub max_depth: NonZeroUsize,
    /// Whether validation of the content structure is requested.
    pub validate: bool,
}
148
149impl Default for ParseOptions {
150    fn default() -> Self {
151        Self {
152            max_size: MAX_FRONTMATTER_SIZE,
153            max_depth: MAX_NESTING_DEPTH,
154            validate: true,
155        }
156    }
157}
158
159/// Validates input content against security constraints.
160///
161/// # Security
162///
163/// This function helps prevent denial of service attacks by:
164/// - Limiting the maximum size of frontmatter content
165/// - Skipping validation for fenced code blocks
166/// - Checking for malicious patterns
167///
168/// # Examples
169///
170/// ```rust
171/// use frontmatter_gen::{validate_input, ParseOptions};
172///
173/// let content = "---\ntitle: Example\n---\nBody content";
174/// let options = ParseOptions::default();
175/// assert!(validate_input(content, &options).is_ok());
176/// ```
177#[inline]
178pub fn validate_input(
179    content: &str,
180    options: &ParseOptions,
181) -> Result<()> {
182    let mut inside_fenced_code = false;
183
184    for line in content.lines() {
185        if line.trim_start().starts_with("```")
186            || line.trim_start().starts_with("~~~")
187        {
188            inside_fenced_code = !inside_fenced_code;
189            continue; // Skip validation for this line
190        }
191
192        if inside_fenced_code {
193            continue; // Skip validation inside fenced code blocks
194        }
195
196        // Path traversal detection
197        if line.contains("../") || line.contains("..\\") {
198            log::warn!("Potential path traversal detected: {}", line);
199            return Err(Error::ValidationError(
200                "Content contains path traversal patterns".to_string(),
201            ));
202        }
203
204        // Null byte validation
205        if line.contains('\0') {
206            log::warn!("Null byte detected in content");
207            return Err(Error::ValidationError(
208                "Content contains null bytes".to_string(),
209            ));
210        }
211    }
212
213    // Check size limit
214    if content.len() > options.max_size.get() {
215        log::warn!(
216            "Content exceeds maximum size: {} > {}",
217            content.len(),
218            options.max_size.get()
219        );
220        return Err(Error::ContentTooLarge {
221            size: content.len(),
222            max: options.max_size.get(),
223        });
224    }
225
226    Ok(())
227}
228
229/// Extracts and parses frontmatter from content with format auto-detection.
230///
231/// This function provides zero-copy extraction of frontmatter where possible,
232/// automatically detecting the format (YAML, TOML, or JSON) and parsing it
233/// into a structured representation.
234///
235/// # Security
236///
237/// This function includes several security measures:
238/// - Input validation and size limits
239/// - Safe string handling
240/// - Protection against malicious content
241///
242/// # Performance
243///
244/// Optimized for performance with:
245/// - Zero-copy operations where possible
246/// - Single-pass parsing
247/// - Minimal allocations
248/// - Pre-allocated buffers
249///
250/// # Examples
251///
252/// ```rust
253/// use frontmatter_gen::extract;
254///
255/// let content = r#"---
256/// title: My Post
257/// date: 2025-09-09
258/// ---
259/// Content here"#;
260///
261/// let (frontmatter, content) = extract(content)?;
262/// assert_eq!(frontmatter.get("title").unwrap().as_str().unwrap(), "My Post");
263/// assert_eq!(content.trim(), "Content here");
264/// # Ok::<(), frontmatter_gen::Error>(())
265/// ```
266///
267/// # Errors
268///
269/// Returns `Error` if:
270/// - Content exceeds size limits
271/// - Content is malformed
272/// - Frontmatter format is invalid
273/// - Parsing fails
274pub fn extract(content: &str) -> Result<(Frontmatter, &str)> {
275    let options = ParseOptions::from_env();
276    validate_input(content, &options)?;
277
278    let (raw_frontmatter, remaining_content) =
279        extract_raw_frontmatter(content)?;
280    let format = detect_format(raw_frontmatter)?;
281    let frontmatter = parse(raw_frontmatter, format)?;
282
283    Ok((frontmatter, remaining_content))
284}
285
286/// Converts frontmatter to a specific format.
287///
288/// # Arguments
289///
290/// * `frontmatter` - The frontmatter to convert
291/// * `format` - Target format for conversion
292///
293/// # Security
294///
295/// This function includes validation of:
296/// - Input size limits
297/// - Format compatibility
298/// - Output safety
299///
300/// # Examples
301///
302/// ```rust
303/// use frontmatter_gen::{Frontmatter, Format, Value, to_format};
304///
305/// let mut frontmatter = Frontmatter::new();
306/// frontmatter.insert("title".to_string(), Value::String("My Post".into()));
307///
308/// let yaml = to_format(&frontmatter, Format::Yaml)?;
309/// assert!(yaml.contains("title: My Post"));
310/// # Ok::<(), frontmatter_gen::Error>(())
311/// ```
312///
313/// # Errors
314///
315/// Returns `Error` if:
316/// - Serialization fails
317/// - Format conversion fails
318/// - Invalid data types are encountered
319pub fn to_format(
320    frontmatter: &Frontmatter,
321    format: Format,
322) -> Result<String> {
323    to_string(frontmatter, format)
324}
325
326impl ParseOptions {
327    /// Load options from environment variables or use defaults.
328    ///
329    /// Reads the following environment variables:
330    /// - `MAX_FRONTMATTER_SIZE`: Maximum size for frontmatter content.
331    /// - `MAX_NESTING_DEPTH`: Maximum allowed nesting depth.
332    /// - `VALIDATE_STRUCTURE`: Enable or disable structure validation (default: `true`).
333    ///
334    /// # Example
335    ///
336    /// ```rust
337    /// use frontmatter_gen::ParseOptions;
338    /// std::env::set_var("MAX_FRONTMATTER_SIZE", "2048");
339    /// std::env::set_var("MAX_NESTING_DEPTH", "64");
340    ///
341    /// let options = ParseOptions::from_env();
342    /// assert_eq!(options.max_size.get(), 2048);
343    /// assert_eq!(options.max_depth.get(), 64);
344    /// assert!(options.validate);
345    /// ```
346    pub fn from_env() -> Self {
347        let max_size = std::env::var("MAX_FRONTMATTER_SIZE")
348            .ok()
349            .and_then(|val| val.parse::<usize>().ok())
350            .map_or(MAX_FRONTMATTER_SIZE, |size| non_zero_usize!(size));
351
352        let max_depth = std::env::var("MAX_NESTING_DEPTH")
353            .ok()
354            .and_then(|val| val.parse::<usize>().ok())
355            .map_or(MAX_NESTING_DEPTH, |depth| non_zero_usize!(depth));
356
357        Self {
358            max_size,
359            max_depth,
360            validate: std::env::var("VALIDATE_STRUCTURE")
361                .map_or(true, |val| val.eq_ignore_ascii_case("true")),
362        }
363    }
364}
365
#[cfg(test)]
mod extractor_tests {
    use crate::Error;

    /// Stand-in for a fallible operation: upper-cases the input when it is
    /// present and reports a parse error when it is absent.
    fn mock_operation(input: Option<&str>) -> Result<String, Error> {
        input.map(str::to_uppercase).ok_or_else(|| {
            Error::ParseError("Input is missing".to_string())
        })
    }

    /// A present input yields `Ok` carrying the upper-cased text.
    #[test]
    fn test_result_type_success() {
        let outcome = mock_operation(Some("hello"));
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap(), "HELLO");
    }

    /// A missing input yields the expected `ParseError` message.
    #[test]
    fn test_result_type_error() {
        let outcome = mock_operation(None);
        assert!(matches!(
            outcome,
            Err(Error::ParseError(ref msg)) if msg == "Input is missing"
        ));
    }

    /// The result can be consumed with an exhaustive `match`.
    #[test]
    fn test_result_type_pattern_matching() {
        match mock_operation(Some("world")) {
            Err(e) => panic!("Operation failed: {:?}", e),
            Ok(value) => assert_eq!(value, "WORLD"),
        }
    }

    /// `unwrap` hands back the success value.
    #[test]
    fn test_result_type_unwrap() {
        assert_eq!(mock_operation(Some("rust")).unwrap(), "RUST");
    }

    /// `expect` hands back the success value.
    #[test]
    fn test_result_type_expect() {
        let value =
            mock_operation(Some("test")).expect("Unexpected error");
        assert_eq!(value, "TEST");
    }

    /// The `Debug` rendering of a failed result names the variant and message.
    #[test]
    fn test_result_type_debug_format() {
        let rendered = format!("{:?}", mock_operation(None));
        assert_eq!(rendered, "Err(ParseError(\"Input is missing\"))");
    }
}
434
#[cfg(test)]
mod parser_tests {
    use super::*;

    /// YAML key/value pairs are parsed into string and boolean values.
    #[test]
    fn test_parse_yaml_frontmatter() {
        let parsed =
            parse("title: Test Post\npublished: true", Format::Yaml)
                .unwrap();
        let title = parsed.get("title").unwrap().as_str().unwrap();
        let published =
            parsed.get("published").unwrap().as_bool().unwrap();
        assert_eq!(title, "Test Post");
        assert!(published);
    }

    /// TOML key/value pairs are parsed into string and boolean values.
    #[test]
    fn test_parse_toml_frontmatter() {
        let parsed = parse(
            "title = \"Test Post\"\npublished = true",
            Format::Toml,
        )
        .unwrap();
        let title = parsed.get("title").unwrap().as_str().unwrap();
        let published =
            parsed.get("published").unwrap().as_bool().unwrap();
        assert_eq!(title, "Test Post");
        assert!(published);
    }

    /// Syntactically invalid YAML is rejected.
    #[test]
    fn test_invalid_yaml_syntax() {
        assert!(parse("title: : invalid yaml", Format::Yaml).is_err());
    }

    /// TOML with an unterminated string is rejected.
    #[test]
    fn test_parse_invalid_toml_syntax() {
        let outcome = parse("title = \"Unmatched quote", Format::Toml);
        assert!(outcome.is_err(), "Should fail for invalid TOML syntax");
    }

    /// JSON with a missing closing brace is rejected.
    #[test]
    fn test_parse_invalid_json_syntax() {
        let outcome = parse(
            "{\"title\": \"Missing closing brace\"",
            Format::Json,
        );
        assert!(outcome.is_err(), "Should fail for invalid JSON syntax");
    }

    /// The `Unsupported` format is rejected regardless of the input text.
    #[test]
    fn test_parse_with_unknown_format() {
        let outcome = parse("random text", Format::Unsupported);
        assert!(outcome.is_err(), "Should fail for unsupported formats");
    }

    /// Well-formed YAML yields the expected entries.
    #[test]
    fn test_parse_valid_yaml() {
        let frontmatter =
            parse("title: Valid Post\npublished: true", Format::Yaml)
                .unwrap();
        let title = frontmatter.get("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Valid Post");
        let published =
            frontmatter.get("published").unwrap().as_bool().unwrap();
        assert!(published);
    }

    /// Malformed YAML is rejected.
    #[test]
    fn test_parse_malformed_yaml() {
        let outcome = parse("title: : bad yaml", Format::Yaml);
        assert!(outcome.is_err(), "Should fail for malformed YAML");
    }

    /// A JSON object yields the expected string and boolean entries.
    #[test]
    fn test_parse_json() {
        let frontmatter = parse(
            r#"{"title": "Valid Post", "draft": false}"#,
            Format::Json,
        )
        .unwrap();
        let title = frontmatter.get("title").unwrap().as_str().unwrap();
        let draft = frontmatter.get("draft").unwrap().as_bool().unwrap();
        assert_eq!(title, "Valid Post");
        assert!(!draft);
    }
}
531
#[cfg(test)]
mod format_tests {
    use super::*;

    /// Builds frontmatter that holds exactly one string entry.
    fn single_entry(key: &str, value: &str) -> Frontmatter {
        let mut frontmatter = Frontmatter::new();
        let _ = frontmatter
            .insert(key.to_string(), Value::String(value.to_string()));
        frontmatter
    }

    /// A string entry appears as `key: value` in the YAML output.
    #[test]
    fn test_to_format_yaml() {
        let frontmatter = single_entry("title", "Test Post");
        let yaml = to_format(&frontmatter, Format::Yaml).unwrap();
        assert!(yaml.contains("title: Test Post"));
    }

    /// YAML produced by `to_format` can be read back by `extract`.
    #[test]
    fn test_format_conversion_roundtrip() {
        let frontmatter = single_entry("key", "value");
        let yaml = to_format(&frontmatter, Format::Yaml).unwrap();
        let content = format!("---\n{}\n---\nContent", yaml);
        let (parsed, _) = extract(&content).unwrap();
        let value = parsed.get("key").unwrap().as_str().unwrap();
        assert_eq!(value, "value");
    }

    /// Serialising to the `Unsupported` format fails.
    #[test]
    fn test_unsupported_format() {
        let empty = Frontmatter::new();
        assert!(to_format(&empty, Format::Unsupported).is_err());
    }

    /// Same check as `test_to_format_yaml`, kept as a separate case.
    #[test]
    fn test_convert_to_yaml() {
        let frontmatter = single_entry("title", "Test Post");
        let yaml = to_format(&frontmatter, Format::Yaml).unwrap();
        assert!(yaml.contains("title: Test Post"));
    }

    /// Frontmatter obtained from `extract` serialises back to YAML.
    #[test]
    fn test_roundtrip_conversion() {
        let (parsed, _) =
            extract("---\ntitle: Test Post\n---\nContent").unwrap();
        let yaml = to_format(&parsed, Format::Yaml).unwrap();
        assert!(yaml.contains("title: Test Post"));
    }

    /// Empty frontmatter with the `Unsupported` format is still an error.
    #[test]
    fn test_format_invalid_data() {
        let outcome =
            to_format(&Frontmatter::new(), Format::Unsupported);
        assert!(outcome.is_err());
    }
}
596
#[cfg(test)]
mod integration_tests {
    use super::*;

    /// `extract` returns both the parsed title and the trailing body.
    #[test]
    fn test_end_to_end_extraction_and_parsing() {
        let (frontmatter, body) =
            extract("---\ntitle: Test Post\n---\nContent here").unwrap();
        let title = frontmatter.get("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Test Post");
        assert_eq!(body.trim(), "Content here");
    }

    /// Extracted frontmatter can be serialised back to YAML.
    #[test]
    fn test_roundtrip_conversion() {
        let (frontmatter, _) =
            extract("---\ntitle: Test Post\n---\nContent").unwrap();
        let yaml = to_format(&frontmatter, Format::Yaml).unwrap();
        assert!(yaml.contains("title: Test Post"));
    }

    /// Full flow on a second document: title and body are both recovered.
    #[test]
    fn test_complete_workflow() {
        let source = "---\ntitle: Integration Test\n---\nBody content";
        let (frontmatter, body) = extract(source).unwrap();
        let title = frontmatter.get("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Integration Test");
        assert_eq!(body.trim(), "Body content");
    }

    /// Text without frontmatter delimiters is reported as an error.
    #[test]
    fn test_end_to_end_error_handling() {
        assert!(extract("Invalid frontmatter").is_err());
    }
}
638
#[cfg(test)]
mod edge_case_tests {
    use super::*;

    /// Quoted titles containing YAML-significant characters survive parsing.
    #[test]
    fn test_special_characters_handling() {
        let cases = [
            (
                "---\ntitle: \"Special: &chars\"\n---\nContent",
                "Special: &chars",
            ),
            (
                "---\ntitle: \"Another > test\"\n---\nContent",
                "Another > test",
            ),
        ];

        for &(content, expected_title) in &cases {
            let (frontmatter, _) = extract(content).unwrap();
            let title =
                frontmatter.get("title").unwrap().as_str().unwrap();
            assert_eq!(title, expected_title);
        }
    }

    /// `extract` can be called from inside an async test.
    #[cfg(feature = "ssg")]
    #[tokio::test]
    async fn test_async_extraction() {
        let (frontmatter, body) =
            extract("---\ntitle: Async Test\n---\nContent").unwrap();
        let title = frontmatter.get("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Async Test");
        assert_eq!(body.trim(), "Content");
    }

    /// A document with 1000 distinct keys is parsed in full.
    #[test]
    fn test_large_frontmatter() {
        let entries: String = (0..1000)
            .map(|i| format!("key_{}: value_{}\n", i, i))
            .collect();
        let large_content = format!("---\n{}---\nContent", entries);

        let (frontmatter, content) = extract(&large_content).unwrap();
        assert_eq!(frontmatter.len(), 1000);
        assert_eq!(content.trim(), "Content");
    }

    /// Ampersands and angle brackets inside a quoted title are preserved.
    #[test]
    fn test_special_characters() {
        let source =
            "---\ntitle: \"Special & <characters>\"\n---\nContent";
        let (frontmatter, _) = extract(source).unwrap();
        let title = frontmatter.get("title").unwrap().as_str().unwrap();
        assert_eq!(title, "Special & <characters>");
    }
}
701
#[cfg(test)]
mod validate_input_tests {
    use super::*;

    /// Runs `validate_input` with the default options.
    fn check(content: &str) -> crate::Result<()> {
        validate_input(content, &ParseOptions::default())
    }

    /// A traversal pattern inside a fenced code block is not flagged.
    #[test]
    fn test_skip_validation_in_fenced_code_blocks() {
        let document = r#"
        ---
        title: Example
        ---
        ```
        ../example/path
        ```
        Valid content here.
        "#;

        assert!(
            check(document).is_ok(),
            "Validation should skip fenced code blocks."
        );
    }

    /// The same pattern outside any fence is rejected.
    #[test]
    fn test_detect_path_traversal_outside_code_blocks() {
        let document = r#"
        ---
        title: Example
        ---
        ../malicious/path
        "#;

        assert!(check(document).is_err(), "Validation should detect path traversal outside fenced code blocks.");
    }

    /// A NUL byte produces the dedicated validation message.
    #[test]
    fn test_validate_input_null_bytes() {
        let outcome = check("title: Valid\0Post");
        assert!(matches!(
            outcome,
            Err(Error::ValidationError(ref msg)) if msg == "Content contains null bytes"
        ));
    }

    /// Input one byte over the limit is reported as too large.
    #[test]
    fn test_validate_input_exceeds_max_size() {
        let options = ParseOptions::default();
        let too_long = "a".repeat(options.max_size.get() + 1);
        let outcome = validate_input(&too_long, &options);
        assert!(matches!(outcome, Err(Error::ContentTooLarge { .. })));
    }

    /// Same NUL-byte check, kept as a separate case.
    #[test]
    fn test_validate_input_contains_null_bytes() {
        let outcome = check("title: Valid\0Post");
        assert!(matches!(
            outcome,
            Err(Error::ValidationError(ref msg)) if msg == "Content contains null bytes"
        ));
    }

    /// A bare traversal path produces the dedicated validation message.
    #[test]
    fn test_validate_input_path_traversal() {
        let outcome = check("../malicious/path");
        assert!(matches!(
            outcome,
            Err(Error::ValidationError(ref msg)) if msg == "Content contains path traversal patterns"
        ));
    }
}
781
#[cfg(test)]
mod parse_options_tests {
    use super::*;

    /// Defaults are 1 MiB, depth 32, validation enabled.
    #[test]
    fn test_parse_options_default() {
        let ParseOptions {
            max_size,
            max_depth,
            validate,
        } = ParseOptions::default();
        assert_eq!(max_size.get(), 1024 * 1024);
        assert_eq!(max_depth.get(), 32);
        assert!(validate);
    }

    /// Values set in the environment override the defaults.
    ///
    /// The variables are process-global; they are removed again at the end
    /// of the test.
    #[test]
    fn test_parse_options_from_env() {
        const OVERRIDES: [(&str, &str); 3] = [
            ("MAX_FRONTMATTER_SIZE", "524288"),
            ("MAX_NESTING_DEPTH", "20"),
            ("VALIDATE_STRUCTURE", "false"),
        ];

        for &(name, value) in &OVERRIDES {
            std::env::set_var(name, value);
        }

        let options = ParseOptions::from_env();
        assert_eq!(options.max_size.get(), 524288);
        assert_eq!(options.max_depth.get(), 20);
        assert!(!options.validate);

        for &(name, _) in &OVERRIDES {
            std::env::remove_var(name);
        }
    }
}
809}