// context_weaver/entry.rs

//! Entry parsing and representation.
//!
//! A `.weaver` file consists of YAML frontmatter delimited by `---` lines
//! followed by a weaver-lang template body:
//!
//! ```text
//! ---
//! id: dark_forest
//! name: Dark Forest Description
//! keywords: ["dark forest", "shadowed path"]
//! slot: context
//! fallback: [foundation]
//! priority: 100
//! ---
//! The dark forest looms ahead...
//! ```
17
18use regex::Regex;
19use serde::{Deserialize, Serialize};
20use std::path::Path;
21use std::sync::Arc;
22use weaver_lang::{CompiledExpr, CompiledTemplate};
23
24use crate::assembler::Slot;
25use crate::ContextWeaverError;
26
27// ── Entry ───────────────────────────────────────────────────────────────
28
29/// A single lorebook entry: metadata + compiled template.
30#[derive(Clone)]
31pub struct Entry {
32    pub meta: EntryMeta,
33    pub compiled: Arc<CompiledTemplate>,
34    /// An optional weaver-based condition statement to further fine-tune activation.
35    pub condition: Option<Arc<CompiledExpr>>,
36    /// Compiled regex patterns, cached at parse time to avoid recompilation
37    /// on every activation scan.
38    pub compiled_regex: Vec<Regex>,
39    /// Raw body source, preserved for diagnostics and re-serialization.
40    pub source_body: String,
41}
42
43/// Structured metadata parsed from the YAML frontmatter.
44///
45/// Fields map directly to the frontmatter keys. Unknown keys are
46/// preserved in `extensions` so plugins and community tools can
47/// stash their own data without it being silently dropped.
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct EntryMeta {
50    /// Unique identifier for this entry. Used in triggers and document refs.
51    pub id: String,
52
53    /// Human-readable display name.
54    #[serde(default)]
55    pub name: String,
56
57    // ── Activation (Tier 1) ─────────────────────────────────────────
58    /// Keywords that activate this entry when found in chat messages.
59    /// Matching is case-insensitive by default.
60    #[serde(default)]
61    pub keywords: Vec<String>,
62
63    /// Regex patterns for activation. Matched against recent messages.
64    /// These are compiled and cached on the [`Entry`] struct at parse time.
65    #[serde(default)]
66    pub regex: Vec<String>,
67
68    /// A weaver-based expression for further fine-tuning activation conditions.
69    pub condition: Option<String>,
70
71    /// How many recent messages to scan for keywords/regex.
72    /// `None` means use the lorebook default.
73    #[serde(default)]
74    pub scan_depth: Option<usize>,
75
76    /// If true, this entry is always active regardless of keywords.
77    #[serde(default)]
78    pub constant: bool,
79
80    // ── Ordering & placement ────────────────────────────────────────
81    /// Higher priority entries are evaluated first and take precedence
82    /// in token budget conflicts.
83    #[serde(default = "default_priority")]
84    pub priority: i32,
85
86    /// Target slot for this entry's output in the prompt.
87    ///
88    /// Standard slots form a gradient from deep background to immediate
89    /// foreground: `preamble`, `foundation`, `context`, `reference`,
90    /// `framing`, `guidance`, `emphasis`, `immediate`, `aftermath`.
91    #[serde(default)]
92    pub slot: Slot,
93
94    /// Fallback slots to try if the primary slot is not available
95    /// in the user's ContextDefinition template. Tried in order.
96    /// If no fallback matches, the entry is silently dropped.
97    #[serde(default)]
98    pub fallback: Vec<Slot>,
99
100    /// Tie-breaker for entries at the same slot and priority.
101    #[serde(default = "default_insertion_order")]
102    pub insertion_order: i32,
103
104    // ── Behavior ────────────────────────────────────────────────────
105    /// Whether this entry is enabled. Disabled entries are skipped entirely.
106    #[serde(default = "default_true")]
107    pub enabled: bool,
108
109    /// Once activated, stay active for this many turns even if keywords
110    /// no longer match. 0 means re-evaluate every turn.
111    #[serde(default)]
112    pub sticky_turns: usize,
113
114    /// Minimum turns between activations. Prevents rapid re-triggering.
115    #[serde(default)]
116    pub cooldown: usize,
117
118    /// If set, this entry counts toward the named group's token budget
119    /// rather than the global budget. Groups allow authors to say
120    /// "these combat entries share a 500-token pool."
121    #[serde(default)]
122    pub group: Option<String>,
123
124    /// Tags for organizational purposes and for other entries to query.
125    #[serde(default)]
126    pub tags: Vec<String>,
127
128    // ── Extensions ──────────────────────────────────────────────────
129    /// Catch-all for unknown frontmatter keys. Preserved on round-trip.
130    #[serde(flatten)]
131    pub extensions: std::collections::HashMap<String, serde_yaml::Value>,
132}
133
/// Serde default for `EntryMeta::priority` when the frontmatter omits it.
fn default_priority() -> i32 {
    100
}
/// Serde default for `EntryMeta::insertion_order` when the frontmatter omits it.
fn default_insertion_order() -> i32 {
    50
}
/// Serde default for boolean fields that are on unless disabled
/// (used by `EntryMeta::enabled`).
fn default_true() -> bool {
    true
}
143
144// ── Parsing ─────────────────────────────────────────────────────────────
145
146impl Entry {
147    /// Parse a `.weaver` file from its raw contents.
148    pub fn parse(source: &str, file_path: Option<&str>) -> Result<Self, ContextWeaverError> {
149        let (frontmatter, body) =
150            split_frontmatter(source).ok_or_else(|| ContextWeaverError::MetaParse {
151                entry_path: file_path.unwrap_or("<unknown>").to_string(),
152                message: "missing frontmatter delimiters (---)".to_string(),
153            })?;
154
155        let meta: EntryMeta =
156            serde_yaml::from_str(frontmatter).map_err(|e| ContextWeaverError::MetaParse {
157                entry_path: file_path.unwrap_or("<unknown>").to_string(),
158                message: e.to_string(),
159            })?;
160
161        let compiled = CompiledTemplate::compile(body).map_err(|errors| {
162            ContextWeaverError::TemplateParse {
163                entry_id: meta.id.clone(),
164                errors,
165            }
166        })?;
167
168        let condition = meta
169            .condition
170            .as_ref()
171            .map(|src| CompiledExpr::compile(src))
172            .transpose()
173            .map_err(|errors| ContextWeaverError::TemplateParse {
174                entry_id: meta.id.clone(),
175                errors,
176            })?
177            .map(Arc::new);
178
179        // Compile regex patterns once at parse time
180        let compiled_regex = meta
181            .regex
182            .iter()
183            .filter_map(|pattern| match Regex::new(pattern) {
184                Ok(re) => Some(re),
185                Err(e) => {
186                    // Log but don't fail — bad regexes are skipped
187                    tracing::error!(
188                        "warning: entry '{}': invalid regex '{}': {}",
189                        meta.id, pattern, e
190                    );
191                    None
192                }
193            })
194            .collect();
195
196        Ok(Entry {
197            meta,
198            compiled: Arc::new(compiled),
199            source_body: body.to_string(),
200            condition,
201            compiled_regex,
202        })
203    }
204
205    /// Load and parse a `.weaver` file from disk.
206    pub fn load(path: &Path) -> Result<Self, ContextWeaverError> {
207        let source = std::fs::read_to_string(path)?;
208        Self::parse(&source, path.to_str())
209    }
210}
211
/// Split source into `(frontmatter, body)` at the `---` delimiter lines.
///
/// The source must begin with a `---` line. The frontmatter runs up to (but
/// not including) the first following line that is exactly `---`; the body
/// is everything after that line. Both `\n` and `\r\n` line endings are
/// accepted (also mixed within one file), and the closing delimiter may be
/// the last line of the file with no trailing newline.
///
/// The line terminator just before the closing delimiter is not part of the
/// returned frontmatter, and the terminator just after it is not part of the
/// returned body.
///
/// Returns `None` when the opening delimiter is missing or no closing
/// delimiter line is found.
fn split_frontmatter(source: &str) -> Option<(&str, &str)> {
    // Removes one trailing line terminator (`\r\n` or `\n`), if present.
    fn trim_line_ending(line: &str) -> &str {
        line.strip_suffix("\r\n")
            .or_else(|| line.strip_suffix('\n'))
            .unwrap_or(line)
    }

    let after_marker = source.strip_prefix("---")?;
    let rest = after_marker
        .strip_prefix('\n')
        .or_else(|| after_marker.strip_prefix("\r\n"))?;

    // Walk line by line so the *earliest* delimiter line wins regardless of
    // which line-ending style it uses; `offset` is the byte index in `rest`
    // where the current line starts.
    let mut offset = 0;
    for line in rest.split_inclusive('\n') {
        if trim_line_ending(line) == "---" {
            let (head, tail) = rest.split_at(offset);
            // `tail` starts with the delimiter line itself; skip it whole.
            return Some((trim_line_ending(head), &tail[line.len()..]));
        }
        offset += line.len();
    }
    None
}
238
239// ── Tests ───────────────────────────────────────────────────────────────
240
#[cfg(test)]
mod tests {
    use super::*;

    /// Explicit frontmatter values are carried through to `EntryMeta`.
    #[test]
    fn test_parse_basic_entry() {
        let source = r#"---
id: test_entry
name: Test Entry
keywords: ["hello", "world"]
slot: coda
priority: 50
---
Hello, {{char:name}}!
"#;
        let entry = Entry::parse(source, None).unwrap();
        assert_eq!(entry.meta.id, "test_entry");
        assert_eq!(entry.meta.keywords, vec!["hello", "world"]);
        assert_eq!(entry.meta.priority, 50);
        assert_eq!(entry.meta.slot, Slot::Coda);
    }

    /// Only `id` is required; everything else falls back to its default.
    #[test]
    fn test_default_values() {
        let source = r#"---
id: minimal
---
content
"#;
        let entry = Entry::parse(source, None).unwrap();
        assert_eq!(entry.meta.priority, 100);
        assert!(entry.meta.enabled);
        assert!(entry.meta.keywords.is_empty());
        assert!(!entry.meta.constant);
        assert_eq!(entry.meta.slot, Slot::Backdrop); // default
        assert!(entry.meta.fallback.is_empty());
    }

    /// Fallback slots are parsed in the order they are listed.
    #[test]
    fn test_fallback_parsed() {
        let source = r#"---
id: with_fallback
slot: preamble
fallback: [backdrop, coda]
---
content
"#;
        let entry = Entry::parse(source, None).unwrap();
        assert_eq!(entry.meta.slot, Slot::Preamble);
        assert_eq!(entry.meta.fallback, vec![Slot::Backdrop, Slot::Coda]);
    }

    /// Every valid pattern is compiled and usable right after parsing.
    #[test]
    fn test_regex_compiled_at_parse_time() {
        let source = r#"---
id: regex_entry
regex: ['\b(attack|fight)\b', '\d{3,}']
---
content
"#;
        let entry = Entry::parse(source, None).unwrap();
        assert_eq!(entry.compiled_regex.len(), 2);
        assert!(entry.compiled_regex[0].is_match("attack now"));
        assert!(entry.compiled_regex[1].is_match("found 1000 gold"));
    }

    /// A pattern that fails to compile does not fail the whole entry.
    #[test]
    fn test_invalid_regex_skipped() {
        let source = r#"---
id: bad_regex
regex: ['[invalid', '\d+']
---
content
"#;
        let entry = Entry::parse(source, None).unwrap();
        // The invalid regex is skipped, only the valid one is kept
        assert_eq!(entry.compiled_regex.len(), 1);
        assert!(entry.compiled_regex[0].is_match("42"));
    }

    /// Unknown frontmatter keys land in `extensions` instead of being dropped.
    #[test]
    fn test_extensions_preserved() {
        let source = r#"---
id: extended
my_custom_field: "hello"
plugin_data:
  foo: bar
---
content
"#;
        let entry = Entry::parse(source, None).unwrap();
        assert!(entry.meta.extensions.contains_key("my_custom_field"));
        assert!(entry.meta.extensions.contains_key("plugin_data"));
    }

    /// Input without `---` delimiters is reported as a metadata error.
    #[test]
    fn test_missing_frontmatter_errors() {
        let result = Entry::parse("no frontmatter here", None);
        assert!(matches!(result, Err(ContextWeaverError::MetaParse { .. })));
    }

    /// The splitter accepts both LF and CRLF line endings and drops the
    /// terminators adjacent to the closing delimiter.
    #[test]
    fn test_split_frontmatter_line_endings() {
        assert_eq!(
            split_frontmatter("---\nid: a\n---\nbody\n"),
            Some(("id: a", "body\n"))
        );
        assert_eq!(
            split_frontmatter("---\r\nid: a\r\n---\r\nbody"),
            Some(("id: a", "body"))
        );
    }
}
341}