Skip to main content

context_weaver/
entry.rs

1//! Entry parsing and representation.
2//!
3//! A `.weaver` file consists of YAML frontmatter delimited by `---` lines
4//! followed by a weaver-lang template body:
5//!
6//! ```text
7//! ---
8//! id: dark_forest
9//! name: Dark Forest Description
10//! keywords: ["dark forest", "shadowed path"]
11//! slot: context
12//! fallback: [foundation]
13//! priority: 100
14//! ---
15//! The dark forest looms ahead...
16//! ```
17
18use regex::Regex;
19use serde::{Deserialize, Serialize};
20use std::path::Path;
21use std::sync::Arc;
22use weaver_lang::{CompiledExpr, CompiledTemplate};
23
24use crate::ContextWeaverError;
25use crate::assembler::Slot;
26
27// ── Entry ───────────────────────────────────────────────────────────────
28
/// A single lorebook entry: metadata + compiled template.
///
/// Built by [`Entry::parse`] from the text of a `.weaver` file. The template
/// and condition are held behind [`Arc`], and the struct derives `Clone`.
#[derive(Clone)]
pub struct Entry {
    /// Frontmatter metadata, deserialized from the YAML block.
    pub meta: EntryMeta,
    /// The template body, compiled once at parse time.
    pub compiled: Arc<CompiledTemplate>,
    /// An optional weaver-based condition statement to further fine-tune activation.
    /// Compiled from `meta.condition`; `None` when the frontmatter has no condition.
    pub condition: Option<Arc<CompiledExpr>>,
    /// Compiled regex patterns, cached at parse time to avoid recompilation
    /// on every activation scan.
    ///
    /// Patterns from `meta.regex` that fail to compile are skipped, so this
    /// vector may be shorter than `meta.regex` and indices need not line up.
    pub compiled_regex: Vec<Regex>,
    /// Raw body source, preserved for diagnostics and re-serialization.
    /// This is the text following the closing `---` delimiter line.
    pub source_body: String,
}
42
/// Structured metadata parsed from the YAML frontmatter.
///
/// Fields map directly to the frontmatter keys. Unknown keys are
/// preserved in `extensions` so plugins and community tools can
/// stash their own data without it being silently dropped.
///
/// Only `id` is required; every other key may be omitted from the
/// frontmatter and falls back to the default noted on the field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntryMeta {
    /// Unique identifier for this entry. Used in triggers and document refs.
    pub id: String,

    /// Human-readable display name. Defaults to the empty string.
    #[serde(default)]
    pub name: String,

    // ── Activation (Tier 1) ─────────────────────────────────────────
    /// Keywords that activate this entry when found in chat messages.
    /// Matching is case-insensitive by default.
    #[serde(default)]
    pub keywords: Vec<String>,

    /// Regex patterns for activation. Matched against recent messages.
    /// These are compiled and cached on the [`Entry`] struct at parse time.
    /// A pattern that fails to compile is logged and skipped, not rejected.
    #[serde(default)]
    pub regex: Vec<String>,

    /// A weaver-based expression for further fine-tuning activation conditions.
    /// Compiled at parse time; a compile failure makes `Entry::parse` return
    /// an error. May be omitted from the frontmatter.
    pub condition: Option<String>,

    /// How many recent messages to scan for keywords/regex.
    /// `None` means use the lorebook default.
    #[serde(default)]
    pub scan_depth: Option<usize>,

    /// If true, this entry is always active regardless of keywords.
    #[serde(default)]
    pub constant: bool,

    // ── Ordering & placement ────────────────────────────────────────
    /// Higher priority entries are evaluated first and take precedence
    /// in token budget conflicts. Defaults to 100.
    #[serde(default = "default_priority")]
    pub priority: i32,

    /// Target slot for this entry's output in the prompt.
    ///
    /// Standard slots form a gradient from deep background to immediate
    /// foreground: `preamble`, `foundation`, `context`, `reference`,
    /// `framing`, `guidance`, `emphasis`, `immediate`, `aftermath`.
    ///
    /// NOTE(review): the unit tests in this file use `backdrop` and `coda`
    /// and expect `Slot::Backdrop` as the default, so the list above may be
    /// out of date — verify against the `Slot` enum.
    #[serde(default)]
    pub slot: Slot,

    /// Fallback slots to try if the primary slot is not available
    /// in the user's ContextDefinition template. Tried in order.
    /// If no fallback matches, the entry is silently dropped.
    #[serde(default)]
    pub fallback: Vec<Slot>,

    /// Tie-breaker for entries at the same slot and priority. Defaults to 50.
    #[serde(default = "default_insertion_order")]
    pub insertion_order: i32,

    // ── Behavior ────────────────────────────────────────────────────
    /// Whether this entry is enabled. Disabled entries are skipped entirely.
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Once activated, stay active for this many turns even if keywords
    /// no longer match. 0 means re-evaluate every turn.
    #[serde(default)]
    pub sticky_turns: usize,

    /// Minimum turns between activations. Prevents rapid re-triggering.
    #[serde(default)]
    pub cooldown: usize,

    /// If set, this entry counts toward the named group's token budget
    /// rather than the global budget. Groups allow authors to say
    /// "these combat entries share a 500-token pool."
    #[serde(default)]
    pub group: Option<String>,

    /// Tags for organizational purposes and for other entries to query.
    #[serde(default)]
    pub tags: Vec<String>,

    // ── Extensions ──────────────────────────────────────────────────
    /// Catch-all for unknown frontmatter keys. Preserved on round-trip.
    /// Populated via `#[serde(flatten)]`, so any key not matched by a field
    /// above lands here.
    #[serde(flatten)]
    pub extensions: std::collections::HashMap<String, serde_yaml::Value>,
}
133
/// Serde default for `EntryMeta::priority` when the key is absent.
fn default_priority() -> i32 {
    const DEFAULT_PRIORITY: i32 = 100;
    DEFAULT_PRIORITY
}
/// Serde default for `EntryMeta::insertion_order` when the key is absent.
fn default_insertion_order() -> i32 {
    const DEFAULT_INSERTION_ORDER: i32 = 50;
    DEFAULT_INSERTION_ORDER
}
/// Serde default for boolean fields that are on unless explicitly disabled
/// (used by `EntryMeta::enabled`).
fn default_true() -> bool {
    const ON_BY_DEFAULT: bool = true;
    ON_BY_DEFAULT
}
143
144// ── Parsing ─────────────────────────────────────────────────────────────
145
146impl Entry {
147    /// Parse a `.weaver` file from its raw contents.
148    pub fn parse(source: &str, file_path: Option<&str>) -> Result<Self, ContextWeaverError> {
149        let (frontmatter, body) =
150            split_frontmatter(source).ok_or_else(|| ContextWeaverError::MetaParse {
151                entry_path: file_path.unwrap_or("<unknown>").to_string(),
152                message: "missing frontmatter delimiters (---)".to_string(),
153            })?;
154
155        let meta: EntryMeta =
156            serde_yaml::from_str(frontmatter).map_err(|e| ContextWeaverError::MetaParse {
157                entry_path: file_path.unwrap_or("<unknown>").to_string(),
158                message: e.to_string(),
159            })?;
160
161        let compiled = CompiledTemplate::compile(body).map_err(|errors| {
162            ContextWeaverError::TemplateParse {
163                entry_id: meta.id.clone(),
164                errors,
165            }
166        })?;
167
168        let condition = meta
169            .condition
170            .as_ref()
171            .map(|src| CompiledExpr::compile(src))
172            .transpose()
173            .map_err(|errors| ContextWeaverError::TemplateParse {
174                entry_id: meta.id.clone(),
175                errors,
176            })?
177            .map(Arc::new);
178
179        // Compile regex patterns once at parse time
180        let compiled_regex = meta
181            .regex
182            .iter()
183            .filter_map(|pattern| match Regex::new(pattern) {
184                Ok(re) => Some(re),
185                Err(e) => {
186                    // Log but don't fail — bad regexes are skipped
187                    tracing::error!(
188                        "warning: entry '{}': invalid regex '{}': {}",
189                        meta.id,
190                        pattern,
191                        e
192                    );
193                    None
194                }
195            })
196            .collect();
197
198        Ok(Entry {
199            meta,
200            compiled: Arc::new(compiled),
201            source_body: body.to_string(),
202            condition,
203            compiled_regex,
204        })
205    }
206
207    /// Load and parse a `.weaver` file from disk.
208    pub fn load(path: &Path) -> Result<Self, ContextWeaverError> {
209        let source = std::fs::read_to_string(path)?;
210        Self::parse(&source, path.to_str())
211    }
212
213    pub fn to_source(&self) -> String {
214        self.source_body.clone()
215    }
216}
217
/// Split source into (frontmatter, body) at the `---` delimiters.
///
/// The file must start with a `---` line. The frontmatter runs up to (but
/// not including) the line terminator before the next line that is exactly
/// `---`; the body is everything after that closing line. Both `\n` and
/// `\r\n` line endings are accepted, including a mix of the two.
///
/// The closing delimiter may be the last line of the file without a trailing
/// newline (the body is then empty), and the frontmatter may be empty.
///
/// Returns `None` if the opening line is not `---` or no closing `---` line
/// exists.
fn split_frontmatter(source: &str) -> Option<(&str, &str)> {
    let after_open = source.strip_prefix("---")?;
    let content = after_open
        .strip_prefix('\n')
        .or_else(|| after_open.strip_prefix("\r\n"))?;

    // Walk line by line so the *first* delimiter line wins regardless of
    // which line-ending style it uses.
    let mut line_start = 0;
    for line in content.split_inclusive('\n') {
        let line_end = line_start + line.len();

        let text = line.strip_suffix('\n').unwrap_or(line);
        let text = text.strip_suffix('\r').unwrap_or(text);

        if text == "---" {
            // Drop the single line terminator that precedes the delimiter.
            let frontmatter = &content[..line_start];
            let frontmatter = frontmatter.strip_suffix('\n').unwrap_or(frontmatter);
            let frontmatter = frontmatter.strip_suffix('\r').unwrap_or(frontmatter);
            return Some((frontmatter, &content[line_end..]));
        }

        line_start = line_end;
    }

    None
}
244
245// ── Tests ───────────────────────────────────────────────────────────────
246
#[cfg(test)]
mod tests {
    use super::*;

    // A fully specified entry: explicit frontmatter values must override
    // the serde defaults.
    #[test]
    fn test_parse_basic_entry() {
        let source = r#"---
id: test_entry
name: Test Entry
keywords: ["hello", "world"]
slot: coda
priority: 50
---
Hello, {{char:name}}!
"#;
        let entry = Entry::parse(source, None).unwrap();
        assert_eq!(entry.meta.id, "test_entry");
        assert_eq!(entry.meta.keywords, vec!["hello", "world"]);
        assert_eq!(entry.meta.priority, 50);
        assert_eq!(entry.meta.slot, Slot::Coda);
    }

    // Only `id` is given: every other field must take its documented default.
    #[test]
    fn test_default_values() {
        let source = r#"---
id: minimal
---
content
"#;
        let entry = Entry::parse(source, None).unwrap();
        assert_eq!(entry.meta.priority, 100);
        assert!(entry.meta.enabled);
        assert!(entry.meta.keywords.is_empty());
        assert!(!entry.meta.constant);
        assert_eq!(entry.meta.slot, Slot::Backdrop); // default
        assert!(entry.meta.fallback.is_empty());
    }

    // The fallback list is deserialized in the order it was written.
    #[test]
    fn test_fallback_parsed() {
        let source = r#"---
id: with_fallback
slot: preamble
fallback: [backdrop, coda]
---
content
"#;
        let entry = Entry::parse(source, None).unwrap();
        assert_eq!(entry.meta.slot, Slot::Preamble);
        assert_eq!(entry.meta.fallback, vec![Slot::Backdrop, Slot::Coda]);
    }

    // Valid patterns end up in `compiled_regex`, ready to match.
    #[test]
    fn test_regex_compiled_at_parse_time() {
        let source = r#"---
id: regex_entry
regex: ['\b(attack|fight)\b', '\d{3,}']
---
content
"#;
        let entry = Entry::parse(source, None).unwrap();
        assert_eq!(entry.compiled_regex.len(), 2);
        assert!(entry.compiled_regex[0].is_match("attack now"));
        assert!(entry.compiled_regex[1].is_match("found 1000 gold"));
    }

    // A malformed pattern must not fail the parse; it is simply dropped.
    #[test]
    fn test_invalid_regex_skipped() {
        let source = r#"---
id: bad_regex
regex: ['[invalid', '\d+']
---
content
"#;
        let entry = Entry::parse(source, None).unwrap();
        // The invalid regex is skipped, only the valid one is kept
        assert_eq!(entry.compiled_regex.len(), 1);
        assert!(entry.compiled_regex[0].is_match("42"));
    }

    // Unknown frontmatter keys (scalar and nested) land in `extensions`.
    #[test]
    fn test_extensions_preserved() {
        let source = r#"---
id: extended
my_custom_field: "hello"
plugin_data:
  foo: bar
---
content
"#;
        let entry = Entry::parse(source, None).unwrap();
        assert!(entry.meta.extensions.contains_key("my_custom_field"));
        assert!(entry.meta.extensions.contains_key("plugin_data"));
    }

    // Input without `---` delimiters is reported as a metadata parse error.
    #[test]
    fn test_missing_frontmatter_errors() {
        let result = Entry::parse("no frontmatter here", None);
        assert!(matches!(result, Err(ContextWeaverError::MetaParse { .. })));
    }
}
347}