Skip to main content

algocline_core/
pkg.rs

1//! Canonical projection of a Lua package's `M.meta` block.
2//!
3//! `PkgEntity` captures the identity portion of an algocline package: the
4//! fields users rely on to discover, categorize, and version-track a package.
5//! It is the single source of truth for "what is this package?" and is
6//! flattened into higher-level records (`IndexEntry`, `SearchResult`,
7//! `hub_info` responses) so the JSON wire shape stays consistent across the
8//! Hub, the manifest, and project lockfiles.
9//!
10//! ## Parsing contract
11//!
12//! [`PkgEntity::parse_from_init_lua`] is a non-Lua-VM best-effort parser over
13//! the `M.meta = { ... }` block of an `init.lua`. It deliberately only
14//! supports flat key–value pairs with (possibly concatenated) string
15//! literals; nested tables (e.g. `tags = { ... }`) are skipped via
16//! brace-depth tracking. When `M.meta.name` is absent or empty the parser
17//! returns `None` — this is the **inclusion gate** for hub indexing. The
18//! caller (`build_index` in `algocline-app::service::hub`) is expected to
19//! drop `None` directories silently so "draft" directories like
20//! `alc_shapes/` (a type DSL library, not an algocline package) do not
21//! pollute the hub index.
22//!
23//! ## Wire format
24//!
25//! `Option` fields use `#[serde(default)]` but deliberately do **not** use
26//! `skip_serializing_if`. A missing field deserializes as `None` and
27//! serializes back as `null`. This preserves the key-presence guarantee of
28//! the current `hub_index.json` consumers (Bundled-side doc generation,
29//! `README.md` package-count scripts) so they do not break on field
30//! absence.
31
32use std::path::Path;
33
34use serde::{Deserialize, Serialize};
35
36/// Canonical projection of a Lua package's `M.meta` block.
37///
38/// `name` is required (= hub-index inclusion gate). Other fields are
39/// optional and degrade UI / discoverability when absent, following the
40/// BP convention of Cargo / JSR / npm.
41#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
42pub struct PkgEntity {
    /// Package name. The only field without `#[serde(default)]`, so it must
    /// be present when deserializing; `parse_from_init_lua` returns `None`
    /// instead of building an entity whose name would be empty.
43    pub name: String,
    /// Value of the `version` key in `M.meta`. `parse_from_init_lua` stores
    /// `None` when the key is missing or its value is the empty string.
44    #[serde(default)]
45    pub version: Option<String>,
    /// Value of the `description` key in `M.meta`; `None` under the same
    /// rule as `version`.
46    #[serde(default)]
47    pub description: Option<String>,
    /// Value of the `category` key in `M.meta`; `None` under the same rule
    /// as `version`.
48    #[serde(default)]
49    pub category: Option<String>,
    /// Leading `---` doc-comment lines of the `init.lua`, joined with
    /// newlines. `None` when the file has no such leading lines.
50    #[serde(default)]
51    pub docstring: Option<String>,
52}
53
54impl PkgEntity {
55    /// Parse `M.meta` + leading `---` docstring from an `init.lua`.
56    ///
57    /// Returns `None` when the file cannot be read, `M.meta` is absent, or
58    /// `M.meta.name` is empty. Callers treat `None` as "not a package" and
59    /// drop the directory silently from the hub index.
60    ///
61    /// The parser is **not** a full Lua evaluator:
62    ///
63    /// - Only flat key–value pairs inside `M.meta` are extracted.
64    /// - Nested tables (e.g. `tags = { ... }`) are skipped via brace-depth
65    ///   tracking; their keys are not reachable from here.
66    /// - Values must be string literals (`"..."`), optionally joined by `..`
67    ///   concatenation with whitespace between operators.
68    /// - Occurrences of `M.meta` inside single-line comments (`-- ...`)
69    ///   are ignored, so docstrings mentioning the key do not hijack the
70    ///   search.
71    pub fn parse_from_init_lua(path: &Path) -> Option<Self> {
72        let content = std::fs::read_to_string(path).ok()?;
73        let (name, version, description, category) = parse_meta(&content)?;
74        let docstring = extract_docstring_from(&content);
75        Some(PkgEntity {
76            name,
77            version: option_from_str(version),
78            description: option_from_str(description),
79            category: option_from_str(category),
80            docstring: option_from_str(docstring),
81        })
82    }
83}
84
/// Project an owned string onto `Option<String>`: the empty string becomes
/// `None`, anything else (including whitespace-only text) is kept as
/// `Some(s)`. This is the single place where the "empty field = absent"
/// rule used by `parse_from_init_lua` is implemented.
fn option_from_str(s: String) -> Option<String> {
    Some(s).filter(|text| !text.is_empty())
}
95
/// Collect the leading `---` doc-comment block of an init.lua source.
///
/// Lines are inspected after stripping leading whitespace. A line starting
/// with `---` contributes the remainder of the line (trimmed on both
/// sides); blank lines are skipped without ending the block; the first
/// line that is neither blank nor a `---` line stops the scan. The
/// collected pieces are joined with `\n`, so the result is the empty
/// string when the file has no leading doc block.
fn extract_docstring_from(content: &str) -> String {
    let doc_lines: Vec<&str> = content
        .lines()
        .map(str::trim_start)
        // Blank lines are tolerated inside the block and contribute nothing.
        .filter(|candidate| !candidate.is_empty())
        // The first non-blank line without the `---` prefix ends the block.
        .map_while(|candidate| candidate.strip_prefix("---"))
        .map(str::trim)
        .collect();
    doc_lines.join("\n")
}
113
/// Parse the `M.meta = { ... }` table constructor out of `content`.
///
/// Returns `(name, version, description, category)`. Fields that are
/// missing, or whose value is not a string literal, come back as the empty
/// string. The whole result is `None` when no `M.meta` is found outside a
/// `--` line comment, when the table's closing brace is never reached, or
/// when `name` is missing or empty.
///
/// The table is walked once, byte by byte, with a small amount of Lua
/// lexical awareness:
///
/// - Quoted string literals (`"..."` and `'...'`) are skipped as a unit, so
///   braces or key names that appear *inside* a value cannot confuse the
///   scan. A backslash protects the following character from ending the
///   literal.
/// - `-- ...` line comments are skipped to the end of the line.
/// - Only identifiers at brace depth 1 are treated as keys; keys inside
///   nested tables (e.g. `tags = { ... }`) are never read.
/// - A key must be a whole identifier (`named` does not match `name`) and
///   must be followed by `=` and a string literal, optionally continued by
///   `.. "more"` concatenations.
/// - If a key appears more than once, the first occurrence with a string
///   value wins.
///
/// Known limitations (best-effort parser, not a Lua evaluator): escape
/// sequences are returned verbatim rather than decoded, and long-bracket
/// strings / block comments (`[[ ... ]]`, `--[[ ... ]]`) are not
/// recognised.
fn parse_meta(content: &str) -> Option<(String, String, String, String)> {
    // Slot order matches the returned tuple.
    const KEYS: [&str; 4] = ["name", "version", "description", "category"];
    let src = content.as_bytes();

    // Locate `M.meta`, ignoring occurrences that sit after a `--` on the
    // same line so docstrings mentioning the key do not hijack the search.
    let mut search_from = 0;
    let meta_start = loop {
        let pos = search_from + content[search_from..].find("M.meta")?;
        let line_start = content[..pos].rfind('\n').map_or(0, |newline| newline + 1);
        if !content[line_start..pos].contains("--") {
            break pos;
        }
        search_from = pos + "M.meta".len();
    };
    let brace_start = meta_start + content[meta_start..].find('{')?;

    let mut values: [Option<String>; 4] = [None, None, None, None];
    let mut depth = 0usize;
    let mut closed = false;
    let mut i = brace_start;
    while i < src.len() {
        match src[i] {
            b'{' => {
                depth += 1;
                i += 1;
            }
            b'}' => {
                // The scan starts on `{`, so depth is at least 1 here.
                depth -= 1;
                if depth == 0 {
                    closed = true;
                    break;
                }
                i += 1;
            }
            b'"' | b'\'' => {
                // Skip the whole literal. A quote with no closing partner on
                // its line (e.g. a stray apostrophe) is stepped over alone.
                i = match lua_string_end(src, i) {
                    Some(close) => close + 1,
                    None => i + 1,
                };
            }
            b'-' if src.get(i + 1) == Some(&b'-') => {
                // Line comment: jump to the newline (or end of input).
                let comment_len = src[i..]
                    .iter()
                    .position(|&byte| byte == b'\n')
                    .unwrap_or(src.len() - i);
                i += comment_len;
            }
            byte if is_ident_byte(byte) => {
                // Consume the whole identifier so partial matches such as
                // `short_description` or `named` are impossible.
                let start = i;
                while i < src.len() && is_ident_byte(src[i]) {
                    i += 1;
                }
                if depth != 1 {
                    continue;
                }
                let Some(slot) = KEYS
                    .iter()
                    .position(|key| key.as_bytes() == &src[start..i])
                else {
                    continue;
                };
                if values[slot].is_some() {
                    continue;
                }
                if let Some((value, next)) = read_string_value(content, i) {
                    values[slot] = Some(value);
                    i = next;
                }
            }
            _ => i += 1,
        }
    }
    if !closed {
        return None;
    }

    let [name, version, description, category] = values;
    // Inclusion gate: a package without a non-empty name is not a package.
    let name = name.filter(|candidate| !candidate.is_empty())?;
    Some((
        name,
        version.unwrap_or_default(),
        description.unwrap_or_default(),
        category.unwrap_or_default(),
    ))
}

/// True for bytes that can appear in a Lua identifier (ASCII letters,
/// digits and `_`).
fn is_ident_byte(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || byte == b'_'
}

/// Index of the first byte at or after `from` that is not ASCII whitespace
/// (or `src.len()` when only whitespace remains).
fn skip_ascii_ws(src: &[u8], from: usize) -> usize {
    let mut i = from;
    while i < src.len() && src[i].is_ascii_whitespace() {
        i += 1;
    }
    i
}

/// Given the index of an opening quote, return the index of the matching
/// closing quote. Returns `None` when a raw newline or the end of input is
/// reached first. A backslash skips the byte that follows it.
fn lua_string_end(src: &[u8], open: usize) -> Option<usize> {
    let quote = src[open];
    let mut i = open + 1;
    while i < src.len() {
        match src[i] {
            b'\\' => i += 2,
            b'\n' => return None,
            byte if byte == quote => return Some(i),
            _ => i += 1,
        }
    }
    None
}

/// Read `= "literal" [.. "literal" ...]` starting at `from` (the byte right
/// after a key). Returns the concatenated literal text and the index just
/// past the last literal consumed, or `None` when the key is not followed
/// by `=` and a string literal.
fn read_string_value(content: &str, from: usize) -> Option<(String, usize)> {
    let src = content.as_bytes();
    let eq = skip_ascii_ws(src, from);
    // Require a single `=`; `==` is a comparison, not an assignment.
    if src.get(eq) != Some(&b'=') || src.get(eq + 1) == Some(&b'=') {
        return None;
    }

    let mut cursor = skip_ascii_ws(src, eq + 1);
    let mut value = String::new();
    let mut end = None;
    loop {
        if !matches!(src.get(cursor).copied(), Some(b'"' | b'\'')) {
            break;
        }
        let Some(close) = lua_string_end(src, cursor) else {
            break;
        };
        // Both bounds sit next to ASCII quote bytes, so they are valid
        // UTF-8 char boundaries.
        value.push_str(&content[cursor + 1..close]);
        end = Some(close + 1);

        // Only whitespace and a single `..` may separate two literals.
        let dots = skip_ascii_ws(src, close + 1);
        if src.get(dots..dots + 2) != Some(&b".."[..]) {
            break;
        }
        cursor = skip_ascii_ws(src, dots + 2);
    }
    end.map(|next| (value, next))
}
215
// Unit tests for `PkgEntity`: the `init.lua` parsing contract (each test
// writes a fixture into a fresh temporary directory) and the serde wire
// format of the struct.
216#[cfg(test)]
217mod tests {
218    use super::*;
219    use std::fs;
220
    // Write `body` to `<dir>/init.lua` and return the path of that file.
221    fn write_init_lua(dir: &Path, body: &str) -> std::path::PathBuf {
222        let path = dir.join("init.lua");
223        fs::write(&path, body).unwrap();
224        path
225    }
226
    // Happy path: a flat `M.meta` table with all four string fields.
227    #[test]
228    fn parse_flat_meta() {
229        let tmp = tempfile::tempdir().unwrap();
230        let path = write_init_lua(
231            tmp.path(),
232            r#"
233local M = {}
234M.meta = {
235    name = "my_pkg",
236    version = "1.0.0",
237    description = "A test package",
238    category = "reasoning",
239}
240return M
241"#,
242        );
243
244        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
245        assert_eq!(pkg.name, "my_pkg");
246        assert_eq!(pkg.version.as_deref(), Some("1.0.0"));
247        assert_eq!(pkg.description.as_deref(), Some("A test package"));
248        assert_eq!(pkg.category.as_deref(), Some("reasoning"));
249    }
250
    // A nested table (`tags = { ... }`) must not end the block early: the
    // `description` that follows it still has to be picked up.
251    #[test]
252    fn parse_nested_table_skipped() {
253        let tmp = tempfile::tempdir().unwrap();
254        let path = write_init_lua(
255            tmp.path(),
256            r#"
257local M = {}
258M.meta = {
259    name = "nested_pkg",
260    tags = { "a", "b" },
261    description = "After nested",
262}
263return M
264"#,
265        );
266
267        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
268        assert_eq!(pkg.name, "nested_pkg");
269        assert_eq!(pkg.description.as_deref(), Some("After nested"));
270    }
271
    // String literals joined with `..` across several lines are
    // concatenated into a single value.
272    #[test]
273    fn parse_concat_string_literals() {
274        let tmp = tempfile::tempdir().unwrap();
275        let path = write_init_lua(
276            tmp.path(),
277            r#"
278local M = {}
279M.meta = {
280    name = "concat_pkg",
281    version = "0.1.0",
282    description = "foo "
283        .. "bar "
284        .. "baz",
285    category = "reasoning",
286}
287return M
288"#,
289        );
290
291        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
292        assert_eq!(pkg.description.as_deref(), Some("foo bar baz"));
293    }
294
    // `short_description` contains `description` as a suffix; only the
    // standalone `description` key may supply the value.
295    #[test]
296    fn parse_word_boundary_for_description() {
297        let tmp = tempfile::tempdir().unwrap();
298        let path = write_init_lua(
299            tmp.path(),
300            r#"
301local M = {}
302M.meta = {
303    name = "wb_pkg",
304    short_description = "should not match",
305    description = "correct one",
306}
307return M
308"#,
309        );
310
311        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
312        assert_eq!(pkg.name, "wb_pkg");
313        assert_eq!(pkg.description.as_deref(), Some("correct one"));
314    }
315
    // The fixture starts with 120 doc-comment lines so that `M.meta` sits
    // past the first 2 KB of the file; the block must still be found.
316    #[test]
317    fn parse_meta_large_leading_docstring() {
318        let tmp = tempfile::tempdir().unwrap();
319        let mut content = String::new();
320        for i in 0..120 {
321            content.push_str(&format!("--- line {i}: long doc comment\n"));
322        }
323        content.push_str(
324            r#"
325local M = {}
326M.meta = {
327    name = "late_meta_pkg",
328    version = "0.2.0",
329    description = "Located past 2KB",
330    category = "test",
331}
332return M
333"#,
334        );
335        assert!(content.len() > 2048, "fixture should exceed 2KB");
336        let path = write_init_lua(tmp.path(), &content);
337
338        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
339        assert_eq!(pkg.name, "late_meta_pkg");
340        assert_eq!(pkg.version.as_deref(), Some("0.2.0"));
341        assert_eq!(pkg.description.as_deref(), Some("Located past 2KB"));
342        assert_eq!(pkg.category.as_deref(), Some("test"));
343    }
344
345    #[test]
346    fn parse_returns_none_without_meta_block() {
347        // Mirrors the alc_shapes case: an init.lua with no M.meta block at
348        // all. This is the **silent exclusion gate** — the caller drops
349        // these directories from the hub index without warning.
350        let tmp = tempfile::tempdir().unwrap();
351        let path = write_init_lua(
352            tmp.path(),
353            r#"
354--- alc_shapes — type DSL (not a package)
355local M = {}
356return M
357"#,
358        );
359
360        assert!(PkgEntity::parse_from_init_lua(&path).is_none());
361    }
362
    // An explicitly empty `name` is treated the same as a missing one.
363    #[test]
364    fn parse_returns_none_when_name_empty() {
365        let tmp = tempfile::tempdir().unwrap();
366        let path = write_init_lua(
367            tmp.path(),
368            r#"
369local M = {}
370M.meta = {
371    name = "",
372    version = "1.0.0",
373}
374return M
375"#,
376        );
377
378        assert!(PkgEntity::parse_from_init_lua(&path).is_none());
379    }
380
    // An unreadable path yields `None` rather than a panic or an error.
381    #[test]
382    fn parse_returns_none_when_file_missing() {
383        let tmp = tempfile::tempdir().unwrap();
384        let path = tmp.path().join("nonexistent.lua");
385        assert!(PkgEntity::parse_from_init_lua(&path).is_none());
386    }
387
    // The leading `---` lines become the docstring (a blank line after them
    // is fine); ordinary code lines must not leak into it.
388    #[test]
389    fn extracts_docstring_and_meta() {
390        let tmp = tempfile::tempdir().unwrap();
391        let path = write_init_lua(
392            tmp.path(),
393            r#"--- cascade — Multi-level routing with confidence gating
394--- Based on: "FrugalGPT" (Chen et al., 2023)
395
396local M = {}
397M.meta = {
398    name = "cascade",
399    version = "0.1.0",
400    description = "Multi-level routing",
401    category = "meta",
402}
403return M
404"#,
405        );
406
407        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
408        assert_eq!(pkg.name, "cascade");
409        let doc = pkg.docstring.expect("docstring should be present");
410        assert!(doc.contains("FrugalGPT"));
411        assert!(doc.contains("Multi-level"));
412        assert!(!doc.contains("local M"));
413    }
414
    // With no leading `---` lines the docstring is `None`, not `Some("")`.
415    #[test]
416    fn docstring_absent_when_no_leading_comments() {
417        let tmp = tempfile::tempdir().unwrap();
418        let path = write_init_lua(
419            tmp.path(),
420            r#"local M = {}
421M.meta = { name = "nodoc" }
422return M
423"#,
424        );
425        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
426        assert!(pkg.docstring.is_none());
427    }
428
429    #[test]
430    fn m_dot_meta_inside_comment_is_ignored() {
431        // A `M.meta` reference inside a `--` comment must not hijack the
432        // parser. The real block below it should still be found.
433        let tmp = tempfile::tempdir().unwrap();
434        let path = write_init_lua(
435            tmp.path(),
436            r#"
437-- example: M.meta = { name = "decoy" }
438local M = {}
439M.meta = {
440    name = "real",
441}
442return M
443"#,
444        );
445        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
446        assert_eq!(pkg.name, "real");
447    }
448
449    #[test]
450    fn serde_round_trip_preserves_none_vs_empty() {
451        // Wire format contract: None is serialized as null; empty string
452        // deserializes as Some("") (not None). Keep these separable so the
453        // consumer can distinguish "field absent" from "field present but
454        // empty".
455        let pkg = PkgEntity {
456            name: "p".into(),
457            version: None,
458            description: Some(String::new()),
459            category: Some("meta".into()),
460            docstring: None,
461        };
462        let json = serde_json::to_string(&pkg).unwrap();
463        assert!(json.contains("\"version\":null"), "version null: {json}");
464        assert!(
465            json.contains("\"description\":\"\""),
466            "description empty string: {json}"
467        );
468        assert!(
469            json.contains("\"docstring\":null"),
470            "docstring null: {json}"
471        );
472
473        let back: PkgEntity = serde_json::from_str(&json).unwrap();
474        assert_eq!(back, pkg);
475    }
476
477    #[test]
478    fn serde_deserialize_accepts_missing_optional_fields() {
479        // Legacy hub_index.json entries may omit every optional field;
480        // they must deserialize as None (not error).
481        let json = r#"{"name":"minimal"}"#;
482        let pkg: PkgEntity = serde_json::from_str(json).unwrap();
483        assert_eq!(pkg.name, "minimal");
484        assert!(pkg.version.is_none());
485        assert!(pkg.description.is_none());
486        assert!(pkg.category.is_none());
487        assert!(pkg.docstring.is_none());
488    }
489}