Skip to main content

algocline_core/
pkg.rs

1//! Canonical projection of a Lua package's `M.meta` block.
2//!
3//! `PkgEntity` captures the identity portion of an algocline package: the
4//! fields users rely on to discover, categorize, and version-track a package.
5//! It is the single source of truth for "what is this package?" and is
6//! flattened into higher-level records (`IndexEntry`, `SearchResult`,
7//! `hub_info` responses) so the JSON wire shape stays consistent across the
8//! Hub, the manifest, and project lockfiles.
9//!
10//! ## Parsing contract
11//!
12//! [`PkgEntity::parse_from_init_lua`] is a non-Lua-VM best-effort parser over
//! the `M.meta = { ... }` block of an `init.lua`. It deliberately only
//! supports flat key–value pairs with (possibly concatenated) string
//! literals, plus the `tags` table, which is read as a flat list of string
//! literals; other nested tables are not interpreted. When `M.meta.name`
//! is absent or empty the parser
17//! returns `None` — this is the **inclusion gate** for hub indexing. The
18//! caller (`build_index` in `algocline-app::service::hub`) is expected to
19//! drop `None` directories silently so "draft" directories like
20//! `alc_shapes/` (a type DSL library, not an algocline package) do not
21//! pollute the hub index.
22//!
23//! ## Wire format
24//!
25//! `Option` fields use `#[serde(default)]` but deliberately do **not** use
26//! `skip_serializing_if`. A missing field deserializes as `None` and
27//! serializes back as `null`. This preserves the key-presence guarantee of
28//! the current `hub_index.json` consumers (Bundled-side doc generation,
29//! `README.md` package-count scripts) so they do not break on field
30//! absence.
31
32use std::path::Path;
33
34use serde::{Deserialize, Serialize};
35
/// Canonical projection of a Lua package's `M.meta` block.
///
/// `name` is required (= hub-index inclusion gate). Other fields are
/// optional and degrade UI / discoverability when absent, following the
/// best-practice convention of Cargo / JSR / npm package manifests.
///
/// Every optional field carries `#[serde(default)]` and none of them uses
/// `skip_serializing_if`: a key missing from the input deserializes to
/// `None`, and `None` is written back out as an explicit `null`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct PkgEntity {
    /// Package name. The only field that must be present when deserializing.
    pub name: String,
    /// Value of the `version` key of `M.meta`, if any.
    #[serde(default)]
    pub version: Option<String>,
    /// Value of the `description` key of `M.meta`, if any.
    #[serde(default)]
    pub description: Option<String>,
    /// Value of the `category` key of `M.meta`, if any.
    #[serde(default)]
    pub category: Option<String>,
    /// Leading `---` doc-comment block of the package's `init.lua`, if any.
    #[serde(default)]
    pub docstring: Option<String>,
    /// String entries of the `tags` table of `M.meta`; the parser stores
    /// `None` rather than an empty list.
    #[serde(default)]
    pub tags: Option<Vec<String>>,
}
55
56impl PkgEntity {
57    /// Parse `M.meta` + leading `---` docstring from an `init.lua`.
58    ///
59    /// Returns `None` when the file cannot be read, `M.meta` is absent, or
60    /// `M.meta.name` is empty. Callers treat `None` as "not a package" and
61    /// drop the directory silently from the hub index.
62    ///
63    /// The parser is **not** a full Lua evaluator:
64    ///
65    /// - Only flat key–value pairs inside `M.meta` are extracted.
66    /// - Nested tables (e.g. `tags = { ... }`) are skipped via brace-depth
67    ///   tracking; their keys are not reachable from here.
68    /// - Values must be string literals (`"..."`), optionally joined by `..`
69    ///   concatenation with whitespace between operators.
70    /// - Occurrences of `M.meta` inside single-line comments (`-- ...`)
71    ///   are ignored, so docstrings mentioning the key do not hijack the
72    ///   search.
73    pub fn parse_from_init_lua(path: &Path) -> Option<Self> {
74        let content = std::fs::read_to_string(path).ok()?;
75        let (name, version, description, category, tags) = parse_meta(&content)?;
76        let docstring = extract_docstring_from(&content);
77        Some(PkgEntity {
78            name,
79            version: option_from_str(version),
80            description: option_from_str(description),
81            category: option_from_str(category),
82            docstring: option_from_str(docstring),
83            tags: if tags.is_empty() { None } else { Some(tags) },
84        })
85    }
86}
87
/// Map the empty string to `None` and anything else to `Some(s)`.
///
/// This is the single place where the "empty field = absent" projection
/// rule lives, so every optional column is treated the same way.
fn option_from_str(s: String) -> Option<String> {
    Some(s).filter(|text| !text.is_empty())
}
98
/// Collect the leading `---` doc-comment block of an init.lua source.
///
/// Lines are inspected after stripping leading whitespace. Blank lines are
/// skipped without ending the block; the first line that is neither blank
/// nor a `---` comment ends it. The text after each `---` is trimmed and the
/// pieces are joined with `\n`.
fn extract_docstring_from(content: &str) -> String {
    content
        .lines()
        .map(str::trim_start)
        // The block runs for as long as lines are blank or doc comments.
        .take_while(|line| line.is_empty() || line.starts_with("---"))
        // Blank lines fall out here because they carry no `---` prefix.
        .filter_map(|line| line.strip_prefix("---"))
        .map(str::trim)
        .collect::<Vec<_>>()
        .join("\n")
}
116
/// Parse `M.meta = { ... }` out of `content`. Returns
/// `(name, version, description, category, tags)`. `None` if the block is
/// missing, unparseable, or `name` is empty.
///
/// The reader is textual and best-effort, but it is aware of string
/// literals and comments:
///
/// - Occurrences of `M.meta` that follow `--` on the same line are skipped.
/// - The closing brace of the block is found with depth tracking that
///   ignores braces inside string literals and comments.
/// - A field is only recognised as a whole identifier followed by `=` at the
///   top level of the block. A word inside another field's value, a longer
///   key such as `short_description`, or a key inside a nested table never
///   matches.
/// - Values are one or more string literals (`"..."` or `'...'`) joined by
///   `..`. Escape sequences are left undecoded. A field with any other kind
///   of value yields an empty string.
fn parse_meta(content: &str) -> Option<(String, String, String, String, Vec<String>)> {
    // Find the first `M.meta` that is not preceded by `--` on its own line.
    let mut search_from = 0;
    let meta_start = loop {
        let pos = search_from + content[search_from..].find("M.meta")?;
        let line_start = content[..pos].rfind('\n').map_or(0, |i| i + 1);
        if !content[line_start..pos].contains("--") {
            break pos;
        }
        search_from = pos + "M.meta".len();
    };
    let brace_start = meta_start + content[meta_start..].find('{')?;
    let brace_end = matching_brace(content, brace_start)?;
    let block = &content[brace_start + 1..brace_end];

    let extract = |field: &str| -> String {
        find_field_value(block, field)
            .map(parse_concat_literals)
            .unwrap_or_default()
    };

    let name = extract("name");
    if name.is_empty() {
        return None;
    }
    let tags = extract_string_array(block, "tags");
    Some((
        name,
        extract("version"),
        extract("description"),
        extract("category"),
        tags,
    ))
}

/// Extract a string array from a nested table like `tags = { "a", "b" }`.
/// Returns an empty Vec if the field is absent, is not assigned a table, or
/// has no non-empty string elements.
fn extract_string_array(block: &str, field: &str) -> Vec<String> {
    let Some(value) = find_field_value(block, field) else {
        return Vec::new();
    };
    let value = value.trim_start();
    if !value.starts_with('{') {
        return Vec::new();
    }
    let Some(close) = matching_brace(value, 0) else {
        return Vec::new();
    };
    collect_string_literals(&value[1..close])
}

/// True for bytes that can be part of a Lua identifier.
fn is_ident_byte(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}

/// True when `s` begins with a single or double quote.
fn starts_with_quote(s: &str) -> bool {
    matches!(s.as_bytes().first(), Some(b'"' | b'\''))
}

/// With `src[open]` being an opening quote, return the index just past the
/// matching closing quote, or `None` when the literal is unterminated. A
/// backslash protects the byte that follows it.
fn skip_quoted(src: &str, open: usize) -> Option<usize> {
    let bytes = src.as_bytes();
    let quote = *bytes.get(open)?;
    let mut i = open + 1;
    while i < bytes.len() {
        match bytes[i] {
            b'\\' => i += 2,
            b if b == quote => return Some(i + 1),
            _ => i += 1,
        }
    }
    None
}

/// With `src[start..]` beginning with `--`, return the index at which
/// scanning should resume: past the closing `]]` of a `--[[ ... ]]` comment,
/// otherwise at the end of the line. Unterminated comments run to the end of
/// `src`.
fn skip_comment(src: &str, start: usize) -> usize {
    let body_at = start + 2;
    let body = &src[body_at..];
    if body.starts_with("[[") {
        body.find("]]").map_or(src.len(), |off| body_at + off + 2)
    } else {
        body.find('\n').map_or(src.len(), |off| body_at + off)
    }
}

/// With `src[open]` being `{`, return the index of the `}` that closes it.
/// Braces inside string literals and comments are not counted.
fn matching_brace(src: &str, open: usize) -> Option<usize> {
    let bytes = src.as_bytes();
    let mut depth = 0usize;
    let mut i = open;
    while i < bytes.len() {
        match bytes[i] {
            b'"' | b'\'' => {
                i = skip_quoted(src, i)?;
                continue;
            }
            b'-' if bytes.get(i + 1) == Some(&b'-') => {
                i = skip_comment(src, i);
                continue;
            }
            b'{' => depth += 1,
            b'}' => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
        i += 1;
    }
    None
}

/// Return the text that follows `field =` for the first top-level key named
/// exactly `field` in `block`. String literals, comments and nested tables
/// are stepped over, so only real keys of the table itself can match.
fn find_field_value<'a>(block: &'a str, field: &str) -> Option<&'a str> {
    let bytes = block.as_bytes();
    let mut depth = 0usize;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'"' | b'\'' => i = skip_quoted(block, i)?,
            b'-' if bytes.get(i + 1) == Some(&b'-') => i = skip_comment(block, i),
            b'{' => {
                depth += 1;
                i += 1;
            }
            b'}' => {
                depth = depth.saturating_sub(1);
                i += 1;
            }
            b if is_ident_byte(b) => {
                // Consume the whole identifier so that both word boundaries
                // are respected (`name` never matches `name_alias`).
                let start = i;
                while i < bytes.len() && is_ident_byte(bytes[i]) {
                    i += 1;
                }
                if depth == 0 && &block[start..i] == field {
                    if let Some(value) = block[i..].trim_start().strip_prefix('=') {
                        // `==` is a comparison, not an assignment.
                        if !value.starts_with('=') {
                            return Some(value);
                        }
                    }
                }
            }
            _ => i += 1,
        }
    }
    None
}

/// Read one or more string literals joined by `..` from the start of
/// `value` and return their concatenated contents. Returns an empty string
/// when `value` does not start with a string literal. Anything other than
/// whitespace and a single `..` between two literals ends the value.
fn parse_concat_literals(value: &str) -> String {
    let mut collected = String::new();
    let mut rest = value.trim_start();
    while starts_with_quote(rest) {
        let Some(end) = skip_quoted(rest, 0) else {
            break;
        };
        collected.push_str(&rest[1..end - 1]);
        rest = match rest[end..].trim_start().strip_prefix("..") {
            Some(next) => next.trim_start(),
            None => break,
        };
    }
    collected
}

/// Collect the contents of every non-empty string literal in `inner`,
/// ignoring comments.
fn collect_string_literals(inner: &str) -> Vec<String> {
    let bytes = inner.as_bytes();
    let mut items = Vec::new();
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'"' | b'\'' => {
                let Some(end) = skip_quoted(inner, i) else {
                    break;
                };
                // `end - i == 2` means the literal is just two quotes.
                if end - i > 2 {
                    items.push(inner[i + 1..end - 1].to_string());
                }
                i = end;
            }
            b'-' if bytes.get(i + 1) == Some(&b'-') => i = skip_comment(inner, i),
            _ => i += 1,
        }
    }
    items
}
274
/// Tests for `PkgEntity`: parsing of `init.lua` fixtures written to a
/// temporary directory, and the serde wire-format contract.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Write `body` to `<dir>/init.lua` and return the path of the new file.
    fn write_init_lua(dir: &Path, body: &str) -> std::path::PathBuf {
        let path = dir.join("init.lua");
        fs::write(&path, body).unwrap();
        path
    }

    /// Plain string fields are read from a flat `M.meta` table.
    #[test]
    fn parse_flat_meta() {
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"
local M = {}
M.meta = {
    name = "my_pkg",
    version = "1.0.0",
    description = "A test package",
    category = "reasoning",
}
return M
"#,
        );

        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
        assert_eq!(pkg.name, "my_pkg");
        assert_eq!(pkg.version.as_deref(), Some("1.0.0"));
        assert_eq!(pkg.description.as_deref(), Some("A test package"));
        assert_eq!(pkg.category.as_deref(), Some("reasoning"));
        assert!(pkg.tags.is_none());
    }

    /// `tags` is read from its nested table, and fields after it still parse.
    #[test]
    fn parse_tags_from_nested_table() {
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"
local M = {}
M.meta = {
    name = "nested_pkg",
    tags = { "a", "b" },
    description = "After nested",
}
return M
"#,
        );

        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
        assert_eq!(pkg.name, "nested_pkg");
        assert_eq!(pkg.description.as_deref(), Some("After nested"));
        assert_eq!(
            pkg.tags.as_deref(),
            Some(vec!["a".to_string(), "b".to_string()].as_slice())
        );
    }

    /// A missing `tags` key is projected to `None`.
    #[test]
    fn parse_tags_absent() {
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"
local M = {}
M.meta = {
    name = "no_tags_pkg",
    description = "No tags",
}
return M
"#,
        );

        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
        assert_eq!(pkg.name, "no_tags_pkg");
        assert!(pkg.tags.is_none());
    }

    /// An empty `tags` table is projected to `None`, not `Some(vec![])`.
    #[test]
    fn parse_tags_empty_array() {
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"
local M = {}
M.meta = {
    name = "empty_tags_pkg",
    tags = {},
    description = "Empty tags",
}
return M
"#,
        );

        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
        assert_eq!(pkg.name, "empty_tags_pkg");
        assert!(pkg.tags.is_none());
    }

    /// String literals joined with `..` across lines are concatenated.
    #[test]
    fn parse_concat_string_literals() {
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"
local M = {}
M.meta = {
    name = "concat_pkg",
    version = "0.1.0",
    description = "foo "
        .. "bar "
        .. "baz",
    category = "reasoning",
}
return M
"#,
        );

        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
        assert_eq!(pkg.description.as_deref(), Some("foo bar baz"));
    }

    /// `short_description` must not be mistaken for `description`.
    #[test]
    fn parse_word_boundary_for_description() {
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"
local M = {}
M.meta = {
    name = "wb_pkg",
    short_description = "should not match",
    description = "correct one",
}
return M
"#,
        );

        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
        assert_eq!(pkg.name, "wb_pkg");
        assert_eq!(pkg.description.as_deref(), Some("correct one"));
    }

    /// `M.meta` is still found when it sits past 2KB of leading doc comments.
    #[test]
    fn parse_meta_large_leading_docstring() {
        let tmp = tempfile::tempdir().unwrap();
        let mut content = String::new();
        for i in 0..120 {
            content.push_str(&format!("--- line {i}: long doc comment\n"));
        }
        content.push_str(
            r#"
local M = {}
M.meta = {
    name = "late_meta_pkg",
    version = "0.2.0",
    description = "Located past 2KB",
    category = "test",
}
return M
"#,
        );
        assert!(content.len() > 2048, "fixture should exceed 2KB");
        let path = write_init_lua(tmp.path(), &content);

        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
        assert_eq!(pkg.name, "late_meta_pkg");
        assert_eq!(pkg.version.as_deref(), Some("0.2.0"));
        assert_eq!(pkg.description.as_deref(), Some("Located past 2KB"));
        assert_eq!(pkg.category.as_deref(), Some("test"));
    }

    #[test]
    fn parse_returns_none_without_meta_block() {
        // Mirrors the alc_shapes case: an init.lua with no M.meta block at
        // all. This is the **silent exclusion gate** — the caller drops
        // these directories from the hub index without warning.
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"
--- alc_shapes — type DSL (not a package)
local M = {}
return M
"#,
        );

        assert!(PkgEntity::parse_from_init_lua(&path).is_none());
    }

    /// An empty `name` fails the inclusion gate even if other fields exist.
    #[test]
    fn parse_returns_none_when_name_empty() {
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"
local M = {}
M.meta = {
    name = "",
    version = "1.0.0",
}
return M
"#,
        );

        assert!(PkgEntity::parse_from_init_lua(&path).is_none());
    }

    /// An unreadable path yields `None` rather than a panic or an error.
    #[test]
    fn parse_returns_none_when_file_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let path = tmp.path().join("nonexistent.lua");
        assert!(PkgEntity::parse_from_init_lua(&path).is_none());
    }

    /// The leading `---` block becomes the docstring and stops at code.
    #[test]
    fn extracts_docstring_and_meta() {
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"--- cascade — Multi-level routing with confidence gating
--- Based on: "FrugalGPT" (Chen et al., 2023)

local M = {}
M.meta = {
    name = "cascade",
    version = "0.1.0",
    description = "Multi-level routing",
    category = "meta",
}
return M
"#,
        );

        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
        assert_eq!(pkg.name, "cascade");
        let doc = pkg.docstring.expect("docstring should be present");
        assert!(doc.contains("FrugalGPT"));
        assert!(doc.contains("Multi-level"));
        assert!(!doc.contains("local M"));
    }

    /// Without leading `---` lines the docstring is `None`.
    #[test]
    fn docstring_absent_when_no_leading_comments() {
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"local M = {}
M.meta = { name = "nodoc" }
return M
"#,
        );
        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
        assert!(pkg.docstring.is_none());
    }

    #[test]
    fn m_dot_meta_inside_comment_is_ignored() {
        // A `M.meta` reference inside a `--` comment must not hijack the
        // parser. The real block below it should still be found.
        let tmp = tempfile::tempdir().unwrap();
        let path = write_init_lua(
            tmp.path(),
            r#"
-- example: M.meta = { name = "decoy" }
local M = {}
M.meta = {
    name = "real",
}
return M
"#,
        );
        let pkg = PkgEntity::parse_from_init_lua(&path).expect("should parse");
        assert_eq!(pkg.name, "real");
    }

    #[test]
    fn serde_round_trip_preserves_none_vs_empty() {
        // Wire format contract: None is serialized as null; empty string
        // deserializes as Some("") (not None). Keep these separable so the
        // consumer can distinguish "field absent" from "field present but
        // empty".
        let pkg = PkgEntity {
            name: "p".into(),
            version: None,
            description: Some(String::new()),
            category: Some("meta".into()),
            docstring: None,
            tags: None,
        };
        let json = serde_json::to_string(&pkg).unwrap();
        assert!(json.contains("\"version\":null"), "version null: {json}");
        assert!(
            json.contains("\"description\":\"\""),
            "description empty string: {json}"
        );
        assert!(
            json.contains("\"docstring\":null"),
            "docstring null: {json}"
        );
        // `tags` follows the same key-presence rule as the other options.
        assert!(json.contains("\"tags\":null"), "tags null: {json}");

        let back: PkgEntity = serde_json::from_str(&json).unwrap();
        assert_eq!(back, pkg);
    }

    #[test]
    fn serde_deserialize_accepts_missing_optional_fields() {
        // Legacy hub_index.json entries may omit every optional field;
        // they must deserialize as None (not error).
        let json = r#"{"name":"minimal"}"#;
        let pkg: PkgEntity = serde_json::from_str(json).unwrap();
        assert_eq!(pkg.name, "minimal");
        assert!(pkg.version.is_none());
        assert!(pkg.description.is_none());
        assert!(pkg.category.is_none());
        assert!(pkg.docstring.is_none());
        assert!(pkg.tags.is_none());
    }
}
594}