Skip to main content

lean_ctx/core/
structured_read.rs

1use std::collections::BTreeMap;
2
3/// Markdown outline: heading tree + fenced code block boundaries.
4pub fn extract_markdown_outline(content: &str) -> String {
5    let mut parts = Vec::new();
6    let mut in_code_block = false;
7
8    for line in content.lines() {
9        let trimmed = line.trim();
10
11        if trimmed.starts_with("```") {
12            in_code_block = !in_code_block;
13            continue;
14        }
15        if in_code_block {
16            continue;
17        }
18
19        if let Some(heading) = parse_heading(trimmed) {
20            parts.push(heading);
21        }
22    }
23
24    if parts.is_empty() {
25        return String::new();
26    }
27
28    parts.join("\n")
29}
30
/// Parses a single line as an ATX heading (`#` through `######`).
///
/// Returns the heading text indented by two spaces per level below the first,
/// or `None` when the line is not a heading. The marker must be followed by a
/// space or tab, so `#hashtag` and `#!/bin/sh` are not headings. An optional
/// closing run of `#` that is separated from the text by whitespace is
/// dropped (`## Title ##` becomes `Title`), while `# C#` keeps its `#`.
fn parse_heading(line: &str) -> Option<String> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    let level = line.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }

    // `level` counts ASCII `#` bytes, so the slice starts on a char boundary.
    let after_marker = &line[level..];
    if !after_marker.starts_with(is_blank) {
        return None;
    }

    let text = after_marker.trim();
    let without_closing = text.trim_end_matches('#');
    let title = if without_closing.is_empty() {
        // Nothing but an optional closing sequence: an empty heading.
        ""
    } else if without_closing.ends_with(is_blank) {
        without_closing.trim_end()
    } else {
        // Trailing `#` glued to the text belongs to the title.
        text
    };
    if title.is_empty() {
        return None;
    }

    let indent = "  ".repeat(level.saturating_sub(1));
    Some(format!("{indent}{title}"))
}
43
44/// JSON structure: key-tree with types, depth 3, max 20 keys per level.
45/// Reuses logic from `patterns::json_schema` but produces a read-mode output.
46pub fn extract_json_structure(content: &str) -> String {
47    let trimmed = content.trim();
48    let val: serde_json::Value = match serde_json::from_str(trimmed) {
49        Ok(v) => v,
50        Err(_) => return String::new(),
51    };
52    format_json_value(&val, 0)
53}
54
55fn format_json_value(val: &serde_json::Value, depth: usize) -> String {
56    let indent = "  ".repeat(depth);
57    match val {
58        serde_json::Value::Object(map) => {
59            if map.is_empty() {
60                return format!("{indent}{{}}");
61            }
62            if depth > 3 {
63                return format!("{indent}{{...{} keys}}", map.len());
64            }
65            let mut entries = Vec::new();
66            for (key, value) in map.iter().take(20) {
67                match value {
68                    serde_json::Value::Object(inner) if !inner.is_empty() && depth < 3 => {
69                        let nested = format_json_value(value, depth + 1);
70                        entries.push(format!("{indent}  {key}: {{\n{nested}\n{indent}  }}"));
71                    }
72                    serde_json::Value::Array(arr) if !arr.is_empty() => {
73                        let item_type = arr.first().map_or("any", json_type_name);
74                        entries.push(format!("{indent}  {key}: [{item_type}...{}]", arr.len()));
75                    }
76                    _ => {
77                        entries.push(format!("{indent}  {key}: {}", json_type_name(value)));
78                    }
79                }
80            }
81            if map.len() > 20 {
82                entries.push(format!("{indent}  ...+{} more keys", map.len() - 20));
83            }
84            entries.join("\n")
85        }
86        serde_json::Value::Array(arr) => {
87            if arr.is_empty() {
88                return format!("{indent}[]");
89            }
90            let first_schema = format_json_value(&arr[0], depth + 1);
91            format!(
92                "{indent}[{} items, each:\n{first_schema}\n{indent}]",
93                arr.len()
94            )
95        }
96        other => format!("{indent}{}", json_type_name(other)),
97    }
98}
99
100fn json_type_name(val: &serde_json::Value) -> &'static str {
101    match val {
102        serde_json::Value::Null => "null",
103        serde_json::Value::Bool(_) => "bool",
104        serde_json::Value::Number(_) => "num",
105        serde_json::Value::String(_) => "str",
106        serde_json::Value::Array(_) => "array",
107        serde_json::Value::Object(_) => "object",
108    }
109}
110
111/// YAML structure: indent-based key extraction with nested structure.
112pub fn extract_yaml_structure(content: &str) -> String {
113    let mut parts = Vec::new();
114    let mut prev_indent = 0usize;
115
116    for line in content.lines() {
117        let trimmed = line.trim();
118        if trimmed.is_empty() || trimmed.starts_with('#') {
119            continue;
120        }
121
122        let indent = line.len() - line.trim_start().len();
123        if let Some(key) = extract_yaml_key(trimmed) {
124            let level = indent / 2;
125            let prefix = "  ".repeat(level);
126            parts.push(format!("{prefix}{key}"));
127            prev_indent = indent;
128        } else if trimmed.starts_with("- ") && indent <= prev_indent + 2 {
129            if let Some(key) = extract_yaml_key(trimmed.trim_start_matches("- ")) {
130                let level = indent / 2;
131                let prefix = "  ".repeat(level);
132                parts.push(format!("{prefix}- {key}"));
133            }
134        }
135    }
136
137    deduplicate_consecutive(&parts)
138}
139
/// Extracts a `key: value` summary from one (already trimmed) YAML line.
///
/// The separator is the first colon that is followed by whitespace or ends
/// the line. A colon glued to the following text, as in `http://host` or
/// `12:30`, is part of a scalar and does not start a mapping value.
///
/// Returns `None` when there is no separator, when the key is empty, or when
/// the key contains a space without being quoted (which also rejects
/// `- item: x` list entries). Otherwise returns:
/// * `key:` when the value is empty or a block-scalar indicator (`|`, `>`),
/// * `key: ...` when the value is longer than 40 bytes,
/// * `key: value` in all other cases.
fn extract_yaml_key(line: &str) -> Option<String> {
    let colon_pos = line
        .match_indices(':')
        .map(|(pos, _)| pos)
        .find(|&pos| line[pos + 1..].chars().next().map_or(true, char::is_whitespace))?;

    let key = line[..colon_pos].trim();
    let is_quoted = key.starts_with('"') || key.starts_with('\'');
    if key.is_empty() || (key.contains(' ') && !is_quoted) {
        return None;
    }

    let value_part = line[colon_pos + 1..].trim();
    if value_part.is_empty() || value_part == "|" || value_part == ">" {
        Some(format!("{key}:"))
    } else if value_part.len() > 40 {
        // Long values are elided to keep the outline compact.
        Some(format!("{key}: ..."))
    } else {
        Some(format!("{key}: {value_part}"))
    }
}
155
/// Joins `lines` with newlines, keeping only the first line of each run of
/// identical consecutive lines.
///
/// The comparison starts from an empty string, so an empty first entry is
/// treated as a repeat and dropped.
fn deduplicate_consecutive(lines: &[String]) -> String {
    let mut last_kept = "";
    let kept: Vec<&str> = lines
        .iter()
        .map(String::as_str)
        .filter(|&current| {
            let is_new = current != last_kept;
            if is_new {
                last_kept = current;
            }
            is_new
        })
        .collect();
    kept.join("\n")
}
170
/// TOML structure: table headers with a preview of the keys they contain.
///
/// Output layout:
/// * keys that appear before any header come first, unindented and unlimited;
/// * each table follows in alphabetical order of its name, rendered as
///   `[name]` (or `[[name]]` for an array of tables) with up to 10 indented
///   `key = value` lines and a `...+N more` marker for the rest;
/// * values longer than 40 bytes are replaced by `...`.
///
/// All `[[name]]` entries that share a name are merged under one header.
/// Blank lines and `#` comment lines are ignored. Whitespace inside the
/// header brackets is trimmed.
pub fn extract_toml_structure(content: &str) -> String {
    // Table name -> (declared with `[[...]]`, rendered `key = value` lines).
    // The empty name collects the keys that precede the first header.
    let mut sections: BTreeMap<String, (bool, Vec<String>)> = BTreeMap::new();
    let mut current_section = String::new();

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        if let Some(after_open) = trimmed.strip_prefix('[') {
            // A second `[` marks an array-of-tables header; strip it as well
            // so the stored name never carries a stray bracket.
            let (is_array, body) = match after_open.strip_prefix('[') {
                Some(inner) => (true, inner),
                None => (false, after_open),
            };
            if let Some(end) = body.find(']') {
                current_section = body[..end].trim().to_string();
                sections.entry(current_section.clone()).or_default().0 |= is_array;
            }
            continue;
        }

        if let Some((key, value)) = trimmed.split_once('=') {
            let key = key.trim();
            let value = value.trim();
            let display_val = if value.len() > 40 { "..." } else { value };
            sections
                .entry(current_section.clone())
                .or_default()
                .1
                .push(format!("{key} = {display_val}"));
        }
    }

    let mut parts = Vec::new();
    for (section, (is_array, keys)) in &sections {
        if section.is_empty() {
            parts.extend(keys.iter().cloned());
            continue;
        }

        parts.push(if *is_array {
            format!("[[{section}]]")
        } else {
            format!("[{section}]")
        });
        parts.extend(keys.iter().take(10).map(|k| format!("  {k}")));
        if keys.len() > 10 {
            parts.push(format!("  ...+{} more", keys.len() - 10));
        }
    }

    parts.join("\n")
}
220
221/// Lock file summary: package count + direct dependency names.
222pub fn extract_lock_summary(content: &str, path: &str) -> String {
223    let lower = path.to_lowercase();
224    if lower.ends_with("cargo.lock") {
225        extract_cargo_lock_summary(content)
226    } else if lower.ends_with("package-lock.json") {
227        extract_npm_lock_summary(content)
228    } else if lower.ends_with("yarn.lock") {
229        extract_yarn_lock_summary(content)
230    } else if lower.ends_with("poetry.lock") || lower.ends_with("pdm.lock") {
231        extract_poetry_lock_summary(content)
232    } else if lower.ends_with("go.sum") {
233        extract_go_sum_summary(content)
234    } else {
235        extract_generic_lock_summary(content)
236    }
237}
238
/// Summarises a `Cargo.lock`: total package count, local crates, and the
/// dependencies those local crates list.
///
/// A package counts as local ("workspace") when its entry has no `source = `
/// line. Dependency names are read from the `dependencies = [` list of such
/// packages, de-duplicated, and capped at 30 entries. A list is only read
/// when no `source = ` line has been seen earlier in the same entry.
fn extract_cargo_lock_summary(content: &str) -> String {
    let mut package_total = 0usize;
    let mut workspace_members: Vec<&str> = Vec::new();
    let mut direct_deps: Vec<&str> = Vec::new();

    // State of the `[[package]]` entry currently being read.
    let mut pending_name: Option<&str> = None;
    let mut from_registry = false;
    let mut reading_deps = false;

    for entry in content.lines().map(str::trim) {
        match entry {
            "[[package]]" => {
                package_total += 1;
                // A new entry begins: settle the one that just ended.
                if let Some(name) = pending_name.take() {
                    if !from_registry && !workspace_members.contains(&name) {
                        workspace_members.push(name);
                    }
                }
                from_registry = false;
                reading_deps = false;
            }
            _ if entry.starts_with("name = ") => {
                pending_name = Some(entry.trim_start_matches("name = ").trim_matches('"'));
            }
            _ if entry.starts_with("source = ") => from_registry = true,
            _ if entry.starts_with("dependencies = [") => {
                if !from_registry {
                    reading_deps = true;
                }
            }
            _ if !reading_deps => {}
            "]" => reading_deps = false,
            _ => {
                // Items look like `"name",` or `"name version (source)",`.
                let item = entry.trim_matches(|c: char| c == '"' || c == ',');
                let dep_name = item.split_whitespace().next().unwrap_or(item);
                let is_new = !dep_name.is_empty() && !direct_deps.contains(&dep_name);
                if is_new && direct_deps.len() < 30 {
                    direct_deps.push(dep_name);
                }
            }
        }
    }

    // The last entry has no following `[[package]]` line to settle it.
    if let Some(name) = pending_name {
        if !from_registry && !workspace_members.contains(&name) {
            workspace_members.push(name);
        }
    }

    let mut summary = vec![format!("Cargo.lock: {package_total} packages")];
    if !workspace_members.is_empty() {
        summary.push(format!("  workspace: {}", workspace_members.join(", ")));
    }
    if !direct_deps.is_empty() {
        summary.push(format!("  direct deps: {}", direct_deps.join(", ")));
    }
    summary.join("\n")
}
300
301fn extract_npm_lock_summary(content: &str) -> String {
302    let val: serde_json::Value = match serde_json::from_str(content) {
303        Ok(v) => v,
304        Err(_) => return extract_generic_lock_summary(content),
305    };
306    let name = val.get("name").and_then(|v| v.as_str()).unwrap_or("?");
307    let pkg_count = val
308        .get("packages")
309        .and_then(|v| v.as_object())
310        .map(serde_json::Map::len)
311        .or_else(|| {
312            val.get("dependencies")
313                .and_then(|v| v.as_object())
314                .map(serde_json::Map::len)
315        })
316        .unwrap_or(0);
317    format!("package-lock.json ({name}): {pkg_count} packages")
318}
319
/// Summarises a `yarn.lock` with an approximate package count.
///
/// Counts every line that starts with neither a space nor `#` and contains
/// an `@`.
fn extract_yarn_lock_summary(content: &str) -> String {
    let mut pkg_count = 0usize;
    for line in content.lines() {
        let nested_or_comment = matches!(line.chars().next(), Some(' ') | Some('#'));
        if nested_or_comment || !line.contains('@') {
            continue;
        }
        pkg_count += 1;
    }
    format!("yarn.lock: ~{pkg_count} packages")
}
327
/// Summarises a Poetry-style lock file by counting `[[package]]` header lines
/// (surrounding whitespace is ignored).
///
/// The output is always labelled `poetry.lock`, whatever file the content
/// came from.
fn extract_poetry_lock_summary(content: &str) -> String {
    let mut pkg_count = 0usize;
    for line in content.lines() {
        if line.trim() == "[[package]]" {
            pkg_count += 1;
        }
    }
    format!("poetry.lock: {pkg_count} packages")
}
335
/// Summarises a `go.sum` by counting distinct module paths.
///
/// The module path is the text before the first space on each line; lines
/// without a space are ignored.
fn extract_go_sum_summary(content: &str) -> String {
    let modules: std::collections::HashSet<&str> = content
        .lines()
        .filter_map(|line| line.split_once(' '))
        .map(|(module, _)| module)
        .collect();
    format!("go.sum: {} modules", modules.len())
}
345
/// Fallback summary for unrecognised lock files: reports only the line count.
fn extract_generic_lock_summary(content: &str) -> String {
    format!("lock file: {} lines", content.lines().count())
}
350
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every fragment in `needles` occurs in `haystack`.
    fn assert_contains_all(haystack: &str, needles: &[&str]) {
        for &needle in needles {
            assert!(
                haystack.contains(needle),
                "expected {needle:?} in:\n{haystack}"
            );
        }
    }

    /// Asserts that no fragment in `needles` occurs in `haystack`.
    fn assert_contains_none(haystack: &str, needles: &[&str]) {
        for &needle in needles {
            assert!(
                !haystack.contains(needle),
                "did not expect {needle:?} in:\n{haystack}"
            );
        }
    }

    // --- Markdown ---------------------------------------------------------

    #[test]
    fn markdown_outline_extracts_headings() {
        let outline = extract_markdown_outline(
            "# Title\n\nSome text.\n\n## Section A\n\n### Sub A1\n\n## Section B\n\nMore text.",
        );
        assert_contains_all(
            &outline,
            &["Title", "  Section A", "    Sub A1", "  Section B"],
        );
    }

    #[test]
    fn markdown_outline_skips_code_blocks() {
        let outline =
            extract_markdown_outline("# Real\n\n```\n# Not a heading\n```\n\n## Also Real");
        assert_contains_all(&outline, &["Real", "Also Real"]);
        assert_contains_none(&outline, &["Not a heading"]);
    }

    #[test]
    fn markdown_outline_empty_for_no_headings() {
        let outline = extract_markdown_outline("Just plain text\nwithout any headings.");
        assert!(outline.is_empty());
    }

    // --- JSON -------------------------------------------------------------

    #[test]
    fn json_structure_extracts_keys() {
        let structure = extract_json_structure(
            r#"{"name": "test", "version": "1.0", "deps": {"a": 1, "b": 2}}"#,
        );
        assert_contains_all(
            &structure,
            &["name: str", "version: str", "deps: {", "a: num"],
        );
    }

    #[test]
    fn json_structure_handles_arrays() {
        let structure = extract_json_structure(r#"[{"id": 1}, {"id": 2}]"#);
        assert_contains_all(&structure, &["2 items", "id: num"]);
    }

    #[test]
    fn json_structure_empty_for_invalid() {
        let structure = extract_json_structure("not json");
        assert!(structure.is_empty());
    }

    // --- YAML -------------------------------------------------------------

    #[test]
    fn yaml_structure_extracts_keys() {
        let structure = extract_yaml_structure(
            "name: my-app\nversion: 1.0\nservices:\n  web:\n    port: 8080\n  db:\n    port: 5432",
        );
        assert_contains_all(
            &structure,
            &["name: my-app", "version: 1.0", "services:", "web:"],
        );
    }

    #[test]
    fn yaml_structure_skips_comments() {
        let structure =
            extract_yaml_structure("# Comment\nkey: value\n# Another comment\nkey2: value2");
        assert_contains_none(&structure, &["Comment"]);
        assert_contains_all(&structure, &["key: value", "key2: value2"]);
    }

    // --- TOML -------------------------------------------------------------

    #[test]
    fn toml_structure_extracts_sections() {
        let structure = extract_toml_structure(
            "[package]\nname = \"test\"\nversion = \"0.1.0\"\n\n[dependencies]\nserde = \"1.0\"",
        );
        assert_contains_all(
            &structure,
            &[
                "[package]",
                "name = \"test\"",
                "[dependencies]",
                "serde = \"1.0\"",
            ],
        );
    }

    #[test]
    fn toml_structure_handles_top_level_keys() {
        let structure = extract_toml_structure("key = \"value\"\n\n[section]\na = 1");
        assert_contains_all(&structure, &["key = \"value\"", "[section]"]);
    }

    // --- Lock files -------------------------------------------------------

    #[test]
    fn cargo_lock_summary() {
        let lock = "[[package]]\nname = \"serde\"\nversion = \"1.0\"\n\n[[package]]\nname = \"tokio\"\nversion = \"1.0\"";
        let summary = extract_lock_summary(lock, "Cargo.lock");
        assert_contains_all(&summary, &["2 packages"]);
    }

    #[test]
    fn npm_lock_summary() {
        let lock = r#"{"name":"app","lockfileVersion":3,"packages":{"":{},"node_modules/a":{},"node_modules/b":{}}}"#;
        let summary = extract_lock_summary(lock, "package-lock.json");
        assert_contains_all(&summary, &["app", "3 packages"]);
    }

    #[test]
    fn yarn_lock_summary_counts() {
        let lock = "# yarn lockfile v1\n\na@^1.0:\n  version \"1.0\"\n\nb@^2.0:\n  version \"2.0\"";
        let summary = extract_lock_summary(lock, "yarn.lock");
        assert_contains_all(&summary, &["2 packages"]);
    }

    #[test]
    fn go_sum_summary_counts_modules() {
        let sum = "github.com/a/b v1.0.0 h1:abc=\ngithub.com/a/b v1.0.0/go.mod h1:def=\ngithub.com/c/d v2.0.0 h1:ghi=";
        let summary = extract_lock_summary(sum, "go.sum");
        assert_contains_all(&summary, &["2 modules"]);
    }
}