lean_ctx/core/
structured_read.rs1use std::collections::BTreeMap;
2
3pub fn extract_markdown_outline(content: &str) -> String {
5 let mut parts = Vec::new();
6 let mut in_code_block = false;
7
8 for line in content.lines() {
9 let trimmed = line.trim();
10
11 if trimmed.starts_with("```") {
12 in_code_block = !in_code_block;
13 continue;
14 }
15 if in_code_block {
16 continue;
17 }
18
19 if let Some(heading) = parse_heading(trimmed) {
20 parts.push(heading);
21 }
22 }
23
24 if parts.is_empty() {
25 return String::new();
26 }
27
28 parts.join("\n")
29}
30
/// Parses a single, already-trimmed line as an ATX heading.
///
/// Returns the heading text prefixed with one space per level below the
/// first, or `None` when the line is not a heading:
///
/// * it does not start with a run of 1 to 6 `#` characters,
/// * the `#` run is not followed by whitespace (so `#hashtag`, `#123` and
///   `#!/bin/sh` are rejected), or
/// * nothing but whitespace follows the `#` run.
fn parse_heading(line: &str) -> Option<String> {
    let level = line.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }

    // `#` is ASCII, so `level` is always a valid char boundary.
    let after_hashes = &line[level..];
    if !after_hashes.starts_with(char::is_whitespace) {
        return None;
    }

    let text = after_hashes.trim();
    if text.is_empty() {
        return None;
    }

    let indent = " ".repeat(level - 1);
    Some(format!("{indent}{text}"))
}
43
44pub fn extract_json_structure(content: &str) -> String {
47 let trimmed = content.trim();
48 let val: serde_json::Value = match serde_json::from_str(trimmed) {
49 Ok(v) => v,
50 Err(_) => return String::new(),
51 };
52 format_json_value(&val, 0)
53}
54
55fn format_json_value(val: &serde_json::Value, depth: usize) -> String {
56 let indent = " ".repeat(depth);
57 match val {
58 serde_json::Value::Object(map) => {
59 if map.is_empty() {
60 return format!("{indent}{{}}");
61 }
62 if depth > 3 {
63 return format!("{indent}{{...{} keys}}", map.len());
64 }
65 let mut entries = Vec::new();
66 for (key, value) in map.iter().take(20) {
67 match value {
68 serde_json::Value::Object(inner) if !inner.is_empty() && depth < 3 => {
69 let nested = format_json_value(value, depth + 1);
70 entries.push(format!("{indent} {key}: {{\n{nested}\n{indent} }}"));
71 }
72 serde_json::Value::Array(arr) if !arr.is_empty() => {
73 let item_type = arr.first().map_or("any", json_type_name);
74 entries.push(format!("{indent} {key}: [{item_type}...{}]", arr.len()));
75 }
76 _ => {
77 entries.push(format!("{indent} {key}: {}", json_type_name(value)));
78 }
79 }
80 }
81 if map.len() > 20 {
82 entries.push(format!("{indent} ...+{} more keys", map.len() - 20));
83 }
84 entries.join("\n")
85 }
86 serde_json::Value::Array(arr) => {
87 if arr.is_empty() {
88 return format!("{indent}[]");
89 }
90 let first_schema = format_json_value(&arr[0], depth + 1);
91 format!(
92 "{indent}[{} items, each:\n{first_schema}\n{indent}]",
93 arr.len()
94 )
95 }
96 other => format!("{indent}{}", json_type_name(other)),
97 }
98}
99
100fn json_type_name(val: &serde_json::Value) -> &'static str {
101 match val {
102 serde_json::Value::Null => "null",
103 serde_json::Value::Bool(_) => "bool",
104 serde_json::Value::Number(_) => "num",
105 serde_json::Value::String(_) => "str",
106 serde_json::Value::Array(_) => "array",
107 serde_json::Value::Object(_) => "object",
108 }
109}
110
111pub fn extract_yaml_structure(content: &str) -> String {
113 let mut parts = Vec::new();
114 let mut prev_indent = 0usize;
115
116 for line in content.lines() {
117 let trimmed = line.trim();
118 if trimmed.is_empty() || trimmed.starts_with('#') {
119 continue;
120 }
121
122 let indent = line.len() - line.trim_start().len();
123 if let Some(key) = extract_yaml_key(trimmed) {
124 let level = indent / 2;
125 let prefix = " ".repeat(level);
126 parts.push(format!("{prefix}{key}"));
127 prev_indent = indent;
128 } else if trimmed.starts_with("- ") && indent <= prev_indent + 2 {
129 if let Some(key) = extract_yaml_key(trimmed.trim_start_matches("- ")) {
130 let level = indent / 2;
131 let prefix = " ".repeat(level);
132 parts.push(format!("{prefix}- {key}"));
133 }
134 }
135 }
136
137 deduplicate_consecutive(&parts)
138}
139
/// Extracts a `key: value` summary from one trimmed YAML line.
///
/// The line is split at its first `:`. Returns `None` when there is no colon,
/// when the key is empty, or when the key contains a space without being
/// quoted (single or double quotes), which filters out prose that merely
/// contains a colon.
///
/// The rendered value is:
/// * omitted (`key:`) when the value is empty or is a block scalar header
///   such as `|`, `>`, `|-`, `>+` or `|2`,
/// * replaced by `...` when it is longer than 40 bytes,
/// * kept verbatim otherwise.
fn extract_yaml_key(line: &str) -> Option<String> {
    /// True for `|` or `>` optionally followed by up to two chomping
    /// (`+`/`-`) or indentation (`1`-`9`) indicators.
    fn is_block_scalar_header(value: &str) -> bool {
        value
            .strip_prefix('|')
            .or_else(|| value.strip_prefix('>'))
            .map_or(false, |indicators| {
                indicators.len() <= 2
                    && indicators
                        .chars()
                        .all(|c| matches!(c, '+' | '-' | '1'..='9'))
            })
    }

    let (raw_key, raw_value) = line.split_once(':')?;

    let key = raw_key.trim();
    let quoted = key.starts_with('"') || key.starts_with('\'');
    if key.is_empty() || (key.contains(' ') && !quoted) {
        return None;
    }

    let value = raw_value.trim();
    let summary = if value.is_empty() || is_block_scalar_header(value) {
        format!("{key}:")
    } else if value.len() > 40 {
        format!("{key}: ...")
    } else {
        format!("{key}: {value}")
    };
    Some(summary)
}
155
/// Joins `lines` with newlines after collapsing runs of identical consecutive
/// lines into a single occurrence.
///
/// Only adjacent duplicates are removed; a line that reappears later is kept.
/// An empty first line is preserved like any other line. Returns an empty
/// string for an empty slice.
fn deduplicate_consecutive(lines: &[String]) -> String {
    let mut kept: Vec<&str> = lines.iter().map(String::as_str).collect();
    // `dedup` compares the string contents and removes adjacent repeats only.
    kept.dedup();
    kept.join("\n")
}
170
/// Produces a compact outline of a TOML document using a line-based scan (no
/// TOML parser is involved).
///
/// Blank lines and `#` comment lines are skipped. `key = value` lines are
/// grouped under the most recent table header; values longer than 40 bytes
/// are replaced by `...`.
///
/// Output order: keys that appear before any header come first, followed by
/// the tables sorted by name. Each table lists at most 10 keys, then a
/// `...+N more` line. Array-of-tables headers keep both brackets
/// (`[[bin]]`), and repeated headers with the same name are merged into one
/// entry.
pub fn extract_toml_structure(content: &str) -> String {
    const MAX_VALUE_LEN: usize = 40;
    const MAX_KEYS_PER_SECTION: usize = 10;

    // Keyed by section name; the empty name holds top-level keys and sorts
    // first. Array-of-tables names keep one bracket pair ("[bin]") so that
    // wrapping them in `[...]` below reproduces the original header.
    let mut sections: BTreeMap<String, Vec<String>> = BTreeMap::new();
    let mut current_section = String::new();

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        if trimmed.starts_with('[') {
            let name = if trimmed.starts_with("[[") {
                trimmed.find("]]").map(|end| &trimmed[1..=end])
            } else {
                trimmed.find(']').map(|end| &trimmed[1..end])
            };
            if let Some(name) = name {
                current_section = name.to_string();
                // Register the table even if it turns out to have no keys.
                sections.entry(current_section.clone()).or_default();
            }
            continue;
        }

        if let Some((key, value)) = trimmed.split_once('=') {
            let key = key.trim();
            let value = value.trim();
            let shown = if value.len() > MAX_VALUE_LEN { "..." } else { value };
            sections
                .entry(current_section.clone())
                .or_default()
                .push(format!("{key} = {shown}"));
        }
    }

    let mut parts: Vec<String> = Vec::new();
    for (section, keys) in &sections {
        if section.is_empty() {
            parts.extend(keys.iter().cloned());
        } else {
            parts.push(format!("[{section}]"));
            parts.extend(
                keys.iter()
                    .take(MAX_KEYS_PER_SECTION)
                    .map(|k| format!(" {k}")),
            );
            if keys.len() > MAX_KEYS_PER_SECTION {
                parts.push(format!(" ...+{} more", keys.len() - MAX_KEYS_PER_SECTION));
            }
        }
    }

    parts.join("\n")
}
220
221pub fn extract_lock_summary(content: &str, path: &str) -> String {
223 let lower = path.to_lowercase();
224 if lower.ends_with("cargo.lock") {
225 extract_cargo_lock_summary(content)
226 } else if lower.ends_with("package-lock.json") {
227 extract_npm_lock_summary(content)
228 } else if lower.ends_with("yarn.lock") {
229 extract_yarn_lock_summary(content)
230 } else if lower.ends_with("poetry.lock") || lower.ends_with("pdm.lock") {
231 extract_poetry_lock_summary(content)
232 } else if lower.ends_with("go.sum") {
233 extract_go_sum_summary(content)
234 } else {
235 extract_generic_lock_summary(content)
236 }
237}
238
/// Summarises a `Cargo.lock` file using a line-based scan.
///
/// The summary contains:
/// * the number of `[[package]]` tables,
/// * `workspace`: names of packages that have no `source` line,
/// * `direct deps`: up to 30 distinct names taken from the `dependencies`
///   lists of those source-less packages.
///
/// Every table header ends the package being scanned. Lines under tables
/// other than `[[package]]` and `[root]` (for example `[[patch.unused]]` or
/// `[metadata]`) are ignored, so their `name` / `source` entries cannot be
/// attributed to the preceding package.
fn extract_cargo_lock_summary(content: &str) -> String {
    const MAX_DIRECT_DEPS: usize = 30;

    /// Records `name` as a local crate if the finished package had no source.
    fn record_local<'a>(crates: &mut Vec<&'a str>, name: Option<&'a str>, has_source: bool) {
        if let Some(name) = name {
            if !has_source && !crates.contains(&name) {
                crates.push(name);
            }
        }
    }

    let mut pkg_count = 0usize;
    let mut local_crates: Vec<&str> = Vec::new();
    let mut local_deps: Vec<&str> = Vec::new();
    let mut current_name: Option<&str> = None;
    let mut has_source = false;
    let mut in_deps = false;
    // Starts out true so that lines preceding the first table header are
    // still scanned.
    let mut in_package_table = true;

    for line in content.lines() {
        let t = line.trim();

        // Any table header closes the package collected so far.
        if t.starts_with('[') && t.ends_with(']') {
            record_local(&mut local_crates, current_name.take(), has_source);
            has_source = false;
            in_deps = false;
            in_package_table = t == "[[package]]" || t == "[root]";
            if t == "[[package]]" {
                pkg_count += 1;
            }
            continue;
        }
        if !in_package_table {
            continue;
        }

        if let Some(value) = t.strip_prefix("name = ") {
            current_name = Some(value.trim_matches('"'));
        } else if t.starts_with("source = ") {
            has_source = true;
        } else if t.starts_with("dependencies = [") {
            // Only dependency lists of source-less packages are collected.
            if !has_source {
                in_deps = true;
            }
        } else if in_deps {
            if t == "]" {
                in_deps = false;
            } else {
                // Entries look like `"name",` or `"name version ...",`.
                let dep = t.trim_matches(|c: char| c == '"' || c == ',');
                let dep_name = dep.split_whitespace().next().unwrap_or(dep);
                if !dep_name.is_empty()
                    && local_deps.len() < MAX_DIRECT_DEPS
                    && !local_deps.contains(&dep_name)
                {
                    local_deps.push(dep_name);
                }
            }
        }
    }
    record_local(&mut local_crates, current_name, has_source);

    let mut out = format!("Cargo.lock: {pkg_count} packages");
    if !local_crates.is_empty() {
        out.push_str(&format!("\n workspace: {}", local_crates.join(", ")));
    }
    if !local_deps.is_empty() {
        out.push_str(&format!("\n direct deps: {}", local_deps.join(", ")));
    }
    out
}
300
301fn extract_npm_lock_summary(content: &str) -> String {
302 let val: serde_json::Value = match serde_json::from_str(content) {
303 Ok(v) => v,
304 Err(_) => return extract_generic_lock_summary(content),
305 };
306 let name = val.get("name").and_then(|v| v.as_str()).unwrap_or("?");
307 let pkg_count = val
308 .get("packages")
309 .and_then(|v| v.as_object())
310 .map(serde_json::Map::len)
311 .or_else(|| {
312 val.get("dependencies")
313 .and_then(|v| v.as_object())
314 .map(serde_json::Map::len)
315 })
316 .unwrap_or(0);
317 format!("package-lock.json ({name}): {pkg_count} packages")
318}
319
/// Estimates the number of entries in a `yarn.lock`.
///
/// Counts the lines that contain an `@` and start with neither a space nor
/// `#`. The result is reported as approximate (`~`).
fn extract_yarn_lock_summary(content: &str) -> String {
    let mut entries = 0usize;
    for line in content.lines() {
        let indented_or_comment = matches!(line.as_bytes().first(), Some(b' ' | b'#'));
        if !indented_or_comment && line.contains('@') {
            entries += 1;
        }
    }
    format!("yarn.lock: ~{entries} packages")
}
327
/// Summarises a Poetry-style lock file by counting its `[[package]]` table
/// headers (surrounding whitespace on the line is ignored).
fn extract_poetry_lock_summary(content: &str) -> String {
    let header = "[[package]]";
    let total = content
        .lines()
        .map(str::trim)
        .filter(|line| *line == header)
        .count();
    format!("poetry.lock: {total} packages")
}
335
/// Summarises a `go.sum` by counting the distinct values of the first
/// space-separated field across all lines. Lines without a space are ignored.
fn extract_go_sum_summary(content: &str) -> String {
    let distinct = content
        .lines()
        .filter_map(|line| line.split_once(' '))
        .map(|(module, _rest)| module)
        .collect::<std::collections::HashSet<&str>>();
    format!("go.sum: {} modules", distinct.len())
}
345
/// Fallback summary for lock files of an unrecognised format: reports only
/// the number of lines.
fn extract_generic_lock_summary(content: &str) -> String {
    format!("lock file: {} lines", content.lines().count())
}
350
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every entry of `needles` occurs somewhere in `haystack`.
    fn assert_contains_all(haystack: &str, needles: &[&str]) {
        for needle in needles {
            assert!(
                haystack.contains(*needle),
                "expected {needle:?} in {haystack:?}"
            );
        }
    }

    // ---- Markdown -------------------------------------------------------

    #[test]
    fn markdown_outline_extracts_headings() {
        let doc =
            "# Title\n\nSome text.\n\n## Section A\n\n### Sub A1\n\n## Section B\n\nMore text.";
        assert_contains_all(
            &extract_markdown_outline(doc),
            &["Title", " Section A", " Sub A1", " Section B"],
        );
    }

    #[test]
    fn markdown_outline_skips_code_blocks() {
        let doc = "# Real\n\n```\n# Not a heading\n```\n\n## Also Real";
        let result = extract_markdown_outline(doc);
        assert_contains_all(&result, &["Real", "Also Real"]);
        assert!(!result.contains("Not a heading"));
    }

    #[test]
    fn markdown_outline_empty_for_no_headings() {
        let doc = "Just plain text\nwithout any headings.";
        assert!(extract_markdown_outline(doc).is_empty());
    }

    // ---- JSON -----------------------------------------------------------

    #[test]
    fn json_structure_extracts_keys() {
        let doc = r#"{"name": "test", "version": "1.0", "deps": {"a": 1, "b": 2}}"#;
        assert_contains_all(
            &extract_json_structure(doc),
            &["name: str", "version: str", "deps: {", "a: num"],
        );
    }

    #[test]
    fn json_structure_handles_arrays() {
        let doc = r#"[{"id": 1}, {"id": 2}]"#;
        assert_contains_all(&extract_json_structure(doc), &["2 items", "id: num"]);
    }

    #[test]
    fn json_structure_empty_for_invalid() {
        assert!(extract_json_structure("not json").is_empty());
    }

    // ---- YAML -----------------------------------------------------------

    #[test]
    fn yaml_structure_extracts_keys() {
        let doc =
            "name: my-app\nversion: 1.0\nservices:\n web:\n port: 8080\n db:\n port: 5432";
        assert_contains_all(
            &extract_yaml_structure(doc),
            &["name: my-app", "version: 1.0", "services:", "web:"],
        );
    }

    #[test]
    fn yaml_structure_skips_comments() {
        let doc = "# Comment\nkey: value\n# Another comment\nkey2: value2";
        let result = extract_yaml_structure(doc);
        assert!(!result.contains("Comment"));
        assert_contains_all(&result, &["key: value", "key2: value2"]);
    }

    // ---- TOML -----------------------------------------------------------

    #[test]
    fn toml_structure_extracts_sections() {
        let doc =
            "[package]\nname = \"test\"\nversion = \"0.1.0\"\n\n[dependencies]\nserde = \"1.0\"";
        assert_contains_all(
            &extract_toml_structure(doc),
            &[
                "[package]",
                "name = \"test\"",
                "[dependencies]",
                "serde = \"1.0\"",
            ],
        );
    }

    #[test]
    fn toml_structure_handles_top_level_keys() {
        let doc = "key = \"value\"\n\n[section]\na = 1";
        assert_contains_all(
            &extract_toml_structure(doc),
            &["key = \"value\"", "[section]"],
        );
    }

    // ---- Lock files -----------------------------------------------------

    #[test]
    fn cargo_lock_summary() {
        let lock = "[[package]]\nname = \"serde\"\nversion = \"1.0\"\n\n[[package]]\nname = \"tokio\"\nversion = \"1.0\"";
        let result = extract_lock_summary(lock, "Cargo.lock");
        assert!(result.contains("2 packages"));
    }

    #[test]
    fn npm_lock_summary() {
        let lock = r#"{"name":"app","lockfileVersion":3,"packages":{"":{},"node_modules/a":{},"node_modules/b":{}}}"#;
        assert_contains_all(
            &extract_lock_summary(lock, "package-lock.json"),
            &["app", "3 packages"],
        );
    }

    #[test]
    fn yarn_lock_summary_counts() {
        let lock = "# yarn lockfile v1\n\na@^1.0:\n version \"1.0\"\n\nb@^2.0:\n version \"2.0\"";
        let result = extract_lock_summary(lock, "yarn.lock");
        assert!(result.contains("2 packages"));
    }

    #[test]
    fn go_sum_summary_counts_modules() {
        let sum = "github.com/a/b v1.0.0 h1:abc=\ngithub.com/a/b v1.0.0/go.mod h1:def=\ngithub.com/c/d v2.0.0 h1:ghi=";
        let result = extract_lock_summary(sum, "go.sum");
        assert!(result.contains("2 modules"));
    }
}