1use crate::build::{catalog_index_from_value, CatalogIndex};
4use crate::paths::{self, decomposed_prefix, get_root_tool_key, json_ext, md_ext, tool_id_from_decomposed_rel};
5use crate::policies::{
6 append_description_reinstate_entries, effective_policy, mcp_required_enum_values,
7 needs_description_reinstate, required_enum_values_by_tool, system_required_enum_values,
8 PolicyContext, ToolPolicy,
9};
10use crate::runtime_config;
11use serde_json::{json, Map, Value};
12use std::collections::{HashMap, HashSet};
13use std::path::{Path, PathBuf};
14
15#[derive(Debug, Clone, Default)]
17pub struct DecomposedCatalog {
18 pub(crate) json_files: HashMap<String, Value>,
20}
21
22impl DecomposedCatalog {
23 #[must_use]
25 pub const fn from_json_files(json_files: HashMap<String, Value>) -> Self {
26 Self { json_files }
27 }
28
29 #[must_use]
31 pub const fn json_files(&self) -> &HashMap<String, Value> {
32 &self.json_files
33 }
34
35 #[must_use]
37 pub fn from_catalog_index(index: &CatalogIndex) -> Self {
38 let mut json_files = HashMap::new();
39 for (rel_path, content) in &index.files {
40 if rel_path.starts_with(&decomposed_prefix()) && rel_path.ends_with(&json_ext())
41 && let Ok(parsed) = serde_json::from_str::<Value>(content)
42 && parsed.is_object() {
43 json_files.insert(rel_path.clone(), parsed);
44 }
45 }
46 Self { json_files }
47 }
48
49 #[must_use]
51 pub fn from_catalog_dict(data: &Value) -> Self {
52 let mut json_files = HashMap::new();
53 if let Some(entries) = data.get("json").and_then(|v| v.as_array()) {
54 for entry in entries {
55 let Some(obj) = entry.as_object() else {
56 continue;
57 };
58 let Some(file_path) = obj.get("file_path").and_then(|v| v.as_str()) else {
59 continue;
60 };
61 let Some(content) = obj.get("content") else {
62 continue;
63 };
64 if !content.is_object() {
65 continue;
66 }
67 if let Some(key) = paths::to_decomposed_key(file_path) {
68 json_files.insert(key, content.clone());
69 }
70 }
71 }
72 Self { json_files }
73 }
74
75 pub fn merge_json_files(&mut self, other: &Self) {
77 self.json_files.extend(other.json_files.clone());
78 }
79
80 #[must_use]
82 pub fn resolve_key(&self, file_path: &str) -> Option<String> {
83 let mut candidates = Vec::new();
84 if let Some(normalized) = paths::to_decomposed_key(file_path) {
85 candidates.push(normalized);
86 }
87 candidates.push(file_path.to_string());
88 candidates
89 .into_iter()
90 .find(|candidate| self.has_json(candidate))
91 }
92
93 #[must_use]
95 pub fn has_json(&self, key: &str) -> bool {
96 self.json_files.contains_key(key)
97 }
98
99 #[must_use]
101 pub fn get_json(&self, key: &str) -> Option<&Value> {
102 self.json_files.get(key)
103 }
104}
105
106#[must_use]
108pub fn decomposed_catalog_from_value(val: &Value) -> DecomposedCatalog {
109 if val.get("tools").is_some() && val.get("files").is_some() {
110 let idx = catalog_index_from_value(val);
111 return DecomposedCatalog::from_catalog_index(&idx);
112 }
113 if let Some(map) = val.as_object() {
114 let mut json_files = HashMap::new();
115 for (k, v) in map {
116 if v.is_object() {
117 json_files.insert(k.clone(), v.clone());
118 }
119 }
120 if !json_files.is_empty() {
121 return DecomposedCatalog::from_json_files(json_files);
122 }
123 }
124 DecomposedCatalog::default()
125}
126
127#[must_use]
129pub fn deep_merge(base: &Value, override_val: &Value) -> Value {
130 match (base, override_val) {
131 (Value::Object(base_map), Value::Object(override_map)) => {
132 let mut result = base_map.clone();
133 for (key, val) in override_map {
134 if let Some(existing) = result.get(key)
135 && existing.is_object() && val.is_object() {
136 result.insert(key.clone(), deep_merge(existing, val));
137 continue;
138 }
139 result.insert(key.clone(), val.clone());
140 }
141 Value::Object(result)
142 }
143 _ => override_val.clone(),
144 }
145}
146
147#[must_use]
149pub fn climb_and_merge(leaf_path: &str, catalog: &DecomposedCatalog) -> Value {
150 let leaf_key = catalog.resolve_key(leaf_path).unwrap_or_else(|| {
151 paths::to_decomposed_key(leaf_path).unwrap_or_else(|| leaf_path.to_string())
152 });
153
154 let Some(mut current) = catalog.get_json(&leaf_key).cloned() else {
155 return json!({});
156 };
157
158 let mut current_path = PathBuf::from(&leaf_key);
159 current_path.pop();
160
161 let decomposed_root = paths::decomposed_root();
162
163 loop {
164 let parent_dir = current_path.parent().map(std::path::Path::to_path_buf);
165 let Some(parent_dir) = parent_dir else {
166 break;
167 };
168 if parent_dir == decomposed_root || !parent_dir.starts_with(&decomposed_root) {
169 break;
170 }
171
172 let parent_key = format!(
173 "{}/{}{}",
174 parent_dir.to_string_lossy(),
175 current_path
176 .file_name()
177 .unwrap_or_default()
178 .to_string_lossy(),
179 json_ext(),
180 );
181 if let Some(parent) = catalog.get_json(&parent_key) {
182 current = deep_merge(parent, ¤t);
183 }
184 current_path = parent_dir;
185 }
186 current
187}
188
189#[must_use]
191pub fn extract_scores(data: &Value) -> HashMap<String, f64> {
192 let mut scores = HashMap::new();
193 let Some(obj) = data.as_object() else {
194 return scores;
195 };
196 if let Some(md) = obj.get("md").and_then(|v| v.as_array()) {
197 for entry in md {
198 if let Some(e) = entry.as_object()
199 && let (Some(content), Some(score)) = (
200 e.get("content").and_then(|v| v.as_str()),
201 json_f64(e.get("score")),
202 ) {
203 scores.insert(content.to_string(), score);
204 }
205 }
206 }
207 if let Some(json_arr) = obj.get("json").and_then(|v| v.as_array()) {
208 for entry in json_arr {
209 if let Some(e) = entry.as_object()
210 && let (Some(fp), Some(score)) = (
211 e.get("file_path").and_then(|v| v.as_str()),
212 json_f64(e.get("score")),
213 ) {
214 scores.insert(fp.to_string(), score);
215 }
216 }
217 }
218 scores
219}
220
221fn json_f64(value: Option<&Value>) -> Option<f64> {
223 let v = value?;
224 if let Some(n) = v.as_f64() {
225 return Some(n);
226 }
227 v.as_str()
228 .and_then(|s| s.trim().parse::<f64>().ok())
229}
230
231fn extract_from_dict(
232 data: &Map<String, Value>,
233 apply_decomposed_score_filter: bool,
234) -> Vec<String> {
235 let mut input_files = Vec::new();
236 for (key, value) in data {
237 if key == "md" {
238 continue;
239 }
240 if let Some(arr) = value.as_array() {
241 for entry in arr {
242 if let Some(e) = entry.as_object()
243 && let Some(fp) = e.get("file_path").and_then(|v| v.as_str()) {
244 if key == "json" && apply_decomposed_score_filter {
245 let score = json_f64(e.get("score")).unwrap_or(0.0);
246 if score <= runtime_config::decomposed_score() {
247 continue;
248 }
249 }
250 input_files.push(fp.to_string());
251 }
252 }
253 } else if let Some(e) = value.as_object()
254 && let Some(fp) = e.get("file_path").and_then(|v| v.as_str()) {
255 input_files.push(fp.to_string());
256 }
257 }
258 input_files
259}
260
261#[must_use]
263pub fn extract_input_files(data: &Value, apply_decomposed_score_filter: bool) -> Vec<String> {
264 if let Some(obj) = data.as_object() {
265 return extract_from_dict(obj, apply_decomposed_score_filter);
266 }
267 if let Some(arr) = data.as_array() {
268 return arr
269 .iter()
270 .filter_map(|entry| {
271 entry
272 .as_object()
273 .and_then(|e| e.get("file_path"))
274 .and_then(|v| v.as_str())
275 .map(String::from)
276 })
277 .collect();
278 }
279 Vec::new()
280}
281
282#[must_use]
284pub fn parse_json_input(
285 data: &Value,
286 apply_decomposed_score_filter: bool,
287) -> (Vec<String>, HashMap<String, f64>) {
288 (
289 extract_input_files(data, apply_decomposed_score_filter),
290 extract_scores(data),
291 )
292}
293
294fn filter_items(items_with_scores: &[(Value, f64)]) -> Vec<Value> {
295 let first_3_above = items_with_scores
296 .iter()
297 .take(3)
298 .all(|(_, score)| *score >= runtime_config::enum_score());
299
300 if first_3_above {
301 items_with_scores
302 .iter()
303 .filter(|(_, score)| *score >= runtime_config::enum_score())
304 .map(|(item, _)| item.clone())
305 .collect()
306 } else {
307 items_with_scores
308 .iter()
309 .take(3)
310 .map(|(item, _)| item.clone())
311 .collect()
312 }
313}
314
315pub fn filter_and_sort_enums<S: std::hash::BuildHasher, P: std::hash::BuildHasher>(
317 schema: &mut Value,
318 scores: &HashMap<String, f64, S>,
319 preserve_values: Option<&HashSet<String, P>>,
320) {
321 match schema {
322 Value::Object(map) => {
323 let keys: Vec<String> = map.keys().cloned().collect();
324 for key in keys {
325 if key == "enum" {
326 if let Some(Value::Array(items)) = map.get("enum").cloned() {
327 let mut preserved = Vec::new();
328 let mut prunable = Vec::new();
329 for item in items {
330 if preserve_values
331 .is_some_and(|pv| pv.contains(&item.to_string()))
332 {
333 preserved.push(item);
334 } else {
335 prunable.push(item);
336 }
337 }
338 let mut items_with_scores: Vec<(Value, f64)> = prunable
339 .into_iter()
340 .map(|item| {
341 let score = scores.get(&item.to_string()).copied().unwrap_or(0.0);
342 (item, score)
343 })
344 .collect();
345 items_with_scores.sort_by(|a, b| {
346 b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
347 });
348 preserved.extend(filter_items(&items_with_scores));
349 map.insert("enum".into(), Value::Array(preserved));
350 }
351 } else if let Some(val) = map.get(&key).cloned() {
352 let mut inner = val;
353 filter_and_sort_enums(&mut inner, scores, preserve_values);
354 map.insert(key, inner);
355 }
356 }
357 }
358 Value::Array(items) => {
359 for item in items.iter_mut() {
360 filter_and_sort_enums(item, scores, preserve_values);
361 }
362 }
363 _ => {}
364 }
365}
366
367#[must_use]
369pub fn group_files(
370 input_files: &[String],
371 catalog: &DecomposedCatalog,
372) -> (HashMap<String, Vec<String>>, HashSet<String>) {
373 let mut groups: HashMap<String, Vec<String>> = HashMap::new();
374 let mut tool_files = HashSet::new();
375 let decomposed_root = paths::decomposed_root();
376
377 for file_path in input_files {
378 let Some(key) = catalog.resolve_key(file_path) else {
379 eprintln!("Warning: File not found: {file_path}");
380 continue;
381 };
382 let rel = Path::new(&key)
383 .strip_prefix(&decomposed_root)
384 .unwrap_or_else(|_| Path::new(&key));
385 let parts: Vec<_> = rel.components().collect();
386 let is_tool =
387 parts.len() == 1 && parts[0].as_os_str().to_string_lossy().ends_with(&json_ext());
388
389 let Some(root_tool) = paths::get_root_tool_key(&key) else {
390 continue;
391 };
392 if is_tool {
393 tool_files.insert(key.clone());
394 }
395 groups.entry(root_tool).or_default().push(key);
396 }
397 (groups, tool_files)
398}
399
400fn tool_shell_from_root_key(root_tool: &str) -> Value {
401 let name = Path::new(root_tool)
402 .file_stem()
403 .unwrap_or_default()
404 .to_string_lossy();
405 json!({
406 "name": name,
407 "inputSchema": {"type": "object", "properties": {}},
408 })
409}
410
411#[must_use]
413pub fn build_process_groups_options(
414 ctx: &PolicyContext,
415 catalog_dict: &Value,
416 store: &DecomposedCatalog,
417 preserve_values: Option<Vec<String>>,
418) -> ProcessGroupsOptions {
419 let mut system_preserve = system_required_enum_values(catalog_dict);
420 if let Some(pv) = preserve_values
421 && system_preserve.is_empty() {
422 system_preserve = pv.into_iter().collect();
423 }
424 let mcp_preserve = mcp_required_enum_values(catalog_dict);
425 let required_by_tool = required_enum_values_by_tool(catalog_dict);
426
427 let mut prune_optional_tools = HashSet::new();
428 for key in store.json_files().keys() {
429 if let Some(root_tool) = get_root_tool_key(key) {
430 let tool_name = tool_id_from_decomposed_rel(&root_tool);
431 let policy = effective_policy(ctx, &tool_name);
432 if matches!(
433 policy,
434 ToolPolicy::PruneOptional | ToolPolicy::PruneOptionalDescriptions
435 ) {
436 prune_optional_tools.insert(tool_name);
437 }
438 }
439 }
440
441 ProcessGroupsOptions {
442 system_preserve: (!system_preserve.is_empty()).then_some(system_preserve),
443 mcp_preserve: (!mcp_preserve.is_empty()).then_some(mcp_preserve),
444 required_by_tool,
445 prune_optional_tools,
446 }
447}
448
/// Settings consulted by [`process_groups`] when pruning `enum` arrays.
#[derive(Clone, Debug, Default)]
pub struct ProcessGroupsOptions {
    /// Second-choice set of enum values to keep, used when the tool has no
    /// entry in `required_by_tool`.
    pub system_preserve: Option<HashSet<String>>,
    /// Last-choice set of enum values to keep, used when neither of the other
    /// two sources applies.
    pub mcp_preserve: Option<HashSet<String>>,
    /// Per-tool sets of enum values to keep, keyed by tool name.
    pub required_by_tool: HashMap<String, HashSet<String>>,
    /// Names of the tools for which any preserve set is applied at all.
    pub prune_optional_tools: HashSet<String>,
}
461
462#[must_use]
464pub fn process_groups_options_from_fields<S: std::hash::BuildHasher + Default>(
465 system_preserve: Option<Vec<String>>,
466 mcp_preserve: Option<Vec<String>>,
467 required_by_tool: Option<HashMap<String, Vec<String>, S>>,
468 required_enum_values_by_tool: Option<HashMap<String, Vec<String>, S>>,
469 prune_optional_tools: Option<Vec<String>>,
470) -> ProcessGroupsOptions {
471 let required_by_tool = required_by_tool
472 .or(required_enum_values_by_tool)
473 .unwrap_or_default()
474 .into_iter()
475 .map(|(k, v)| (k, v.into_iter().collect()))
476 .collect();
477 ProcessGroupsOptions {
478 system_preserve: system_preserve.map(|items| items.into_iter().collect()),
479 mcp_preserve: mcp_preserve.map(|items| items.into_iter().collect()),
480 required_by_tool,
481 prune_optional_tools: prune_optional_tools
482 .unwrap_or_default()
483 .into_iter()
484 .collect(),
485 }
486}
487
488#[must_use]
490pub fn process_groups<S: std::hash::BuildHasher>(
491 groups: &HashMap<String, Vec<String>, S>,
492 tool_files: &HashSet<String, S>,
493 scores: &HashMap<String, f64, S>,
494 catalog: &DecomposedCatalog,
495 opts: &ProcessGroupsOptions,
496) -> Vec<Value> {
497 let mut tools = Vec::new();
498
499 for (root_tool, files) in groups {
500 let mut base_tool = catalog
501 .get_json(root_tool)
502 .cloned()
503 .unwrap_or_else(|| tool_shell_from_root_key(root_tool));
504
505 let tool_name_in_schema = base_tool
506 .get("name")
507 .and_then(|v| v.as_str())
508 .unwrap_or("")
509 .to_string();
510
511 for file_key in files {
512 if tool_files.contains(file_key) {
513 continue;
514 }
515 base_tool = deep_merge(&base_tool, &climb_and_merge(file_key, catalog));
516 }
517
518 let stem_name = Path::new(root_tool)
519 .file_stem()
520 .unwrap_or_default()
521 .to_string_lossy()
522 .into_owned();
523 let tool_name = base_tool
524 .get("name")
525 .and_then(|v| v.as_str())
526 .filter(|s| !s.is_empty())
527 .unwrap_or(if tool_name_in_schema.is_empty() {
528 stem_name.as_str()
529 } else {
530 tool_name_in_schema.as_str()
531 })
532 .to_string();
533
534 if let Some(obj) = base_tool.as_object().cloned() {
535 let mut obj = obj;
536 obj.insert("name".into(), Value::String(tool_name.clone()));
537 obj.remove("id");
538 base_tool = Value::Object(obj);
539 }
540
541 if !scores.is_empty() {
542 let enum_preserve = if opts.prune_optional_tools.contains(&tool_name) {
543 opts.required_by_tool
544 .get(&tool_name)
545 .cloned()
546 .or_else(|| opts.system_preserve.clone())
547 .or_else(|| opts.mcp_preserve.clone())
548 } else {
549 None
550 };
551 filter_and_sort_enums(&mut base_tool, scores, enum_preserve.as_ref());
552 }
553 tools.push(base_tool);
554 }
555 tools
556}
557
558#[derive(Debug, Clone, Default)]
560pub struct RetrieveOptions {
561 pub apply_decomposed_score_filter: bool,
563 pub process_groups: ProcessGroupsOptions,
565}
566
567pub fn resolve_build_catalog(catalog: &Value, survivor_data: &Value) -> Value {
569 if catalog.get("tools").is_some() && catalog.get("files").is_some() {
570 return catalog_index_from_value(catalog).to_catalog_dict();
571 }
572 if catalog
573 .get("json")
574 .and_then(Value::as_array)
575 .is_some_and(|arr| !arr.is_empty())
576 {
577 return catalog.clone();
578 }
579 survivor_data.clone()
580}
581pub fn apply_description_reinstate_to_data(
584 ctx: &PolicyContext,
585 data: &Value,
586 build_catalog: &Value,
587) -> (Value, DecomposedCatalog) {
588 let mut retrieve_data = data.clone();
589 let mut survivor = DecomposedCatalog::from_catalog_dict(data);
590 if !needs_description_reinstate(ctx) {
591 return (retrieve_data, survivor);
592 }
593
594 let json_entries = data
595 .get("json")
596 .and_then(Value::as_array)
597 .map_or(&[] as &[Value], std::vec::Vec::as_slice);
598 let empty_index = CatalogIndex {
599 tools: Vec::new(),
600 files: HashMap::new(),
601 };
602 let mitigated = append_description_reinstate_entries(
603 ctx,
604 json_entries,
605 build_catalog,
606 &empty_index,
607 );
608 if let Some(obj) = retrieve_data.as_object_mut() {
609 obj.insert("json".into(), Value::Array(mitigated));
610 }
611 survivor = DecomposedCatalog::from_catalog_dict(&retrieve_data);
612 (retrieve_data, survivor)
613}
614
615pub fn retrieve_tools_from_catalog(
617 ctx: &PolicyContext,
618 data: &Value,
619 build_catalog: &Value,
620 store: &mut DecomposedCatalog,
621 opts: &RetrieveOptions,
622) -> Vec<Value> {
623 let (retrieve_data, survivor) = apply_description_reinstate_to_data(ctx, data, build_catalog);
624 retrieve_core(&retrieve_data, store, &survivor, opts)
625}
626
627pub fn retrieve_core(
629 data: &Value,
630 store: &mut DecomposedCatalog,
631 survivor_overlay: &DecomposedCatalog,
632 opts: &RetrieveOptions,
633) -> Vec<Value> {
634 if !survivor_overlay.json_files.is_empty() {
635 store.merge_json_files(survivor_overlay);
636 }
637
638 let (input_files, scores) = parse_json_input(data, opts.apply_decomposed_score_filter);
639 let (groups, tool_files) = group_files(&input_files, store);
640 process_groups(&groups, &tool_files, &scores, store, &opts.process_groups)
641}
642
/// Settings for [`removed_chunks`].
#[derive(Clone, Debug, Default)]
pub struct RemovedChunksOptions {
    /// When set, surviving `json` entries scoring at or below
    /// `runtime_config::decomposed_score()` are not counted as survivors.
    pub apply_decomposed_score_filter: bool,
}
650
651#[must_use]
653pub fn chunk_survivor_key(entry: &Value, section: &str) -> Option<String> {
654 let obj = entry.as_object()?;
655 if let Some(fp) = obj.get("file_path").and_then(|v| v.as_str()) {
656 return paths::to_decomposed_key(fp).or_else(|| Some(fp.to_string()));
657 }
658 if section == "md"
659 && let Some(content) = obj.get("content").and_then(|v| v.as_str()) {
660 return Some(format!("md:content:{content}"));
661 }
662 None
663}
664
665fn survivor_key_sets(
666 surviving: &Value,
667 apply_decomposed_score_filter: bool,
668) -> (HashSet<String>, HashSet<String>) {
669 let mut json_keys = HashSet::new();
670 let mut md_keys = HashSet::new();
671 let Some(obj) = surviving.as_object() else {
672 return (json_keys, md_keys);
673 };
674 if let Some(arr) = obj.get("json").and_then(|v| v.as_array()) {
675 for entry in arr {
676 let Some(e) = entry.as_object() else {
677 continue;
678 };
679 if apply_decomposed_score_filter {
680 let score = json_f64(e.get("score")).unwrap_or(0.0);
681 if score <= runtime_config::decomposed_score() {
682 continue;
683 }
684 }
685 if let Some(key) = chunk_survivor_key(entry, "json") {
686 json_keys.insert(key);
687 }
688 }
689 }
690 if let Some(arr) = obj.get("md").and_then(|v| v.as_array()) {
691 for entry in arr {
692 if let Some(key) = chunk_survivor_key(entry, "md") {
693 md_keys.insert(key);
694 }
695 }
696 }
697 (json_keys, md_keys)
698}
699
700fn removed_section(
701 full: &Value,
702 section: &str,
703 survivor_keys: &HashSet<String>,
704) -> Vec<Value> {
705 let Some(arr) = full.get(section).and_then(|v| v.as_array()) else {
706 return Vec::new();
707 };
708 let mut removed = Vec::new();
709 for entry in arr {
710 let key = chunk_survivor_key(entry, section);
711 if key.as_ref().is_some_and(|k| survivor_keys.contains(k)) {
712 continue;
713 }
714 removed.push(entry.clone());
715 }
716 removed
717}
718
719#[must_use]
721pub fn removed_chunks(
722 full_catalog: &Value,
723 surviving: &Value,
724 opts: &RemovedChunksOptions,
725) -> Value {
726 let (json_keys, md_keys) =
727 survivor_key_sets(surviving, opts.apply_decomposed_score_filter);
728 let json = removed_section(full_catalog, "json", &json_keys);
729 let md = removed_section(full_catalog, "md", &md_keys);
730 json!({
731 "json": json,
732 "md": md,
733 })
734}
735
736pub fn load_catalog_from_dir(dir_path: &str) -> Result<Value, String> {
742 let root = Path::new(dir_path);
743 if !root.is_dir() {
744 return Err(format!("Directory not found: {dir_path}"));
745 }
746
747 let mut md_entries = Vec::new();
748 let mut json_entries = Vec::new();
749
750 for entry in walkdir_light(root)? {
751 let path = entry;
752 if !path.is_file() {
753 continue;
754 }
755 let path_str = path.to_string_lossy();
756 if !paths::is_catalog_decomposed_path(&path_str) {
757 continue;
758 }
759 let suffix = path.extension().and_then(|s| s.to_str()).unwrap_or("");
760 let is_skills_md = paths::to_skills_decomposed_key(&path_str).is_some()
761 && suffix.eq_ignore_ascii_case(trim_dot(&md_ext()))
762 && path.file_name().and_then(|n| n.to_str()) != Some("document.json");
763 if is_skills_md || (paths::to_decomposed_key(&path_str).is_some()
764 && suffix.eq_ignore_ascii_case(trim_dot(&md_ext())))
765 {
766 if let Ok(content) = std::fs::read_to_string(&path) {
767 md_entries.push(json!({
768 "id": path.file_stem().unwrap_or_default().to_string_lossy(),
769 "file_path": path.to_string_lossy(),
770 "score": 0.0,
771 "start_line": 1,
772 "end_line": 1,
773 "language": "markdown",
774 "content": content,
775 }));
776 }
777 } else if suffix.eq_ignore_ascii_case(trim_dot(&json_ext()))
778 && paths::to_decomposed_key(&path_str).is_some()
779 {
780 let raw_text = std::fs::read_to_string(&path).map_err(|e| e.to_string())?;
781 let content: Value = serde_json::from_str(&raw_text).map_err(|e| e.to_string())?;
782 let line_count = raw_text.lines().count();
783 let rel_path = path.to_string_lossy();
784 let decomposed_key = paths::to_decomposed_key(&rel_path);
785 let entry_id = content
786 .get("id")
787 .cloned()
788 .or_else(|| {
789 decomposed_key
790 .as_ref()
791 .map(|k| Value::String(paths::tool_id_from_decomposed_rel(k)))
792 })
793 .unwrap_or_else(|| {
794 Value::String(
795 path.file_stem()
796 .unwrap_or_default()
797 .to_string_lossy()
798 .into_owned(),
799 )
800 });
801 json_entries.push(json!({
802 "id": entry_id,
803 "name": entry_id,
804 "file_path": rel_path,
805 "score": 0.0,
806 "start_line": 1,
807 "end_line": line_count,
808 "language": "json",
809 "content": content,
810 }));
811 }
812 }
813
814 if md_entries.is_empty() && json_entries.is_empty() {
815 eprintln!("Warning: No .json or .md files found in {dir_path}");
816 }
817
818 Ok(json!({
819 "md": md_entries,
820 "json": json_entries,
821 }))
822}
823
/// Removes a single leading `.` from `ext`, if there is one.
fn trim_dot(ext: &str) -> &str {
    match ext.strip_prefix('.') {
        Some(rest) => rest,
        None => ext,
    }
}
827
/// Lists every non-directory path below `root`, descending into directories.
///
/// Traversal uses an explicit stack instead of recursion; the order of the
/// returned paths is unspecified. Whether a path is a directory is decided by
/// `Path::is_dir`.
///
/// # Errors
///
/// Returns the I/O error text when a directory cannot be read or one of its
/// entries cannot be obtained.
fn walkdir_light(root: &Path) -> Result<Vec<PathBuf>, String> {
    let mut pending = vec![root.to_path_buf()];
    let mut found = Vec::new();

    while let Some(dir) = pending.pop() {
        for item in std::fs::read_dir(&dir).map_err(|e| e.to_string())? {
            let path = item.map_err(|e| e.to_string())?.path();
            let bucket = if path.is_dir() {
                &mut pending
            } else {
                &mut found
            };
            bucket.push(path);
        }
    }
    Ok(found)
}
845
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Payload with a single `json` entry whose score is the string `"0.003"`.
    fn low_score_payload() -> Value {
        json!({
            "json": [{
                "file_path": "schemas/decomposed/Agent.json",
                "score": "0.003",
            }]
        })
    }

    /// `file_path` of every entry in `removed[section]`, in order.
    fn section_paths<'a>(removed: &'a Value, section: &str) -> Vec<Option<&'a str>> {
        removed
            .get(section)
            .and_then(Value::as_array)
            .map(|entries| {
                entries
                    .iter()
                    .map(|entry| entry.get("file_path").and_then(Value::as_str))
                    .collect()
            })
            .unwrap_or_default()
    }

    #[test]
    fn low_rerank_scores_kept_without_score_filter() {
        let files = extract_input_files(&low_score_payload(), false);
        assert_eq!(files.len(), 1);
    }

    #[test]
    fn low_rerank_scores_dropped_with_score_filter() {
        let files = extract_input_files(&low_score_payload(), true);
        assert!(files.is_empty());
    }

    #[test]
    fn removed_chunks_excludes_survivors_by_decomposed_key() {
        let full = json!({
            "json": [
                {"file_path": "schemas/decomposed/Agent.json", "content": {"name": "Agent"}},
                {"file_path": "schemas/decomposed/Agent/extra.json", "content": {}},
            ],
            "md": [
                {"file_path": "schemas/decomposed/haiku.md", "content": "haiku"},
                {"file_path": "schemas/decomposed/sonnet.md", "content": "sonnet"},
            ],
        });
        // Survivors use a longer path prefix; matching relies on key normalization.
        let surviving = json!({
            "json": [{"file_path": "src/catalog/schemas/decomposed/Agent.json"}],
            "md": [{"file_path": "src/catalog/schemas/decomposed/haiku.md"}],
        });

        let removed = removed_chunks(&full, &surviving, &RemovedChunksOptions::default());

        assert_eq!(
            section_paths(&removed, "json"),
            vec![Some("schemas/decomposed/Agent/extra.json")]
        );
        assert_eq!(
            section_paths(&removed, "md"),
            vec![Some("schemas/decomposed/sonnet.md")]
        );
    }

    #[test]
    fn removed_chunks_respects_score_filter_on_survivors() {
        let full = json!({
            "json": [
                {"file_path": "schemas/decomposed/Keep.json", "score": 0.9},
                {"file_path": "schemas/decomposed/Drop.json", "score": 0.9},
            ],
        });
        let surviving = json!({
            "json": [
                {"file_path": "schemas/decomposed/Keep.json", "score": 0.9},
                {"file_path": "schemas/decomposed/Drop.json", "score": 0.1},
            ],
        });
        let opts = RemovedChunksOptions {
            apply_decomposed_score_filter: true,
        };

        let removed = removed_chunks(&full, &surviving, &opts);

        assert_eq!(
            section_paths(&removed, "json"),
            vec![Some("schemas/decomposed/Drop.json")]
        );
    }
}