1use std::path::{Path, PathBuf};
15
16use seshat_core::{BranchId, KnowledgeNature, KnowledgeNode, KnowledgeWeight, NodeId};
17
18use crate::error::ScanError;
19
/// Kind of documentation file the scanner knows how to parse.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocType {
    /// Markdown files (`md`).
    Markdown,
    /// JSON files, parsed as JSON Schema candidates (`json`).
    JsonSchema,
    /// YAML files, parsed as OpenAPI / Swagger candidates (`yaml`, `yml`).
    OpenApi,
}

impl DocType {
    /// Maps a file extension (without the leading dot) to a [`DocType`].
    ///
    /// The comparison is case-insensitive. Returns `None` for any extension
    /// that is not one of `md`, `json`, `yaml` or `yml`.
    pub fn from_extension(ext: &str) -> Option<Self> {
        let is = |candidate: &str| ext.eq_ignore_ascii_case(candidate);

        if is("md") {
            Some(Self::Markdown)
        } else if is("json") {
            Some(Self::JsonSchema)
        } else if is("yaml") || is("yml") {
            Some(Self::OpenApi)
        } else {
            None
        }
    }
}
44
/// Outcome of parsing a single documentation file.
#[derive(Debug, Clone)]
pub struct DocumentationResult {
    /// Path of the parsed file, as passed to `parse_documentation`.
    pub path: PathBuf,
    /// Document kind selected from the file extension.
    pub doc_type: DocType,
    /// Knowledge nodes extracted from the file; may be empty.
    pub nodes: Vec<KnowledgeNode>,
}
55
56pub fn parse_documentation(
69 path: &Path,
70 content: &str,
71 branch_id: &BranchId,
72) -> Result<DocumentationResult, ScanError> {
73 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
74
75 let doc_type = DocType::from_extension(ext).ok_or_else(|| ScanError::DocumentationError {
76 path: path.to_path_buf(),
77 reason: format!("Unsupported documentation extension: {ext}"),
78 })?;
79
80 let nodes = match doc_type {
81 DocType::Markdown => parse_markdown(path, content, branch_id),
82 DocType::JsonSchema => parse_json_schema(path, content, branch_id)?,
83 DocType::OpenApi => parse_openapi(path, content, branch_id)?,
84 };
85
86 Ok(DocumentationResult {
87 path: path.to_path_buf(),
88 doc_type,
89 nodes,
90 })
91}
92
93fn parse_markdown(path: &Path, content: &str, branch_id: &BranchId) -> Vec<KnowledgeNode> {
108 fn flush_section(
110 counter: &mut i64,
111 nodes: &mut Vec<KnowledgeNode>,
112 section: Option<(String, u32, Vec<String>)>,
113 path: &Path,
114 branch_id: &BranchId,
115 ) {
116 let Some((title, level, body_lines)) = section else {
117 return;
118 };
119 let body = body_lines
121 .iter()
122 .map(String::as_str)
123 .collect::<Vec<_>>()
124 .join("\n")
125 .trim_end()
126 .to_owned();
127 *counter += 1;
128 nodes.push(make_doc_node(
129 NodeId(*counter),
130 branch_id,
131 KnowledgeNature::Fact,
132 KnowledgeWeight::Info,
133 title,
134 serde_json::json!({
135 "source": "documentation",
136 "doc_type": "markdown",
137 "file": path.to_string_lossy(),
138 "element": "section",
139 "level": level,
140 "content": body,
141 }),
142 ));
143 }
144
145 let mut nodes = Vec::new();
146 let mut node_counter: i64 = 0;
147
148 let mut current: Option<(String, u32, Vec<String>)> = None;
150
151 for line in content.lines() {
152 let trimmed = line.trim();
153
154 if let Some(heading) = parse_heading(trimmed) {
155 if heading.level <= 2 {
157 flush_section(
159 &mut node_counter,
160 &mut nodes,
161 current.take(),
162 path,
163 branch_id,
164 );
165 current = Some((heading.text, heading.level, Vec::new()));
166 continue;
167 }
168 }
169
170 if let Some((_, _, ref mut body)) = current {
173 body.push(line.to_owned());
174 }
175 }
177
178 flush_section(&mut node_counter, &mut nodes, current, path, branch_id);
180
181 nodes
182}
183
/// A parsed ATX-style Markdown heading.
struct HeadingInfo {
    /// Number of leading `#` characters (1 to 6).
    level: u32,
    /// Heading text without surrounding whitespace or a closing `#` sequence.
    text: String,
}

/// Parses `line` as an ATX heading (`# Title` … `###### Title`).
///
/// The caller is expected to pass a line with leading whitespace already
/// removed. Returns `None` when the line does not start with 1–6 `#`
/// characters followed by a space, or when the heading has no text.
///
/// An optional closing sequence (`## Title ##`) is removed from the text.
/// Trailing `#` characters that are not preceded by whitespace (`# C#`) are
/// part of the title and are kept.
fn parse_heading(line: &str) -> Option<HeadingInfo> {
    let hashes = line.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&hashes) {
        return None;
    }
    let level = u32::try_from(hashes).ok()?;

    // `#` is a single byte, so the hash count doubles as a byte offset.
    let rest = line[hashes..].strip_prefix(' ')?;

    let text = strip_closing_hashes(rest.trim());
    if text.is_empty() {
        return None;
    }

    Some(HeadingInfo {
        level,
        text: text.to_string(),
    })
}

/// Removes an ATX closing sequence: a trailing run of `#` that is either the
/// whole text or is preceded by whitespace. Any other text is returned as is.
fn strip_closing_hashes(text: &str) -> &str {
    let stem = text.trim_end_matches('#');
    let has_trailing_hashes = stem.len() < text.len();
    if has_trailing_hashes && (stem.is_empty() || stem.ends_with(char::is_whitespace)) {
        stem.trim_end()
    } else {
        text
    }
}
217
218fn parse_json_schema(
227 path: &Path,
228 content: &str,
229 branch_id: &BranchId,
230) -> Result<Vec<KnowledgeNode>, ScanError> {
231 let value: serde_json::Value =
232 serde_json::from_str(content).map_err(|e| ScanError::DocumentationError {
233 path: path.to_path_buf(),
234 reason: format!("Invalid JSON: {e}"),
235 })?;
236
237 let obj = value
239 .as_object()
240 .ok_or_else(|| ScanError::DocumentationError {
241 path: path.to_path_buf(),
242 reason: "JSON Schema must be an object".to_string(),
243 })?;
244
245 let is_schema = obj.contains_key("$schema")
246 || obj.contains_key("properties")
247 || (obj.contains_key("type") && obj.contains_key("title"));
248
249 if !is_schema {
250 return Ok(Vec::new());
251 }
252
253 let mut nodes = Vec::new();
254 let mut node_counter: i64 = 0;
255
256 let schema_title = obj
258 .get("title")
259 .and_then(|v| v.as_str())
260 .unwrap_or("Untitled Schema");
261
262 let schema_description = obj
263 .get("description")
264 .and_then(|v| v.as_str())
265 .unwrap_or("");
266
267 let description = if schema_description.is_empty() {
268 format!("JSON Schema: {schema_title}")
269 } else {
270 format!("JSON Schema: {schema_title} — {schema_description}")
271 };
272
273 node_counter += 1;
274 nodes.push(make_doc_node(
275 NodeId(node_counter),
276 branch_id,
277 KnowledgeNature::Fact,
278 KnowledgeWeight::Info,
279 description,
280 serde_json::json!({
281 "source": "documentation",
282 "doc_type": "json_schema",
283 "file": path.to_string_lossy(),
284 "element": "schema",
285 "schema_title": schema_title,
286 }),
287 ));
288
289 if let Some(properties) = obj.get("properties").and_then(|v| v.as_object()) {
291 let required: Vec<&str> = obj
292 .get("required")
293 .and_then(|v| v.as_array())
294 .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
295 .unwrap_or_default();
296
297 for (prop_name, prop_value) in properties {
298 let prop_type = prop_value
299 .get("type")
300 .and_then(|v| v.as_str())
301 .unwrap_or("unknown");
302 let prop_desc = prop_value
303 .get("description")
304 .and_then(|v| v.as_str())
305 .unwrap_or("");
306 let is_required = required.contains(&prop_name.as_str());
307
308 let desc = if prop_desc.is_empty() {
309 format!(
310 "Property: {prop_name} ({prop_type}{})",
311 if is_required { ", required" } else { "" }
312 )
313 } else {
314 format!(
315 "Property: {prop_name} ({prop_type}{}) — {prop_desc}",
316 if is_required { ", required" } else { "" }
317 )
318 };
319
320 node_counter += 1;
321 nodes.push(make_doc_node(
322 NodeId(node_counter),
323 branch_id,
324 KnowledgeNature::Fact,
325 KnowledgeWeight::Info,
326 desc,
327 serde_json::json!({
328 "source": "documentation",
329 "doc_type": "json_schema",
330 "file": path.to_string_lossy(),
331 "element": "property",
332 "schema_title": schema_title,
333 "property_name": prop_name,
334 "property_type": prop_type,
335 "required": is_required,
336 }),
337 ));
338 }
339 }
340
341 let defs = obj
343 .get("definitions")
344 .or_else(|| obj.get("$defs"))
345 .and_then(|v| v.as_object());
346
347 if let Some(definitions) = defs {
348 for (def_name, def_value) in definitions {
349 let def_desc = def_value
350 .get("description")
351 .and_then(|v| v.as_str())
352 .unwrap_or("");
353 let def_type = def_value
354 .get("type")
355 .and_then(|v| v.as_str())
356 .unwrap_or("object");
357
358 let desc = if def_desc.is_empty() {
359 format!("Definition: {def_name} ({def_type})")
360 } else {
361 format!("Definition: {def_name} ({def_type}) — {def_desc}")
362 };
363
364 node_counter += 1;
365 nodes.push(make_doc_node(
366 NodeId(node_counter),
367 branch_id,
368 KnowledgeNature::Fact,
369 KnowledgeWeight::Info,
370 desc,
371 serde_json::json!({
372 "source": "documentation",
373 "doc_type": "json_schema",
374 "file": path.to_string_lossy(),
375 "element": "definition",
376 "definition_name": def_name,
377 "definition_type": def_type,
378 }),
379 ));
380 }
381 }
382
383 Ok(nodes)
384}
385
386fn parse_openapi(
394 path: &Path,
395 content: &str,
396 branch_id: &BranchId,
397) -> Result<Vec<KnowledgeNode>, ScanError> {
398 let value: serde_norway::Value =
399 serde_norway::from_str(content).map_err(|e| ScanError::DocumentationError {
400 path: path.to_path_buf(),
401 reason: format!("Invalid YAML: {e}"),
402 })?;
403
404 let mapping = value
406 .as_mapping()
407 .ok_or_else(|| ScanError::DocumentationError {
408 path: path.to_path_buf(),
409 reason: "OpenAPI spec must be a YAML mapping".to_string(),
410 })?;
411
412 let has_openapi = mapping.contains_key(yaml_key("openapi"));
413 let has_swagger = mapping.contains_key(yaml_key("swagger"));
414
415 if !has_openapi && !has_swagger {
416 return Ok(Vec::new());
417 }
418
419 let mut nodes = Vec::new();
420 let mut node_counter: i64 = 0;
421
422 let api_title = yaml_get_mapping(mapping, "info")
424 .and_then(|m| yaml_get_str(m, "title"))
425 .unwrap_or("Untitled API");
426
427 let api_version = yaml_get_mapping(mapping, "info")
428 .and_then(|m| yaml_get_str(m, "version"))
429 .unwrap_or("");
430
431 let api_desc = if api_version.is_empty() {
432 format!("API: {api_title}")
433 } else {
434 format!("API: {api_title} (v{api_version})")
435 };
436
437 node_counter += 1;
438 nodes.push(make_doc_node(
439 NodeId(node_counter),
440 branch_id,
441 KnowledgeNature::Fact,
442 KnowledgeWeight::Info,
443 api_desc,
444 serde_json::json!({
445 "source": "documentation",
446 "doc_type": "openapi",
447 "file": path.to_string_lossy(),
448 "element": "api",
449 "api_title": api_title,
450 "api_version": api_version,
451 }),
452 ));
453
454 if let Some(paths) = yaml_get_mapping(mapping, "paths") {
456 let http_methods = [
457 "get", "post", "put", "delete", "patch", "options", "head", "trace",
458 ];
459
460 for (path_key, path_value) in paths {
461 let endpoint_path = match path_key.as_str() {
462 Some(p) => p,
463 None => continue,
464 };
465
466 let methods = match path_value.as_mapping() {
467 Some(m) => m,
468 None => continue,
469 };
470
471 for method_name in &http_methods {
472 let method_key = serde_norway::Value::String(method_name.to_string());
473 if let Some(method_value) = methods.get(&method_key) {
474 let method_map = method_value.as_mapping();
475
476 let summary = method_map
477 .and_then(|m| yaml_get_str(m, "summary"))
478 .unwrap_or("");
479
480 let operation_id = method_map
481 .and_then(|m| yaml_get_str(m, "operationId"))
482 .unwrap_or("");
483
484 let method_upper = method_name.to_uppercase();
485 let desc = if summary.is_empty() {
486 format!("Endpoint: {method_upper} {endpoint_path}")
487 } else {
488 format!("Endpoint: {method_upper} {endpoint_path} — {summary}")
489 };
490
491 let response_codes: Vec<String> = method_map
493 .and_then(|m| yaml_get_mapping(m, "responses"))
494 .map(|responses| {
495 responses
496 .keys()
497 .filter_map(|k| k.as_str().map(String::from))
498 .collect()
499 })
500 .unwrap_or_default();
501
502 let tags: Vec<String> = method_map
504 .and_then(|m| yaml_get_seq(m, "tags"))
505 .map(|seq| {
506 seq.iter()
507 .filter_map(|v| v.as_str().map(String::from))
508 .collect()
509 })
510 .unwrap_or_default();
511
512 node_counter += 1;
513 nodes.push(make_doc_node(
514 NodeId(node_counter),
515 branch_id,
516 KnowledgeNature::Fact,
517 KnowledgeWeight::Info,
518 desc,
519 serde_json::json!({
520 "source": "documentation",
521 "doc_type": "openapi",
522 "file": path.to_string_lossy(),
523 "element": "endpoint",
524 "api_title": api_title,
525 "path": endpoint_path,
526 "method": method_upper,
527 "operation_id": operation_id,
528 "response_codes": response_codes,
529 "tags": tags,
530 }),
531 ));
532 }
533 }
534 }
535 }
536
537 if let Some(schemas) =
539 yaml_get_mapping(mapping, "components").and_then(|m| yaml_get_mapping(m, "schemas"))
540 {
541 for (schema_key, schema_value) in schemas {
542 let schema_name = match schema_key.as_str() {
543 Some(n) => n,
544 None => continue,
545 };
546
547 let schema_map = schema_value.as_mapping();
548
549 let schema_type = schema_map
550 .and_then(|m| yaml_get_str(m, "type"))
551 .unwrap_or("object");
552
553 let schema_desc = schema_map
554 .and_then(|m| yaml_get_str(m, "description"))
555 .unwrap_or("");
556
557 let desc = if schema_desc.is_empty() {
558 format!("Schema: {schema_name} ({schema_type})")
559 } else {
560 format!("Schema: {schema_name} ({schema_type}) — {schema_desc}")
561 };
562
563 node_counter += 1;
564 nodes.push(make_doc_node(
565 NodeId(node_counter),
566 branch_id,
567 KnowledgeNature::Fact,
568 KnowledgeWeight::Info,
569 desc,
570 serde_json::json!({
571 "source": "documentation",
572 "doc_type": "openapi",
573 "file": path.to_string_lossy(),
574 "element": "schema",
575 "api_title": api_title,
576 "schema_name": schema_name,
577 "schema_type": schema_type,
578 }),
579 ));
580 }
581 }
582
583 if let Some(definitions) = yaml_get_mapping(mapping, "definitions") {
585 for (def_key, def_value) in definitions {
586 let def_name = match def_key.as_str() {
587 Some(n) => n,
588 None => continue,
589 };
590
591 let def_map = def_value.as_mapping();
592
593 let def_type = def_map
594 .and_then(|m| yaml_get_str(m, "type"))
595 .unwrap_or("object");
596
597 let def_desc = def_map
598 .and_then(|m| yaml_get_str(m, "description"))
599 .unwrap_or("");
600
601 let desc = if def_desc.is_empty() {
602 format!("Schema: {def_name} ({def_type})")
603 } else {
604 format!("Schema: {def_name} ({def_type}) — {def_desc}")
605 };
606
607 node_counter += 1;
608 nodes.push(make_doc_node(
609 NodeId(node_counter),
610 branch_id,
611 KnowledgeNature::Fact,
612 KnowledgeWeight::Info,
613 desc,
614 serde_json::json!({
615 "source": "documentation",
616 "doc_type": "openapi",
617 "file": path.to_string_lossy(),
618 "element": "schema",
619 "api_title": api_title,
620 "schema_name": def_name,
621 "schema_type": def_type,
622 }),
623 ));
624 }
625 }
626
627 Ok(nodes)
628}
629
630fn yaml_key(key: &str) -> serde_norway::Value {
636 serde_norway::Value::String(key.to_string())
637}
638
639fn yaml_get_str<'a>(mapping: &'a serde_norway::Mapping, key: &str) -> Option<&'a str> {
641 mapping.get(yaml_key(key)).and_then(|v| v.as_str())
642}
643
644fn yaml_get_mapping<'a>(
646 mapping: &'a serde_norway::Mapping,
647 key: &str,
648) -> Option<&'a serde_norway::Mapping> {
649 mapping.get(yaml_key(key)).and_then(|v| v.as_mapping())
650}
651
652fn yaml_get_seq<'a>(
654 mapping: &'a serde_norway::Mapping,
655 key: &str,
656) -> Option<&'a serde_norway::Sequence> {
657 mapping.get(yaml_key(key)).and_then(|v| v.as_sequence())
658}
659
660fn make_doc_node(
662 id: NodeId,
663 branch_id: &BranchId,
664 nature: KnowledgeNature,
665 weight: KnowledgeWeight,
666 description: String,
667 ext_data: serde_json::Value,
668) -> KnowledgeNode {
669 KnowledgeNode {
670 id,
671 branch_id: branch_id.clone(),
672 nature,
673 weight,
674 confidence: 1.0,
675 adoption_count: 1,
676 total_count: 1,
677 description,
678 ext_data: Some(ext_data),
679 }
680}
681
// Unit tests for extension detection, dispatch, and the Markdown,
// JSON Schema and OpenAPI parsers.
#[cfg(test)]
mod tests {
    use super::*;
    use seshat_core::BranchId;

    /// Branch id shared by every test.
    fn branch() -> BranchId {
        BranchId::from("test")
    }

    // ── DocType::from_extension ─────────────────────────────────────────

    #[test]
    fn doc_type_from_extension_markdown() {
        assert_eq!(DocType::from_extension("md"), Some(DocType::Markdown));
    }

    #[test]
    fn doc_type_from_extension_json() {
        assert_eq!(DocType::from_extension("json"), Some(DocType::JsonSchema));
    }

    #[test]
    fn doc_type_from_extension_yaml() {
        assert_eq!(DocType::from_extension("yaml"), Some(DocType::OpenApi));
        assert_eq!(DocType::from_extension("yml"), Some(DocType::OpenApi));
    }

    #[test]
    fn doc_type_from_extension_unknown() {
        assert_eq!(DocType::from_extension("rs"), None);
        assert_eq!(DocType::from_extension("txt"), None);
    }

    #[test]
    fn doc_type_case_insensitive() {
        assert_eq!(DocType::from_extension("MD"), Some(DocType::Markdown));
        assert_eq!(DocType::from_extension("YAML"), Some(DocType::OpenApi));
        assert_eq!(DocType::from_extension("Json"), Some(DocType::JsonSchema));
    }

    // ── parse_documentation dispatch ────────────────────────────────────

    #[test]
    fn parse_documentation_unsupported_extension() {
        let result = parse_documentation(Path::new("file.txt"), "content", &branch());
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, ScanError::DocumentationError { .. }));
    }

    #[test]
    fn parse_documentation_routes_to_markdown() {
        let content = "# Hello\n- item";
        // The list item is body content of the H1 section, not its own node.
        let result = parse_documentation(Path::new("README.md"), content, &branch()).unwrap();
        assert_eq!(result.doc_type, DocType::Markdown);
        assert_eq!(result.nodes.len(), 1);
    }

    #[test]
    fn parse_documentation_routes_to_json_schema() {
        let content = r#"{"$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "title": "Test"}"#;
        let result = parse_documentation(Path::new("schema.json"), content, &branch()).unwrap();
        assert_eq!(result.doc_type, DocType::JsonSchema);
        assert!(!result.nodes.is_empty());
    }

    #[test]
    fn parse_documentation_routes_to_openapi() {
        let content = "openapi: '3.0.0'\ninfo:\n title: Test\n version: '1.0'\npaths: {}";
        let result = parse_documentation(Path::new("api.yaml"), content, &branch()).unwrap();
        assert_eq!(result.doc_type, DocType::OpenApi);
        assert!(!result.nodes.is_empty());
    }

    // ── Markdown ────────────────────────────────────────────────────────

    #[test]
    fn markdown_extracts_h1_h2_as_sections() {
        let content = "# Title\n\nSome text\n\n## Section\n\nMore text\n\n### Subsection";
        // Only H1/H2 open a section; the H3 stays inside the H2 body.
        let nodes = parse_markdown(Path::new("doc.md"), content, &branch());

        assert_eq!(nodes.len(), 2, "only H1 and H2 create nodes");
        assert_eq!(nodes[0].description, "Title");
        assert_eq!(nodes[1].description, "Section");

        // Heading level is recorded in ext_data.
        assert_eq!(nodes[0].ext_data.as_ref().unwrap()["level"], 1);
        assert_eq!(nodes[1].ext_data.as_ref().unwrap()["level"], 2);

        // Deeper headings are kept verbatim in the section content.
        let section_content = nodes[1].ext_data.as_ref().unwrap()["content"]
            .as_str()
            .unwrap();
        assert!(
            section_content.contains("### Subsection"),
            "H3 should appear in H2 section content"
        );
    }

    #[test]
    fn markdown_heading_requires_space() {
        let content = "#NoSpace\n# Has Space";
        let nodes = parse_markdown(Path::new("doc.md"), content, &branch());
        assert_eq!(nodes.len(), 1);
        assert_eq!(nodes[0].description, "Has Space");
    }

    #[test]
    fn markdown_heading_max_level() {
        let content = "# Top\n###### H6 content\n####### H7 content";
        // H6 is a heading deeper than H2 and seven hashes is no heading at
        // all; either way the line stays in the body of the H1 section.
        let nodes = parse_markdown(Path::new("doc.md"), content, &branch());
        assert_eq!(nodes.len(), 1);
        assert_eq!(nodes[0].description, "Top");
        let body = nodes[0].ext_data.as_ref().unwrap()["content"]
            .as_str()
            .unwrap();
        assert!(body.contains("H6 content"));
    }

    #[test]
    fn markdown_list_items_are_body_content() {
        let content = "# Section\n- First item\n- Second item\n* Third item";
        // List items do not create nodes of their own.
        let nodes = parse_markdown(Path::new("doc.md"), content, &branch());

        assert_eq!(nodes.len(), 1, "only one node for the H1 section");
        assert_eq!(nodes[0].description, "Section");

        let body = nodes[0].ext_data.as_ref().unwrap()["content"]
            .as_str()
            .unwrap();
        assert!(body.contains("First item"));
        assert!(body.contains("Second item"));
        assert!(body.contains("Third item"));
    }

    #[test]
    fn markdown_multiple_h2_sections() {
        let content = "# Doc\n\npreamble\n\n## Section A\n- item A\n## Section B\n- item B";
        let nodes = parse_markdown(Path::new("doc.md"), content, &branch());

        assert_eq!(nodes.len(), 3);
        // Nodes come out in document order.
        assert_eq!(nodes[0].description, "Doc");
        assert_eq!(nodes[1].description, "Section A");
        assert_eq!(nodes[2].description, "Section B");

        let body_a = nodes[1].ext_data.as_ref().unwrap()["content"]
            .as_str()
            .unwrap();
        assert!(body_a.contains("item A"));
        assert!(!body_a.contains("item B"));
    }

    #[test]
    fn markdown_orphan_content_before_first_heading_discarded() {
        let content = "some preamble\n# First heading\nbody";
        // Text before the first H1/H2 belongs to no section.
        let nodes = parse_markdown(Path::new("doc.md"), content, &branch());
        assert_eq!(nodes.len(), 1);
        assert_eq!(nodes[0].description, "First heading");
    }

    #[test]
    fn markdown_all_nodes_tagged_with_source() {
        let content = "# Heading\n- Item\n## Sub\ntext";
        let nodes = parse_markdown(Path::new("doc.md"), content, &branch());
        for node in &nodes {
            let ext = node.ext_data.as_ref().unwrap();
            assert_eq!(ext["source"], "documentation");
            assert_eq!(ext["doc_type"], "markdown");
            assert_eq!(ext["element"], "section");
        }
    }

    #[test]
    fn markdown_empty_content() {
        let content = "";
        let nodes = parse_markdown(Path::new("empty.md"), content, &branch());
        assert!(nodes.is_empty());
    }

    #[test]
    fn markdown_prose_only_no_structured_content() {
        let content = "This is just a paragraph.\nWith no headings or lists.";
        // Without any H1/H2 there is no section to attach the text to.
        let nodes = parse_markdown(Path::new("prose.md"), content, &branch());
        assert!(nodes.is_empty());
    }

    // ── JSON Schema ─────────────────────────────────────────────────────

    #[test]
    fn json_schema_extracts_title_and_properties() {
        let content = r#"{
            "$schema": "http://json-schema.org/draft-07/schema#",
            "title": "User",
            "description": "A user account",
            "type": "object",
            "required": ["id", "email"],
            "properties": {
                "id": {"type": "integer", "description": "Unique identifier"},
                "email": {"type": "string", "description": "Email address"},
                "name": {"type": "string"}
            }
        }"#;

        let nodes = parse_json_schema(Path::new("user.json"), content, &branch()).unwrap();

        // One schema node plus three property nodes.
        assert_eq!(nodes.len(), 4);
        assert!(nodes[0].description.contains("User"));
        assert!(nodes[0].description.contains("A user account"));

        // Property nodes are looked up by name, not by position.
        let id_node = nodes.iter().find(|n| n.description.contains("id")).unwrap();
        assert!(id_node.description.contains("integer"));
        assert!(id_node.description.contains("required"));

        let email_node = nodes
            .iter()
            .find(|n| n.description.contains("email"))
            .unwrap();
        assert!(email_node.description.contains("required"));

        let name_node = nodes
            .iter()
            .find(|n| n.description.contains("name") && !n.description.contains("User"))
            .unwrap();
        assert!(!name_node.description.contains("required"));
    }

    #[test]
    fn json_schema_extracts_definitions() {
        let content = r#"{
            "$schema": "http://json-schema.org/draft-07/schema#",
            "title": "API",
            "type": "object",
            "definitions": {
                "Address": {
                    "type": "object",
                    "description": "A postal address"
                },
                "PhoneNumber": {
                    "type": "string"
                }
            }
        }"#;

        let nodes = parse_json_schema(Path::new("api.json"), content, &branch()).unwrap();

        // One schema node plus two definition nodes.
        assert_eq!(nodes.len(), 3);

        let addr = nodes
            .iter()
            .find(|n| n.description.contains("Address"))
            .unwrap();
        assert!(addr.description.contains("A postal address"));

        let phone = nodes
            .iter()
            .find(|n| n.description.contains("PhoneNumber"))
            .unwrap();
        assert!(phone.description.contains("string"));
    }

    #[test]
    fn json_schema_extracts_defs_key() {
        let content = r#"{
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "title": "Modern",
            "type": "object",
            "$defs": {
                "Color": {"type": "string", "description": "A color value"}
            }
        }"#;

        let nodes = parse_json_schema(Path::new("modern.json"), content, &branch()).unwrap();
        assert_eq!(nodes.len(), 2);
        assert!(nodes[1].description.contains("Color"));
    }

    #[test]
    fn json_schema_not_a_schema() {
        let content = r#"{"name": "John", "age": 30}"#;
        let nodes = parse_json_schema(Path::new("data.json"), content, &branch()).unwrap();
        assert!(nodes.is_empty());
    }

    #[test]
    fn json_schema_invalid_json() {
        let result = parse_json_schema(Path::new("bad.json"), "not json", &branch());
        assert!(result.is_err());
    }

    #[test]
    fn json_schema_not_object() {
        let result = parse_json_schema(Path::new("array.json"), "[1,2,3]", &branch());
        assert!(result.is_err());
    }

    #[test]
    fn json_schema_all_nodes_tagged_with_source() {
        let content = r#"{
            "$schema": "http://json-schema.org/draft-07/schema#",
            "title": "T",
            "type": "object",
            "properties": {"x": {"type": "string"}}
        }"#;
        let nodes = parse_json_schema(Path::new("t.json"), content, &branch()).unwrap();
        for node in &nodes {
            let ext = node.ext_data.as_ref().unwrap();
            assert_eq!(ext["source"], "documentation");
            assert_eq!(ext["doc_type"], "json_schema");
        }
    }

    // ── OpenAPI / Swagger ───────────────────────────────────────────────

    #[test]
    fn openapi_extracts_api_info_and_endpoints() {
        let content = r#"
openapi: '3.0.0'
info:
  title: Pet Store
  version: '1.0.0'
paths:
  /pets:
    get:
      summary: List all pets
      operationId: listPets
      tags:
        - pets
      responses:
        '200':
          description: A list of pets
    post:
      summary: Create a pet
      operationId: createPet
      responses:
        '201':
          description: Pet created
  /pets/{petId}:
    get:
      summary: Get a pet by ID
      operationId: showPetById
      responses:
        '200':
          description: A single pet
        '404':
          description: Pet not found
"#;

        let nodes = parse_openapi(Path::new("api.yaml"), content, &branch()).unwrap();

        // One API node plus three endpoint nodes.
        assert_eq!(nodes.len(), 4);

        let api_node = &nodes[0];
        assert!(api_node.description.contains("Pet Store"));
        assert!(api_node.description.contains("v1.0.0"));

        // "GET /pets" is a prefix of "GET /pets/{petId}", so exclude the latter.
        let get_pets = nodes
            .iter()
            .find(|n| n.description.contains("GET /pets") && !n.description.contains("{petId}"))
            .unwrap();
        assert!(get_pets.description.contains("List all pets"));

        let post_pets = nodes
            .iter()
            .find(|n| n.description.contains("POST /pets"))
            .unwrap();
        assert!(post_pets.description.contains("Create a pet"));

        let get_pet = nodes
            .iter()
            .find(|n| n.description.contains("GET /pets/{petId}"))
            .unwrap();
        assert!(get_pet.description.contains("Get a pet by ID"));

        // Operation details are carried in ext_data.
        let ext = get_pets.ext_data.as_ref().unwrap();
        assert_eq!(ext["source"], "documentation");
        assert_eq!(ext["operation_id"], "listPets");
        assert_eq!(ext["tags"], serde_json::json!(["pets"]));
        assert_eq!(ext["response_codes"], serde_json::json!(["200"]));
    }

    #[test]
    fn openapi_extracts_component_schemas() {
        let content = r#"
openapi: '3.0.0'
info:
  title: Test API
  version: '1.0'
paths: {}
components:
  schemas:
    Pet:
      type: object
      description: A pet in the store
    Error:
      type: object
      description: An error response
"#;

        let nodes = parse_openapi(Path::new("api.yml"), content, &branch()).unwrap();

        // One API node plus two schema nodes.
        assert_eq!(nodes.len(), 3);

        let pet = nodes
            .iter()
            .find(|n| n.description.contains("Pet"))
            .unwrap();
        assert!(pet.description.contains("A pet in the store"));

        let error = nodes
            .iter()
            .find(|n| n.description.contains("Error"))
            .unwrap();
        assert!(error.description.contains("An error response"));
    }

    #[test]
    fn openapi_swagger_2_definitions() {
        let content = r#"
swagger: '2.0'
info:
  title: Legacy API
  version: '0.1'
paths:
  /users:
    get:
      summary: List users
      responses:
        '200':
          description: OK
definitions:
  User:
    type: object
    description: A user object
"#;

        let nodes = parse_openapi(Path::new("legacy.yaml"), content, &branch()).unwrap();

        // One API node, one endpoint node and one definition node.
        assert_eq!(nodes.len(), 3);

        let user = nodes
            .iter()
            .find(|n| n.description.contains("User"))
            .unwrap();
        assert!(user.description.contains("A user object"));
    }

    #[test]
    fn openapi_not_an_api_spec() {
        let content = "name: John\nage: 30";
        let nodes = parse_openapi(Path::new("data.yaml"), content, &branch()).unwrap();
        assert!(nodes.is_empty());
    }

    #[test]
    fn openapi_invalid_yaml() {
        let result = parse_openapi(Path::new("bad.yaml"), "{{invalid yaml", &branch());
        assert!(result.is_err());
    }

    #[test]
    fn openapi_not_mapping() {
        let result = parse_openapi(Path::new("list.yaml"), "- item1\n- item2", &branch());
        assert!(result.is_err());
    }

    #[test]
    fn openapi_all_nodes_tagged_with_source() {
        let content = r#"
openapi: '3.0.0'
info:
  title: T
  version: '1'
paths:
  /x:
    get:
      summary: X
      responses:
        '200':
          description: OK
"#;
        let nodes = parse_openapi(Path::new("api.yaml"), content, &branch()).unwrap();
        for node in &nodes {
            let ext = node.ext_data.as_ref().unwrap();
            assert_eq!(ext["source"], "documentation");
            assert_eq!(ext["doc_type"], "openapi");
        }
    }

    #[test]
    fn openapi_endpoint_without_summary() {
        let content = r#"
openapi: '3.0.0'
info:
  title: Minimal
  version: '1'
paths:
  /health:
    get:
      responses:
        '200':
          description: OK
"#;
        let nodes = parse_openapi(Path::new("api.yaml"), content, &branch()).unwrap();
        let endpoint = nodes
            .iter()
            .find(|n| n.description.contains("GET /health"))
            .unwrap();
        // Without a summary the description has no " — …" suffix.
        assert_eq!(endpoint.description, "Endpoint: GET /health");
    }

    // ── Cross-cutting ───────────────────────────────────────────────────

    #[test]
    fn all_nodes_are_facts_with_info_weight() {
        let md = "# Title\n- Item";
        let md_nodes = parse_markdown(Path::new("doc.md"), md, &branch());
        for node in &md_nodes {
            assert_eq!(node.nature, KnowledgeNature::Fact);
            assert_eq!(node.weight, KnowledgeWeight::Info);
            assert!((node.confidence - 1.0).abs() < f64::EPSILON);
        }
    }

    #[test]
    fn documentation_result_contains_correct_path() {
        let result = parse_documentation(Path::new("docs/README.md"), "# Hi", &branch()).unwrap();
        assert_eq!(result.path, Path::new("docs/README.md"));
    }
}