1use std::collections::HashMap;
2
3use docx_store::models::{DocBlock, DocSource, Ingest, RelationRecord, Symbol};
4use docx_store::schema::{
5 REL_CONTAINS,
6 REL_DOCUMENTS,
7 REL_INHERITS,
8 REL_MEMBER_OF,
9 REL_PARAM_TYPE,
10 REL_REFERENCES,
11 REL_RETURNS,
12 REL_SEE_ALSO,
13 SOURCE_KIND_CSHARP_XML,
14 SOURCE_KIND_RUSTDOC_JSON,
15 TABLE_DOC_BLOCK,
16 TABLE_SYMBOL,
17 make_csharp_symbol_key,
18 make_record_id,
19 make_symbol_key,
20};
21use serde::{Deserialize, Serialize};
22use tokio::fs;
23use surrealdb::Connection;
24
25use crate::parsers::{CsharpParseOptions, CsharpXmlParser, RustdocJsonParser, RustdocParseOptions};
26use crate::store::StoreError;
27
28use super::{ControlError, DocxControlPlane};
29use super::metadata::ProjectUpsertRequest;
30
/// Input for ingesting C# XML documentation via `DocxControlPlane::ingest_csharp_xml`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CsharpIngestRequest {
    /// Project the documentation belongs to; ingestion is rejected when this is blank.
    pub project_id: String,
    /// Inline XML payload; used in preference to `xml_path` when it is not blank.
    pub xml: Option<String>,
    /// Path of an XML file that is read when `xml` is absent or blank.
    pub xml_path: Option<String>,
    /// Optional ingest identifier; forwarded to the parser and recorded on the doc source,
    /// the relation records and the ingest record.
    pub ingest_id: Option<String>,
    /// Optional source path recorded on the doc source.
    pub source_path: Option<String>,
    /// Optional modification timestamp recorded on the doc source and on the ingest record.
    pub source_modified_at: Option<String>,
    /// Optional tool version recorded on the doc source.
    pub tool_version: Option<String>,
    /// Optional source hash recorded on the doc source.
    pub source_hash: Option<String>,
}
43
/// Summary returned by `DocxControlPlane::ingest_csharp_xml`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CsharpIngestReport {
    /// Assembly name reported by the parser, if any.
    pub assembly_name: Option<String>,
    /// Number of symbols returned by the store after upserting.
    pub symbol_count: usize,
    /// Number of doc blocks returned by the store after creation.
    pub doc_block_count: usize,
    /// Number of doc-block-to-symbol `REL_DOCUMENTS` records that were built.
    pub documents_edge_count: usize,
    /// Id of the doc source record, or `None` when no source metadata was supplied.
    pub doc_source_id: Option<String>,
}
53
/// Input for ingesting rustdoc JSON via `DocxControlPlane::ingest_rustdoc_json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RustdocIngestRequest {
    /// Project the documentation belongs to; ingestion is rejected when this is blank.
    pub project_id: String,
    /// Inline JSON payload; used in preference to `json_path` when it is not blank.
    pub json: Option<String>,
    /// Path of a JSON file that is read when `json` is absent or blank.
    pub json_path: Option<String>,
    /// Optional ingest identifier; forwarded to the parser and recorded on the doc source,
    /// the relation records and the ingest record.
    pub ingest_id: Option<String>,
    /// Optional source path recorded on the doc source.
    pub source_path: Option<String>,
    /// Optional modification timestamp recorded on the doc source and on the ingest record.
    pub source_modified_at: Option<String>,
    /// Optional tool version recorded on the doc source.
    pub tool_version: Option<String>,
    /// Optional source hash recorded on the doc source.
    pub source_hash: Option<String>,
}
66
/// Summary returned by `DocxControlPlane::ingest_rustdoc_json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RustdocIngestReport {
    /// Crate name reported by the parser, if any.
    pub crate_name: Option<String>,
    /// Number of symbols returned by the store after upserting.
    pub symbol_count: usize,
    /// Number of doc blocks returned by the store after creation.
    pub doc_block_count: usize,
    /// Number of doc-block-to-symbol `REL_DOCUMENTS` records that were built.
    pub documents_edge_count: usize,
    /// Id of the doc source record, or `None` when no source metadata was supplied.
    pub doc_source_id: Option<String>,
}
76
77impl<C: Connection> DocxControlPlane<C> {
78 pub async fn ingest_csharp_xml(
83 &self,
84 request: CsharpIngestRequest,
85 ) -> Result<CsharpIngestReport, ControlError> {
86 let CsharpIngestRequest {
87 project_id,
88 xml,
89 xml_path,
90 ingest_id,
91 source_path,
92 source_modified_at,
93 tool_version,
94 source_hash,
95 } = request;
96
97 if project_id.trim().is_empty() {
98 return Err(ControlError::Store(StoreError::InvalidInput(
99 "project_id is required".to_string(),
100 )));
101 }
102
103 let xml = resolve_ingest_payload(xml, xml_path, "xml")
104 .await
105 .map_err(ControlError::Store)?;
106
107 let mut options = CsharpParseOptions::new(project_id.clone());
108 if let Some(ref ingest_id) = ingest_id {
109 options = options.with_ingest_id(ingest_id.clone());
110 }
111
112 let parsed = CsharpXmlParser::parse_async(xml, options).await?;
113 let ingest_source_modified_at = source_modified_at.clone();
114
115 if let Some(ref assembly_name) = parsed.assembly_name {
116 let _ = self
117 .upsert_project(ProjectUpsertRequest {
118 project_id: project_id.clone(),
119 name: None,
120 language: Some("csharp".to_string()),
121 root_path: None,
122 description: None,
123 aliases: vec![assembly_name.clone()],
124 })
125 .await?;
126 }
127
128 let stored_symbols = self.store_symbols(parsed.symbols).await?;
129 let stored_blocks = self.store.create_doc_blocks(parsed.doc_blocks).await?;
130 let doc_source_id = self
131 .create_doc_source_if_needed(DocSourceInput {
132 project_id: project_id.clone(),
133 ingest_id: ingest_id.clone(),
134 language: "csharp".to_string(),
135 source_kind: SOURCE_KIND_CSHARP_XML.to_string(),
136 source_path,
137 tool_version,
138 source_hash,
139 source_modified_at,
140 })
141 .await?;
142 let documents_edge_count = self
143 .persist_relations(&stored_symbols, &stored_blocks, &project_id, ingest_id.as_deref())
144 .await?;
145 let _ = self
146 .create_ingest_record(&project_id, ingest_id.as_deref(), ingest_source_modified_at)
147 .await?;
148
149 Ok(CsharpIngestReport {
150 assembly_name: parsed.assembly_name,
151 symbol_count: stored_symbols.len(),
152 doc_block_count: stored_blocks.len(),
153 documents_edge_count,
154 doc_source_id,
155 })
156 }
157
158 pub async fn ingest_rustdoc_json(
163 &self,
164 request: RustdocIngestRequest,
165 ) -> Result<RustdocIngestReport, ControlError> {
166 let RustdocIngestRequest {
167 project_id,
168 json,
169 json_path,
170 ingest_id,
171 source_path,
172 source_modified_at,
173 tool_version,
174 source_hash,
175 } = request;
176
177 if project_id.trim().is_empty() {
178 return Err(ControlError::Store(StoreError::InvalidInput(
179 "project_id is required".to_string(),
180 )));
181 }
182
183 let json = resolve_ingest_payload(json, json_path, "json")
184 .await
185 .map_err(ControlError::Store)?;
186
187 let mut options = RustdocParseOptions::new(project_id.clone());
188 if let Some(ref ingest_id) = ingest_id {
189 options = options.with_ingest_id(ingest_id.clone());
190 }
191
192 let parsed = RustdocJsonParser::parse_async(json, options).await?;
193 let ingest_source_modified_at = source_modified_at.clone();
194
195 if let Some(ref crate_name) = parsed.crate_name {
196 let _ = self
197 .upsert_project(ProjectUpsertRequest {
198 project_id: project_id.clone(),
199 name: None,
200 language: Some("rust".to_string()),
201 root_path: None,
202 description: None,
203 aliases: vec![crate_name.clone()],
204 })
205 .await?;
206 }
207
208 let stored_symbols = self.store_symbols(parsed.symbols).await?;
209 let stored_blocks = self.store.create_doc_blocks(parsed.doc_blocks).await?;
210 let doc_source_id = self
211 .create_doc_source_if_needed(DocSourceInput {
212 project_id: project_id.clone(),
213 ingest_id: ingest_id.clone(),
214 language: "rust".to_string(),
215 source_kind: SOURCE_KIND_RUSTDOC_JSON.to_string(),
216 source_path,
217 tool_version,
218 source_hash,
219 source_modified_at,
220 })
221 .await?;
222 let documents_edge_count = self
223 .persist_relations(&stored_symbols, &stored_blocks, &project_id, ingest_id.as_deref())
224 .await?;
225 let _ = self
226 .create_ingest_record(&project_id, ingest_id.as_deref(), ingest_source_modified_at)
227 .await?;
228
229 Ok(RustdocIngestReport {
230 crate_name: parsed.crate_name,
231 symbol_count: stored_symbols.len(),
232 doc_block_count: stored_blocks.len(),
233 documents_edge_count,
234 doc_source_id,
235 })
236 }
237
238 async fn store_symbols(&self, symbols: Vec<Symbol>) -> Result<Vec<Symbol>, ControlError> {
239 let mut stored = Vec::with_capacity(symbols.len());
240 for symbol in symbols {
241 stored.push(self.store.upsert_symbol(symbol).await?);
242 }
243 Ok(stored)
244 }
245
246 async fn create_doc_source_if_needed(
247 &self,
248 input: DocSourceInput,
249 ) -> Result<Option<String>, ControlError> {
250 let has_source = input.source_path.is_some()
251 || input.tool_version.is_some()
252 || input.source_hash.is_some()
253 || input.source_modified_at.is_some();
254 if !has_source {
255 return Ok(None);
256 }
257
258 let source = DocSource {
259 id: None,
260 project_id: input.project_id,
261 ingest_id: input.ingest_id,
262 language: Some(input.language),
263 source_kind: Some(input.source_kind),
264 path: input.source_path,
265 tool_version: input.tool_version,
266 hash: input.source_hash,
267 source_modified_at: input.source_modified_at,
268 extra: None,
269 };
270 let created = self.store.create_doc_source(source).await?;
271 Ok(created.id)
272 }
273
274 async fn create_ingest_record(
275 &self,
276 project_id: &str,
277 ingest_id: Option<&str>,
278 source_modified_at: Option<String>,
279 ) -> Result<Option<String>, ControlError> {
280 let ingest = Ingest {
281 id: ingest_id.map(str::to_string),
282 project_id: project_id.to_string(),
283 git_commit: None,
284 git_branch: None,
285 git_tag: None,
286 project_version: None,
287 source_modified_at,
288 ingested_at: None,
289 extra: None,
290 };
291 let created = self.store.create_ingest(ingest).await?;
292 Ok(created.id)
293 }
294
295 async fn persist_relations(
296 &self,
297 stored_symbols: &[Symbol],
298 stored_blocks: &[DocBlock],
299 project_id: &str,
300 ingest_id: Option<&str>,
301 ) -> Result<usize, ControlError> {
302 let documents = build_documents_edges(stored_symbols, stored_blocks, project_id, ingest_id);
303 let documents_edge_count = documents.len();
304 if !documents.is_empty() {
305 let _ = self.store.create_relations(REL_DOCUMENTS, documents).await?;
306 }
307
308 let relations = build_symbol_relations(stored_symbols, project_id, ingest_id);
309 if !relations.is_empty() {
310 let _ = self.store.create_relations(REL_MEMBER_OF, relations.member_of).await?;
311 let _ = self.store.create_relations(REL_CONTAINS, relations.contains).await?;
312 let _ = self.store.create_relations(REL_RETURNS, relations.returns).await?;
313 let _ = self.store.create_relations(REL_PARAM_TYPE, relations.param_types).await?;
314 }
315
316 let doc_relations = build_doc_block_relations(stored_symbols, stored_blocks, project_id, ingest_id);
317 if !doc_relations.is_empty() {
318 let _ = self
319 .store
320 .create_relations(REL_SEE_ALSO, doc_relations.see_also)
321 .await?;
322 let _ = self
323 .store
324 .create_relations(REL_INHERITS, doc_relations.inherits)
325 .await?;
326 let _ = self
327 .store
328 .create_relations(REL_REFERENCES, doc_relations.references)
329 .await?;
330 }
331
332 Ok(documents_edge_count)
333 }
334}
335
336async fn resolve_ingest_payload(
337 raw: Option<String>,
338 path: Option<String>,
339 field: &str,
340) -> Result<String, StoreError> {
341 if let Some(value) = normalize_payload(raw) {
342 return Ok(strip_bom(&value));
343 }
344 if let Some(path) = normalize_payload(path) {
345 let contents = fs::read_to_string(&path).await.map_err(|err| {
346 StoreError::InvalidInput(format!("failed to read {field}_path '{path}': {err}"))
347 })?;
348 return Ok(strip_bom(&contents));
349 }
350 Err(StoreError::InvalidInput(format!(
351 "{field} is required (provide {field} or {field}_path)"
352 )))
353}
354
/// Returns the payload unchanged when it carries content, or `None` when it is absent
/// or effectively blank.
///
/// A payload is blank when it consists solely of whitespace and/or byte-order marks.
/// U+FEFF is not Unicode whitespace, so `str::trim` alone would treat a BOM-only payload
/// as content; it would then become an empty string once the BOM is stripped and would
/// block the fallback to the `*_path` input.
fn normalize_payload(value: Option<String>) -> Option<String> {
    value.filter(|payload| {
        let meaningful =
            payload.trim_matches(|ch: char| ch.is_whitespace() || ch == '\u{feff}');
        !meaningful.is_empty()
    })
}
365
/// Returns an owned copy of `value` without a single leading byte-order mark (U+FEFF).
///
/// Only one BOM at the very start is removed; any later occurrence is kept.
fn strip_bom(value: &str) -> String {
    match value.strip_prefix('\u{feff}') {
        Some(without_bom) => without_bom.to_owned(),
        None => value.to_owned(),
    }
}
369
/// Values used by `create_doc_source_if_needed` to build a `DocSource` record.
struct DocSourceInput {
    /// Project the source belongs to.
    project_id: String,
    /// Ingest run the source is associated with, if any.
    ingest_id: Option<String>,
    /// Language label stored on the record (the ingest methods pass `"csharp"` or `"rust"`).
    language: String,
    /// Source kind stored on the record (one of the `SOURCE_KIND_*` constants).
    source_kind: String,
    // The four optional fields below decide whether a record is created at all:
    // when every one of them is `None`, no doc source is written.
    /// Stored as `DocSource::path`.
    source_path: Option<String>,
    /// Stored as `DocSource::tool_version`.
    tool_version: Option<String>,
    /// Stored as `DocSource::hash`.
    source_hash: Option<String>,
    /// Stored as `DocSource::source_modified_at`.
    source_modified_at: Option<String>,
}
380
381fn build_documents_edges(
383 symbols: &[Symbol],
384 blocks: &[DocBlock],
385 project_id: &str,
386 ingest_id: Option<&str>,
387) -> Vec<RelationRecord> {
388 let mut symbol_map = HashMap::new();
389 for symbol in symbols {
390 if let Some(id) = symbol.id.as_ref() {
391 let record_id = make_record_id(TABLE_SYMBOL, id);
392 symbol_map.insert(symbol.symbol_key.as_str(), record_id);
393 }
394 }
395
396 let mut relations = Vec::new();
397 for block in blocks {
398 let Some(block_id) = block.id.as_ref() else {
399 continue;
400 };
401 let Some(symbol_key) = block.symbol_key.as_ref() else {
402 continue;
403 };
404 let Some(symbol_id) = symbol_map.get(symbol_key.as_str()) else {
405 continue;
406 };
407 let block_record_id = make_record_id(TABLE_DOC_BLOCK, block_id);
408 relations.push(RelationRecord {
409 id: None,
410 in_id: block_record_id,
411 out_id: symbol_id.clone(),
412 project_id: project_id.to_string(),
413 ingest_id: ingest_id.map(str::to_string),
414 kind: None,
415 extra: None,
416 });
417 }
418 relations
419}
420
/// Symbol-to-symbol relation records produced by `build_symbol_relations`, grouped by
/// the relation they are stored under.
#[derive(Default)]
struct SymbolRelations {
    /// Records with the child symbol as `in_id` and its parent as `out_id`.
    member_of: Vec<RelationRecord>,
    /// Records with the parent symbol as `in_id` and the child as `out_id`.
    contains: Vec<RelationRecord>,
    /// Records linking a symbol (`in_id`) to the symbol of its return type (`out_id`).
    returns: Vec<RelationRecord>,
    /// Records linking a symbol (`in_id`) to the symbol of a parameter type (`out_id`);
    /// `kind` holds the parameter name.
    param_types: Vec<RelationRecord>,
}

impl SymbolRelations {
    /// Returns `true` when none of the four lists contains a record.
    const fn is_empty(&self) -> bool {
        self.member_of.is_empty()
            && self.contains.is_empty()
            && self.returns.is_empty()
            && self.param_types.is_empty()
    }
}
439
440fn build_symbol_relations(
442 symbols: &[Symbol],
443 project_id: &str,
444 ingest_id: Option<&str>,
445) -> SymbolRelations {
446 let mut relations = SymbolRelations::default();
447 let mut symbol_by_qualified = HashMap::new();
448 let mut symbol_by_key = HashMap::new();
449
450 for symbol in symbols {
451 if let (Some(id), Some(qualified_name)) = (symbol.id.as_ref(), symbol.qualified_name.as_ref()) {
452 symbol_by_qualified.insert(qualified_name.as_str(), id.as_str());
453 }
454 if let Some(id) = symbol.id.as_ref() {
455 symbol_by_key.insert(symbol.symbol_key.as_str(), id.as_str());
456 }
457 }
458
459 for symbol in symbols {
460 let Some(symbol_id) = symbol.id.as_ref() else {
461 continue;
462 };
463 let symbol_record = make_record_id(TABLE_SYMBOL, symbol_id);
464 let ingest_id = ingest_id.map(str::to_string);
465
466 if let Some(parent) = symbol
467 .qualified_name
468 .as_ref()
469 .and_then(|qualified| qualified.rsplit_once("::").map(|pair| pair.0.to_string()))
470 .and_then(|parent| symbol_by_qualified.get(parent.as_str()).copied())
471 {
472 let parent_record = make_record_id(TABLE_SYMBOL, parent);
473 relations.member_of.push(RelationRecord {
474 id: None,
475 in_id: symbol_record.clone(),
476 out_id: parent_record.clone(),
477 project_id: project_id.to_string(),
478 ingest_id: ingest_id.clone(),
479 kind: None,
480 extra: None,
481 });
482 relations.contains.push(RelationRecord {
483 id: None,
484 in_id: parent_record,
485 out_id: symbol_record.clone(),
486 project_id: project_id.to_string(),
487 ingest_id: ingest_id.clone(),
488 kind: None,
489 extra: None,
490 });
491 }
492
493 if let Some(return_key) = symbol
494 .return_type
495 .as_ref()
496 .and_then(|ty| ty.symbol_key.as_ref())
497 .and_then(|key| symbol_by_key.get(key.as_str()).copied())
498 {
499 relations.returns.push(RelationRecord {
500 id: None,
501 in_id: symbol_record.clone(),
502 out_id: make_record_id(TABLE_SYMBOL, return_key),
503 project_id: project_id.to_string(),
504 ingest_id: ingest_id.clone(),
505 kind: None,
506 extra: None,
507 });
508 }
509
510 for param in &symbol.params {
511 let Some(param_key) = param
512 .type_ref
513 .as_ref()
514 .and_then(|ty| ty.symbol_key.as_ref())
515 .and_then(|key| symbol_by_key.get(key.as_str()).copied())
516 else {
517 continue;
518 };
519 relations.param_types.push(RelationRecord {
520 id: None,
521 in_id: symbol_record.clone(),
522 out_id: make_record_id(TABLE_SYMBOL, param_key),
523 project_id: project_id.to_string(),
524 ingest_id: ingest_id.clone(),
525 kind: Some(param.name.clone()),
526 extra: None,
527 });
528 }
529 }
530
531 relations
532}
533
/// Relation records derived from doc block content by `build_doc_block_relations`,
/// grouped by the relation they are stored under.
#[derive(Default)]
struct DocBlockRelations {
    /// Records from a documented symbol to a resolved see-also target; `kind` is the
    /// link's `target_kind`.
    see_also: Vec<RelationRecord>,
    /// Records from a documented symbol to the symbol its docs are inherited from;
    /// `kind` is `"inheritdoc"`.
    inherits: Vec<RelationRecord>,
    /// Records from a documented symbol to the symbol of a documented exception type;
    /// `kind` is `"exception"`.
    references: Vec<RelationRecord>,
}

impl DocBlockRelations {
    /// Returns `true` when none of the three lists contains a record.
    const fn is_empty(&self) -> bool {
        self.see_also.is_empty() && self.inherits.is_empty() && self.references.is_empty()
    }
}
548
549fn build_doc_block_relations(
551 symbols: &[Symbol],
552 blocks: &[DocBlock],
553 project_id: &str,
554 ingest_id: Option<&str>,
555) -> DocBlockRelations {
556 let mut relations = DocBlockRelations::default();
557 let mut symbol_by_key = HashMap::new();
558 for symbol in symbols {
559 if let Some(id) = symbol.id.as_ref() {
560 symbol_by_key.insert(symbol.symbol_key.as_str(), id.as_str());
561 }
562 }
563
564 for block in blocks {
565 let Some(symbol_key) = block.symbol_key.as_ref() else {
566 continue;
567 };
568 let Some(symbol_id) = symbol_by_key.get(symbol_key.as_str()).copied() else {
569 continue;
570 };
571 let symbol_record = make_record_id(TABLE_SYMBOL, symbol_id);
572 let ingest_id = ingest_id.map(str::to_string);
573 let language = block.language.as_deref();
574
575 for link in &block.see_also {
576 if let Some(target_id) = resolve_symbol_reference(
577 &link.target,
578 language,
579 project_id,
580 &symbol_by_key,
581 ) {
582 relations.see_also.push(RelationRecord {
583 id: None,
584 in_id: symbol_record.clone(),
585 out_id: make_record_id(TABLE_SYMBOL, target_id),
586 project_id: project_id.to_string(),
587 ingest_id: ingest_id.clone(),
588 kind: link.target_kind.clone(),
589 extra: None,
590 });
591 }
592 }
593
594 if let Some(inherit) = block.inherit_doc.as_ref() {
595 let target = inherit.cref.as_deref().or(inherit.path.as_deref());
596 if let Some(target) = target
597 && let Some(target_id) =
598 resolve_symbol_reference(target, language, project_id, &symbol_by_key)
599 {
600 relations.inherits.push(RelationRecord {
601 id: None,
602 in_id: symbol_record.clone(),
603 out_id: make_record_id(TABLE_SYMBOL, target_id),
604 project_id: project_id.to_string(),
605 ingest_id: ingest_id.clone(),
606 kind: Some("inheritdoc".to_string()),
607 extra: None,
608 });
609 }
610 }
611
612 for exception in &block.exceptions {
613 let Some(target_id) = exception
614 .type_ref
615 .as_ref()
616 .and_then(|ty| ty.symbol_key.as_ref())
617 .and_then(|key| symbol_by_key.get(key.as_str()).copied())
618 else {
619 continue;
620 };
621 relations.references.push(RelationRecord {
622 id: None,
623 in_id: symbol_record.clone(),
624 out_id: make_record_id(TABLE_SYMBOL, target_id),
625 project_id: project_id.to_string(),
626 ingest_id: ingest_id.clone(),
627 kind: Some("exception".to_string()),
628 extra: None,
629 });
630 }
631 }
632
633 relations
634}
635
636fn resolve_symbol_reference<'a>(
637 target: &str,
638 language: Option<&str>,
639 project_id: &str,
640 symbol_by_key: &'a HashMap<&'a str, &'a str>,
641) -> Option<&'a str> {
642 if let Some(id) = symbol_by_key.get(target).copied() {
643 return Some(id);
644 }
645 match language {
646 Some("csharp") => {
647 let key = make_csharp_symbol_key(project_id, target);
648 symbol_by_key.get(key.as_str()).copied()
649 }
650 Some("rust") => {
651 let key = make_symbol_key("rust", project_id, target);
652 symbol_by_key.get(key.as_str()).copied()
653 }
654 _ => None,
655 }
656}
657
// Unit tests for the relation builders; they use in-memory models only and never
// touch the store.
#[cfg(test)]
mod tests {
    use super::*;
    use docx_store::models::{DocException, DocInherit, SeeAlso, TypeRef};

    /// Builds a minimal C# symbol with the given store id and symbol key; every other
    /// optional field is left unset.
    fn build_symbol(project_id: &str, id: &str, key: &str) -> Symbol {
        Symbol {
            id: Some(id.to_string()),
            project_id: project_id.to_string(),
            language: Some("csharp".to_string()),
            symbol_key: key.to_string(),
            kind: None,
            name: None,
            qualified_name: None,
            display_name: None,
            signature: None,
            signature_hash: None,
            visibility: None,
            is_static: None,
            is_async: None,
            is_const: None,
            is_deprecated: None,
            since: None,
            stability: None,
            source_path: None,
            line: None,
            col: None,
            return_type: None,
            params: Vec::new(),
            type_params: Vec::new(),
            attributes: Vec::new(),
            source_ids: Vec::new(),
            doc_summary: None,
            extra: None,
        }
    }

    /// Builds an otherwise empty C# doc block (id `"block-1"`) attached to `symbol_key`.
    fn build_doc_block(project_id: &str, symbol_key: &str) -> DocBlock {
        DocBlock {
            id: Some("block-1".to_string()),
            project_id: project_id.to_string(),
            ingest_id: None,
            symbol_key: Some(symbol_key.to_string()),
            language: Some("csharp".to_string()),
            source_kind: Some(SOURCE_KIND_CSHARP_XML.to_string()),
            doc_hash: None,
            summary: None,
            remarks: None,
            returns: None,
            value: None,
            params: Vec::new(),
            type_params: Vec::new(),
            exceptions: Vec::new(),
            examples: Vec::new(),
            notes: Vec::new(),
            warnings: Vec::new(),
            safety: None,
            panics: None,
            errors: None,
            see_also: Vec::new(),
            deprecated: None,
            inherit_doc: None,
            sections: Vec::new(),
            raw: None,
            extra: None,
        }
    }

    /// A block on `Foo` that references `Bar` through a see-also link, an inherit-doc
    /// cref and an exception type must yield exactly one record of each kind.
    #[test]
    fn build_doc_block_relations_extracts_csharp_references() {
        let project_id = "docx";
        let foo_key = make_csharp_symbol_key(project_id, "T:Foo");
        let bar_key = make_csharp_symbol_key(project_id, "T:Bar");

        let symbols = vec![
            build_symbol(project_id, "foo", &foo_key),
            build_symbol(project_id, "bar", &bar_key),
        ];

        let mut block = build_doc_block(project_id, &foo_key);
        // The see-also and inherit-doc targets are raw doc ids, so they exercise the
        // language-specific key derivation rather than a verbatim key match.
        block.see_also.push(SeeAlso {
            label: Some("Bar".to_string()),
            target: "T:Bar".to_string(),
            target_kind: Some("cref".to_string()),
        });
        block.inherit_doc = Some(DocInherit {
            cref: Some("T:Bar".to_string()),
            path: None,
        });
        // The exception type carries the full symbol key and is looked up directly.
        block.exceptions.push(DocException {
            type_ref: Some(TypeRef {
                display: Some("Bar".to_string()),
                canonical: Some("Bar".to_string()),
                language: Some("csharp".to_string()),
                symbol_key: Some(bar_key),
                generics: Vec::new(),
                modifiers: Vec::new(),
            }),
            description: None,
        });

        let relations = build_doc_block_relations(&symbols, &[block], project_id, None);

        assert_eq!(relations.see_also.len(), 1);
        assert_eq!(relations.inherits.len(), 1);
        assert_eq!(relations.references.len(), 1);

        let target_record = make_record_id(TABLE_SYMBOL, "bar");
        assert_eq!(relations.see_also[0].out_id, target_record);
        assert_eq!(relations.see_also[0].kind.as_deref(), Some("cref"));
        assert_eq!(relations.inherits[0].kind.as_deref(), Some("inheritdoc"));
        assert_eq!(relations.references[0].kind.as_deref(), Some("exception"));
    }
}