1use std::collections::HashMap;
2use std::io::ErrorKind;
3
4use docx_store::models::{DocBlock, DocSource, Ingest, RelationRecord, Symbol};
5use docx_store::schema::{
6 REL_CONTAINS,
7 REL_DOCUMENTS,
8 REL_INHERITS,
9 REL_MEMBER_OF,
10 REL_PARAM_TYPE,
11 REL_REFERENCES,
12 REL_RETURNS,
13 REL_SEE_ALSO,
14 SOURCE_KIND_CSHARP_XML,
15 SOURCE_KIND_RUSTDOC_JSON,
16 TABLE_DOC_BLOCK,
17 TABLE_SYMBOL,
18 make_csharp_symbol_key,
19 make_record_id,
20 make_symbol_key,
21};
22use serde::{Deserialize, Serialize};
23use tokio::fs;
24use surrealdb::Connection;
25
26use crate::parsers::{CsharpParseOptions, CsharpXmlParser, RustdocJsonParser, RustdocParseOptions};
27use crate::store::StoreError;
28
29use super::{ControlError, DocxControlPlane};
30use super::metadata::ProjectUpsertRequest;
31
/// Input for [`DocxControlPlane::ingest_csharp_xml`].
///
/// The payload is supplied either inline (`xml`) or as a path (`xml_path`);
/// a non-blank inline payload takes precedence over the path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CsharpIngestRequest {
    /// Project the ingested symbols belong to. Required: a blank or
    /// whitespace-only value is rejected before any work is done.
    pub project_id: String,
    /// Raw XML documentation contents. Blank values are treated as absent.
    pub xml: Option<String>,
    /// Path read from the server host's filesystem when `xml` is absent or
    /// blank. Blank values are treated as absent.
    pub xml_path: Option<String>,
    /// Optional ingest identifier forwarded to the parser options, the doc
    /// source, the relation records and the ingest record.
    pub ingest_id: Option<String>,
    /// Recorded as the doc source `path`.
    pub source_path: Option<String>,
    /// Recorded on both the doc source and the ingest record.
    pub source_modified_at: Option<String>,
    /// Recorded as the doc source `tool_version`.
    pub tool_version: Option<String>,
    /// Recorded as the doc source `hash`.
    pub source_hash: Option<String>,
}
44
/// Summary returned by [`DocxControlPlane::ingest_csharp_xml`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CsharpIngestReport {
    /// Assembly name reported by the parser, when it found one.
    pub assembly_name: Option<String>,
    /// Number of symbols returned by the store after upserting.
    pub symbol_count: usize,
    /// Number of doc blocks returned by the store after creation.
    pub doc_block_count: usize,
    /// Number of doc-block -> symbol "documents" edges that were built.
    pub documents_edge_count: usize,
    /// Id of the created doc source, or `None` when no source metadata was
    /// supplied and therefore no doc source was created.
    pub doc_source_id: Option<String>,
}
54
/// Input for [`DocxControlPlane::ingest_rustdoc_json`].
///
/// The payload is supplied either inline (`json`) or as a path (`json_path`);
/// a non-blank inline payload takes precedence over the path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RustdocIngestRequest {
    /// Project the ingested symbols belong to. Required: a blank or
    /// whitespace-only value is rejected before any work is done.
    pub project_id: String,
    /// Raw rustdoc JSON contents. Blank values are treated as absent.
    pub json: Option<String>,
    /// Path read from the server host's filesystem when `json` is absent or
    /// blank. Blank values are treated as absent.
    pub json_path: Option<String>,
    /// Optional ingest identifier forwarded to the parser options, the doc
    /// source, the relation records and the ingest record.
    pub ingest_id: Option<String>,
    /// Recorded as the doc source `path`.
    pub source_path: Option<String>,
    /// Recorded on both the doc source and the ingest record.
    pub source_modified_at: Option<String>,
    /// Recorded as the doc source `tool_version`.
    pub tool_version: Option<String>,
    /// Recorded as the doc source `hash`.
    pub source_hash: Option<String>,
}
67
/// Summary returned by [`DocxControlPlane::ingest_rustdoc_json`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RustdocIngestReport {
    /// Crate name reported by the parser, when it found one.
    pub crate_name: Option<String>,
    /// Number of symbols returned by the store after upserting.
    pub symbol_count: usize,
    /// Number of doc blocks returned by the store after creation.
    pub doc_block_count: usize,
    /// Number of doc-block -> symbol "documents" edges that were built.
    pub documents_edge_count: usize,
    /// Id of the created doc source, or `None` when no source metadata was
    /// supplied and therefore no doc source was created.
    pub doc_source_id: Option<String>,
}
77
78impl<C: Connection> DocxControlPlane<C> {
79 pub async fn ingest_csharp_xml(
84 &self,
85 request: CsharpIngestRequest,
86 ) -> Result<CsharpIngestReport, ControlError> {
87 let CsharpIngestRequest {
88 project_id,
89 xml,
90 xml_path,
91 ingest_id,
92 source_path,
93 source_modified_at,
94 tool_version,
95 source_hash,
96 } = request;
97
98 if project_id.trim().is_empty() {
99 return Err(ControlError::Store(StoreError::InvalidInput(
100 "project_id is required".to_string(),
101 )));
102 }
103
104 let xml = resolve_ingest_payload(xml, xml_path, "xml")
105 .await
106 .map_err(ControlError::Store)?;
107
108 let mut options = CsharpParseOptions::new(project_id.clone());
109 if let Some(ref ingest_id) = ingest_id {
110 options = options.with_ingest_id(ingest_id.clone());
111 }
112
113 let parsed = CsharpXmlParser::parse_async(xml, options).await?;
114 let ingest_source_modified_at = source_modified_at.clone();
115
116 if let Some(ref assembly_name) = parsed.assembly_name {
117 let _ = self
118 .upsert_project(ProjectUpsertRequest {
119 project_id: project_id.clone(),
120 name: None,
121 language: Some("csharp".to_string()),
122 root_path: None,
123 description: None,
124 aliases: vec![assembly_name.clone()],
125 })
126 .await?;
127 }
128
129 let stored_symbols = self.store_symbols(parsed.symbols).await?;
130 let stored_blocks = self.store.create_doc_blocks(parsed.doc_blocks).await?;
131 let doc_source_id = self
132 .create_doc_source_if_needed(DocSourceInput {
133 project_id: project_id.clone(),
134 ingest_id: ingest_id.clone(),
135 language: "csharp".to_string(),
136 source_kind: SOURCE_KIND_CSHARP_XML.to_string(),
137 source_path,
138 tool_version,
139 source_hash,
140 source_modified_at,
141 })
142 .await?;
143 let documents_edge_count = self
144 .persist_relations(&stored_symbols, &stored_blocks, &project_id, ingest_id.as_deref())
145 .await?;
146 let _ = self
147 .create_ingest_record(&project_id, ingest_id.as_deref(), ingest_source_modified_at)
148 .await?;
149
150 Ok(CsharpIngestReport {
151 assembly_name: parsed.assembly_name,
152 symbol_count: stored_symbols.len(),
153 doc_block_count: stored_blocks.len(),
154 documents_edge_count,
155 doc_source_id,
156 })
157 }
158
159 pub async fn ingest_rustdoc_json(
164 &self,
165 request: RustdocIngestRequest,
166 ) -> Result<RustdocIngestReport, ControlError> {
167 let RustdocIngestRequest {
168 project_id,
169 json,
170 json_path,
171 ingest_id,
172 source_path,
173 source_modified_at,
174 tool_version,
175 source_hash,
176 } = request;
177
178 if project_id.trim().is_empty() {
179 return Err(ControlError::Store(StoreError::InvalidInput(
180 "project_id is required".to_string(),
181 )));
182 }
183
184 let json = resolve_ingest_payload(json, json_path, "json")
185 .await
186 .map_err(ControlError::Store)?;
187
188 let mut options = RustdocParseOptions::new(project_id.clone());
189 if let Some(ref ingest_id) = ingest_id {
190 options = options.with_ingest_id(ingest_id.clone());
191 }
192
193 let parsed = RustdocJsonParser::parse_async(json, options).await?;
194 let ingest_source_modified_at = source_modified_at.clone();
195
196 if let Some(ref crate_name) = parsed.crate_name {
197 let _ = self
198 .upsert_project(ProjectUpsertRequest {
199 project_id: project_id.clone(),
200 name: None,
201 language: Some("rust".to_string()),
202 root_path: None,
203 description: None,
204 aliases: vec![crate_name.clone()],
205 })
206 .await?;
207 }
208
209 let stored_symbols = self.store_symbols(parsed.symbols).await?;
210 let stored_blocks = self.store.create_doc_blocks(parsed.doc_blocks).await?;
211 let doc_source_id = self
212 .create_doc_source_if_needed(DocSourceInput {
213 project_id: project_id.clone(),
214 ingest_id: ingest_id.clone(),
215 language: "rust".to_string(),
216 source_kind: SOURCE_KIND_RUSTDOC_JSON.to_string(),
217 source_path,
218 tool_version,
219 source_hash,
220 source_modified_at,
221 })
222 .await?;
223 let documents_edge_count = self
224 .persist_relations(&stored_symbols, &stored_blocks, &project_id, ingest_id.as_deref())
225 .await?;
226 let _ = self
227 .create_ingest_record(&project_id, ingest_id.as_deref(), ingest_source_modified_at)
228 .await?;
229
230 Ok(RustdocIngestReport {
231 crate_name: parsed.crate_name,
232 symbol_count: stored_symbols.len(),
233 doc_block_count: stored_blocks.len(),
234 documents_edge_count,
235 doc_source_id,
236 })
237 }
238
239 async fn store_symbols(&self, symbols: Vec<Symbol>) -> Result<Vec<Symbol>, ControlError> {
240 let mut stored = Vec::with_capacity(symbols.len());
241 for symbol in symbols {
242 stored.push(self.store.upsert_symbol(symbol).await?);
243 }
244 Ok(stored)
245 }
246
247 async fn create_doc_source_if_needed(
248 &self,
249 input: DocSourceInput,
250 ) -> Result<Option<String>, ControlError> {
251 let has_source = input.source_path.is_some()
252 || input.tool_version.is_some()
253 || input.source_hash.is_some()
254 || input.source_modified_at.is_some();
255 if !has_source {
256 return Ok(None);
257 }
258
259 let source = DocSource {
260 id: None,
261 project_id: input.project_id,
262 ingest_id: input.ingest_id,
263 language: Some(input.language),
264 source_kind: Some(input.source_kind),
265 path: input.source_path,
266 tool_version: input.tool_version,
267 hash: input.source_hash,
268 source_modified_at: input.source_modified_at,
269 extra: None,
270 };
271 let created = self.store.create_doc_source(source).await?;
272 Ok(created.id)
273 }
274
275 async fn create_ingest_record(
276 &self,
277 project_id: &str,
278 ingest_id: Option<&str>,
279 source_modified_at: Option<String>,
280 ) -> Result<Option<String>, ControlError> {
281 let ingest = Ingest {
282 id: ingest_id.map(str::to_string),
283 project_id: project_id.to_string(),
284 git_commit: None,
285 git_branch: None,
286 git_tag: None,
287 project_version: None,
288 source_modified_at,
289 ingested_at: None,
290 extra: None,
291 };
292 let created = self.store.create_ingest(ingest).await?;
293 Ok(created.id)
294 }
295
296 async fn persist_relations(
297 &self,
298 stored_symbols: &[Symbol],
299 stored_blocks: &[DocBlock],
300 project_id: &str,
301 ingest_id: Option<&str>,
302 ) -> Result<usize, ControlError> {
303 let documents = build_documents_edges(stored_symbols, stored_blocks, project_id, ingest_id);
304 let documents_edge_count = documents.len();
305 if !documents.is_empty() {
306 let _ = self.store.create_relations(REL_DOCUMENTS, documents).await?;
307 }
308
309 let relations = build_symbol_relations(stored_symbols, project_id, ingest_id);
310 if !relations.is_empty() {
311 let _ = self.store.create_relations(REL_MEMBER_OF, relations.member_of).await?;
312 let _ = self.store.create_relations(REL_CONTAINS, relations.contains).await?;
313 let _ = self.store.create_relations(REL_RETURNS, relations.returns).await?;
314 let _ = self.store.create_relations(REL_PARAM_TYPE, relations.param_types).await?;
315 }
316
317 let doc_relations = build_doc_block_relations(stored_symbols, stored_blocks, project_id, ingest_id);
318 if !doc_relations.is_empty() {
319 let _ = self
320 .store
321 .create_relations(REL_SEE_ALSO, doc_relations.see_also)
322 .await?;
323 let _ = self
324 .store
325 .create_relations(REL_INHERITS, doc_relations.inherits)
326 .await?;
327 let _ = self
328 .store
329 .create_relations(REL_REFERENCES, doc_relations.references)
330 .await?;
331 }
332
333 Ok(documents_edge_count)
334 }
335}
336
337async fn resolve_ingest_payload(
338 raw: Option<String>,
339 path: Option<String>,
340 field: &str,
341) -> Result<String, StoreError> {
342 if let Some(value) = normalize_payload(raw) {
343 return Ok(strip_bom(&value));
344 }
345 if let Some(path) = normalize_payload(path) {
346 let contents = fs::read_to_string(&path).await.map_err(|err| {
347 let mut message = format!("failed to read {field}_path '{path}': {err}");
348 if err.kind() == ErrorKind::NotFound {
349 message.push_str(
350 "; file not found on server host. If running in Docker, mount the file into the container or send raw contents instead.",
351 );
352 }
353 StoreError::InvalidInput(message)
354 })?;
355 return Ok(strip_bom(&contents));
356 }
357 Err(StoreError::InvalidInput(format!(
358 "{field} is required (provide {field} or {field}_path)"
359 )))
360}
361
/// Treats blank input as missing.
///
/// Returns the value untouched (it is not trimmed) when it contains at
/// least one non-whitespace character, and `None` otherwise.
fn normalize_payload(value: Option<String>) -> Option<String> {
    value.filter(|candidate| !candidate.trim().is_empty())
}
372
/// Returns an owned copy of `value` without a leading U+FEFF byte-order
/// mark. Only one leading mark is removed; marks elsewhere are kept.
fn strip_bom(value: &str) -> String {
    match value.strip_prefix('\u{feff}') {
        Some(rest) => rest.to_owned(),
        None => value.to_owned(),
    }
}
376
/// Values the ingest entry points hand to `create_doc_source_if_needed`.
///
/// A doc source is only created when at least one of the four optional
/// fields is set.
struct DocSourceInput {
    /// Project the doc source belongs to.
    project_id: String,
    /// Ingest run the doc source is attributed to, if any.
    ingest_id: Option<String>,
    /// Language tag stored on the doc source (`"csharp"` or `"rust"` for the
    /// callers in this file).
    language: String,
    /// Source kind constant stored on the doc source.
    source_kind: String,
    /// Stored as the doc source `path`.
    source_path: Option<String>,
    /// Stored as the doc source `tool_version`.
    tool_version: Option<String>,
    /// Stored as the doc source `hash`.
    source_hash: Option<String>,
    /// Stored as the doc source `source_modified_at`.
    source_modified_at: Option<String>,
}
387
388fn build_documents_edges(
390 symbols: &[Symbol],
391 blocks: &[DocBlock],
392 project_id: &str,
393 ingest_id: Option<&str>,
394) -> Vec<RelationRecord> {
395 let mut symbol_map = HashMap::new();
396 for symbol in symbols {
397 if let Some(id) = symbol.id.as_ref() {
398 let record_id = make_record_id(TABLE_SYMBOL, id);
399 symbol_map.insert(symbol.symbol_key.as_str(), record_id);
400 }
401 }
402
403 let mut relations = Vec::new();
404 for block in blocks {
405 let Some(block_id) = block.id.as_ref() else {
406 continue;
407 };
408 let Some(symbol_key) = block.symbol_key.as_ref() else {
409 continue;
410 };
411 let Some(symbol_id) = symbol_map.get(symbol_key.as_str()) else {
412 continue;
413 };
414 let block_record_id = make_record_id(TABLE_DOC_BLOCK, block_id);
415 relations.push(RelationRecord {
416 id: None,
417 in_id: block_record_id,
418 out_id: symbol_id.clone(),
419 project_id: project_id.to_string(),
420 ingest_id: ingest_id.map(str::to_string),
421 kind: None,
422 extra: None,
423 });
424 }
425 relations
426}
427
428#[derive(Default)]
430struct SymbolRelations {
431 member_of: Vec<RelationRecord>,
432 contains: Vec<RelationRecord>,
433 returns: Vec<RelationRecord>,
434 param_types: Vec<RelationRecord>,
435}
436
437impl SymbolRelations {
438 const fn is_empty(&self) -> bool {
440 self.member_of.is_empty()
441 && self.contains.is_empty()
442 && self.returns.is_empty()
443 && self.param_types.is_empty()
444 }
445}
446
447fn build_symbol_relations(
449 symbols: &[Symbol],
450 project_id: &str,
451 ingest_id: Option<&str>,
452) -> SymbolRelations {
453 let mut relations = SymbolRelations::default();
454 let mut symbol_by_qualified = HashMap::new();
455 let mut symbol_by_key = HashMap::new();
456
457 for symbol in symbols {
458 if let (Some(id), Some(qualified_name)) = (symbol.id.as_ref(), symbol.qualified_name.as_ref()) {
459 symbol_by_qualified.insert(qualified_name.as_str(), id.as_str());
460 }
461 if let Some(id) = symbol.id.as_ref() {
462 symbol_by_key.insert(symbol.symbol_key.as_str(), id.as_str());
463 }
464 }
465
466 for symbol in symbols {
467 let Some(symbol_id) = symbol.id.as_ref() else {
468 continue;
469 };
470 let symbol_record = make_record_id(TABLE_SYMBOL, symbol_id);
471 let ingest_id = ingest_id.map(str::to_string);
472
473 if let Some(parent) = symbol
474 .qualified_name
475 .as_ref()
476 .and_then(|qualified| qualified.rsplit_once("::").map(|pair| pair.0.to_string()))
477 .and_then(|parent| symbol_by_qualified.get(parent.as_str()).copied())
478 {
479 let parent_record = make_record_id(TABLE_SYMBOL, parent);
480 relations.member_of.push(RelationRecord {
481 id: None,
482 in_id: symbol_record.clone(),
483 out_id: parent_record.clone(),
484 project_id: project_id.to_string(),
485 ingest_id: ingest_id.clone(),
486 kind: None,
487 extra: None,
488 });
489 relations.contains.push(RelationRecord {
490 id: None,
491 in_id: parent_record,
492 out_id: symbol_record.clone(),
493 project_id: project_id.to_string(),
494 ingest_id: ingest_id.clone(),
495 kind: None,
496 extra: None,
497 });
498 }
499
500 if let Some(return_key) = symbol
501 .return_type
502 .as_ref()
503 .and_then(|ty| ty.symbol_key.as_ref())
504 .and_then(|key| symbol_by_key.get(key.as_str()).copied())
505 {
506 relations.returns.push(RelationRecord {
507 id: None,
508 in_id: symbol_record.clone(),
509 out_id: make_record_id(TABLE_SYMBOL, return_key),
510 project_id: project_id.to_string(),
511 ingest_id: ingest_id.clone(),
512 kind: None,
513 extra: None,
514 });
515 }
516
517 for param in &symbol.params {
518 let Some(param_key) = param
519 .type_ref
520 .as_ref()
521 .and_then(|ty| ty.symbol_key.as_ref())
522 .and_then(|key| symbol_by_key.get(key.as_str()).copied())
523 else {
524 continue;
525 };
526 relations.param_types.push(RelationRecord {
527 id: None,
528 in_id: symbol_record.clone(),
529 out_id: make_record_id(TABLE_SYMBOL, param_key),
530 project_id: project_id.to_string(),
531 ingest_id: ingest_id.clone(),
532 kind: Some(param.name.clone()),
533 extra: None,
534 });
535 }
536 }
537
538 relations
539}
540
541#[derive(Default)]
543struct DocBlockRelations {
544 see_also: Vec<RelationRecord>,
545 inherits: Vec<RelationRecord>,
546 references: Vec<RelationRecord>,
547}
548
549impl DocBlockRelations {
550 const fn is_empty(&self) -> bool {
552 self.see_also.is_empty() && self.inherits.is_empty() && self.references.is_empty()
553 }
554}
555
556fn build_doc_block_relations(
558 symbols: &[Symbol],
559 blocks: &[DocBlock],
560 project_id: &str,
561 ingest_id: Option<&str>,
562) -> DocBlockRelations {
563 let mut relations = DocBlockRelations::default();
564 let mut symbol_by_key = HashMap::new();
565 for symbol in symbols {
566 if let Some(id) = symbol.id.as_ref() {
567 symbol_by_key.insert(symbol.symbol_key.as_str(), id.as_str());
568 }
569 }
570
571 for block in blocks {
572 let Some(symbol_key) = block.symbol_key.as_ref() else {
573 continue;
574 };
575 let Some(symbol_id) = symbol_by_key.get(symbol_key.as_str()).copied() else {
576 continue;
577 };
578 let symbol_record = make_record_id(TABLE_SYMBOL, symbol_id);
579 let ingest_id = ingest_id.map(str::to_string);
580 let language = block.language.as_deref();
581
582 for link in &block.see_also {
583 if let Some(target_id) = resolve_symbol_reference(
584 &link.target,
585 language,
586 project_id,
587 &symbol_by_key,
588 ) {
589 relations.see_also.push(RelationRecord {
590 id: None,
591 in_id: symbol_record.clone(),
592 out_id: make_record_id(TABLE_SYMBOL, target_id),
593 project_id: project_id.to_string(),
594 ingest_id: ingest_id.clone(),
595 kind: link.target_kind.clone(),
596 extra: None,
597 });
598 }
599 }
600
601 if let Some(inherit) = block.inherit_doc.as_ref() {
602 let target = inherit.cref.as_deref().or(inherit.path.as_deref());
603 if let Some(target) = target
604 && let Some(target_id) =
605 resolve_symbol_reference(target, language, project_id, &symbol_by_key)
606 {
607 relations.inherits.push(RelationRecord {
608 id: None,
609 in_id: symbol_record.clone(),
610 out_id: make_record_id(TABLE_SYMBOL, target_id),
611 project_id: project_id.to_string(),
612 ingest_id: ingest_id.clone(),
613 kind: Some("inheritdoc".to_string()),
614 extra: None,
615 });
616 }
617 }
618
619 for exception in &block.exceptions {
620 let Some(target_id) = exception
621 .type_ref
622 .as_ref()
623 .and_then(|ty| ty.symbol_key.as_ref())
624 .and_then(|key| symbol_by_key.get(key.as_str()).copied())
625 else {
626 continue;
627 };
628 relations.references.push(RelationRecord {
629 id: None,
630 in_id: symbol_record.clone(),
631 out_id: make_record_id(TABLE_SYMBOL, target_id),
632 project_id: project_id.to_string(),
633 ingest_id: ingest_id.clone(),
634 kind: Some("exception".to_string()),
635 extra: None,
636 });
637 }
638 }
639
640 relations
641}
642
643fn resolve_symbol_reference<'a>(
644 target: &str,
645 language: Option<&str>,
646 project_id: &str,
647 symbol_by_key: &'a HashMap<&'a str, &'a str>,
648) -> Option<&'a str> {
649 if let Some(id) = symbol_by_key.get(target).copied() {
650 return Some(id);
651 }
652 match language {
653 Some("csharp") => {
654 let key = make_csharp_symbol_key(project_id, target);
655 symbol_by_key.get(key.as_str()).copied()
656 }
657 Some("rust") => {
658 let key = make_symbol_key("rust", project_id, target);
659 symbol_by_key.get(key.as_str()).copied()
660 }
661 _ => None,
662 }
663}
664
665#[cfg(test)]
666mod tests {
667 use super::*;
668 use docx_store::models::{DocException, DocInherit, SeeAlso, TypeRef};
669
670 fn build_symbol(project_id: &str, id: &str, key: &str) -> Symbol {
671 Symbol {
672 id: Some(id.to_string()),
673 project_id: project_id.to_string(),
674 language: Some("csharp".to_string()),
675 symbol_key: key.to_string(),
676 kind: None,
677 name: None,
678 qualified_name: None,
679 display_name: None,
680 signature: None,
681 signature_hash: None,
682 visibility: None,
683 is_static: None,
684 is_async: None,
685 is_const: None,
686 is_deprecated: None,
687 since: None,
688 stability: None,
689 source_path: None,
690 line: None,
691 col: None,
692 return_type: None,
693 params: Vec::new(),
694 type_params: Vec::new(),
695 attributes: Vec::new(),
696 source_ids: Vec::new(),
697 doc_summary: None,
698 extra: None,
699 }
700 }
701
702 fn build_doc_block(project_id: &str, symbol_key: &str) -> DocBlock {
703 DocBlock {
704 id: Some("block-1".to_string()),
705 project_id: project_id.to_string(),
706 ingest_id: None,
707 symbol_key: Some(symbol_key.to_string()),
708 language: Some("csharp".to_string()),
709 source_kind: Some(SOURCE_KIND_CSHARP_XML.to_string()),
710 doc_hash: None,
711 summary: None,
712 remarks: None,
713 returns: None,
714 value: None,
715 params: Vec::new(),
716 type_params: Vec::new(),
717 exceptions: Vec::new(),
718 examples: Vec::new(),
719 notes: Vec::new(),
720 warnings: Vec::new(),
721 safety: None,
722 panics: None,
723 errors: None,
724 see_also: Vec::new(),
725 deprecated: None,
726 inherit_doc: None,
727 sections: Vec::new(),
728 raw: None,
729 extra: None,
730 }
731 }
732
/// A doc block on `T:Foo` that mentions `T:Bar` via see-also, inheritdoc
/// and an exception type must yield exactly one edge of each kind, with
/// the see-also edge targeting the `bar` symbol record.
#[test]
fn build_doc_block_relations_extracts_csharp_references() {
    const PROJECT: &str = "docx";
    let documented_key = make_csharp_symbol_key(PROJECT, "T:Foo");
    let referenced_key = make_csharp_symbol_key(PROJECT, "T:Bar");

    let symbols = [
        build_symbol(PROJECT, "foo", &documented_key),
        build_symbol(PROJECT, "bar", &referenced_key),
    ];

    let exception_type = TypeRef {
        display: Some("Bar".to_string()),
        canonical: Some("Bar".to_string()),
        language: Some("csharp".to_string()),
        symbol_key: Some(referenced_key),
        generics: Vec::new(),
        modifiers: Vec::new(),
    };
    let block = DocBlock {
        see_also: vec![SeeAlso {
            label: Some("Bar".to_string()),
            target: "T:Bar".to_string(),
            target_kind: Some("cref".to_string()),
        }],
        inherit_doc: Some(DocInherit {
            cref: Some("T:Bar".to_string()),
            path: None,
        }),
        exceptions: vec![DocException {
            type_ref: Some(exception_type),
            description: None,
        }],
        ..build_doc_block(PROJECT, &documented_key)
    };

    let edges = build_doc_block_relations(&symbols, &[block], PROJECT, None);

    assert_eq!(edges.see_also.len(), 1);
    assert_eq!(edges.inherits.len(), 1);
    assert_eq!(edges.references.len(), 1);

    let expected_target = make_record_id(TABLE_SYMBOL, "bar");
    assert_eq!(edges.see_also[0].out_id, expected_target);
    assert_eq!(edges.see_also[0].kind.as_deref(), Some("cref"));
    assert_eq!(edges.inherits[0].kind.as_deref(), Some("inheritdoc"));
    assert_eq!(edges.references[0].kind.as_deref(), Some("exception"));
}
778}