Skip to main content

docx_core/control/
ingest.rs

1use std::collections::HashMap;
2
3use docx_store::models::{DocBlock, DocSource, RelationRecord, Symbol};
4use docx_store::schema::{
5    REL_CONTAINS,
6    REL_DOCUMENTS,
7    REL_INHERITS,
8    REL_MEMBER_OF,
9    REL_PARAM_TYPE,
10    REL_REFERENCES,
11    REL_RETURNS,
12    REL_SEE_ALSO,
13    SOURCE_KIND_CSHARP_XML,
14    SOURCE_KIND_RUSTDOC_JSON,
15    TABLE_DOC_BLOCK,
16    TABLE_SYMBOL,
17    make_csharp_symbol_key,
18    make_record_id,
19    make_symbol_key,
20};
21use serde::{Deserialize, Serialize};
22use surrealdb::Connection;
23
24use crate::parsers::{CsharpParseOptions, CsharpXmlParser, RustdocJsonParser, RustdocParseOptions};
25use crate::store::StoreError;
26
27use super::{ControlError, DocxControlPlane};
28use super::metadata::ProjectUpsertRequest;
29
30/// Input payload for ingesting C# XML documentation.
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct CsharpIngestRequest {
33    pub project_id: String,
34    pub xml: String,
35    pub ingest_id: Option<String>,
36    pub source_path: Option<String>,
37    pub source_modified_at: Option<String>,
38    pub tool_version: Option<String>,
39    pub source_hash: Option<String>,
40}
41
42/// Summary of a C# XML ingest operation.
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct CsharpIngestReport {
45    pub assembly_name: Option<String>,
46    pub symbol_count: usize,
47    pub doc_block_count: usize,
48    pub documents_edge_count: usize,
49    pub doc_source_id: Option<String>,
50}
51
52/// Input payload for ingesting rustdoc JSON output.
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct RustdocIngestRequest {
55    pub project_id: String,
56    pub json: String,
57    pub ingest_id: Option<String>,
58    pub source_path: Option<String>,
59    pub source_modified_at: Option<String>,
60    pub tool_version: Option<String>,
61    pub source_hash: Option<String>,
62}
63
64/// Summary of a rustdoc JSON ingest operation.
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct RustdocIngestReport {
67    pub crate_name: Option<String>,
68    pub symbol_count: usize,
69    pub doc_block_count: usize,
70    pub documents_edge_count: usize,
71    pub doc_source_id: Option<String>,
72}
73
74impl<C: Connection> DocxControlPlane<C> {
75    /// Ingests C# XML documentation into the store.
76    ///
77    /// # Errors
78    /// Returns `ControlError` if validation fails, parsing fails, or store writes fail.
79    pub async fn ingest_csharp_xml(
80        &self,
81        request: CsharpIngestRequest,
82    ) -> Result<CsharpIngestReport, ControlError> {
83        let CsharpIngestRequest {
84            project_id,
85            xml,
86            ingest_id,
87            source_path,
88            source_modified_at,
89            tool_version,
90            source_hash,
91        } = request;
92
93        if project_id.trim().is_empty() {
94            return Err(ControlError::Store(StoreError::InvalidInput(
95                "project_id is required".to_string(),
96            )));
97        }
98
99        let mut options = CsharpParseOptions::new(project_id.clone());
100        if let Some(ref ingest_id) = ingest_id {
101            options = options.with_ingest_id(ingest_id.clone());
102        }
103
104        let parsed = CsharpXmlParser::parse_async(xml, options).await?;
105
106        if let Some(ref assembly_name) = parsed.assembly_name {
107            let _ = self
108                .upsert_project(ProjectUpsertRequest {
109                    project_id: project_id.clone(),
110                    name: None,
111                    language: Some("csharp".to_string()),
112                    root_path: None,
113                    description: None,
114                    aliases: vec![assembly_name.clone()],
115                })
116                .await?;
117        }
118
119        let stored_symbols = self.store_symbols(parsed.symbols).await?;
120        let stored_blocks = self.store.create_doc_blocks(parsed.doc_blocks).await?;
121        let doc_source_id = self
122            .create_doc_source_if_needed(DocSourceInput {
123                project_id: project_id.clone(),
124                ingest_id: ingest_id.clone(),
125                language: "csharp".to_string(),
126                source_kind: SOURCE_KIND_CSHARP_XML.to_string(),
127                source_path,
128                tool_version,
129                source_hash,
130                source_modified_at,
131            })
132            .await?;
133        let documents_edge_count = self
134            .persist_relations(&stored_symbols, &stored_blocks, &project_id, ingest_id.as_deref())
135            .await?;
136
137        Ok(CsharpIngestReport {
138            assembly_name: parsed.assembly_name,
139            symbol_count: stored_symbols.len(),
140            doc_block_count: stored_blocks.len(),
141            documents_edge_count,
142            doc_source_id,
143        })
144    }
145
146    /// Ingests rustdoc JSON documentation into the store.
147    ///
148    /// # Errors
149    /// Returns `ControlError` if validation fails, parsing fails, or store writes fail.
150    pub async fn ingest_rustdoc_json(
151        &self,
152        request: RustdocIngestRequest,
153    ) -> Result<RustdocIngestReport, ControlError> {
154        let RustdocIngestRequest {
155            project_id,
156            json,
157            ingest_id,
158            source_path,
159            source_modified_at,
160            tool_version,
161            source_hash,
162        } = request;
163
164        if project_id.trim().is_empty() {
165            return Err(ControlError::Store(StoreError::InvalidInput(
166                "project_id is required".to_string(),
167            )));
168        }
169
170        let mut options = RustdocParseOptions::new(project_id.clone());
171        if let Some(ref ingest_id) = ingest_id {
172            options = options.with_ingest_id(ingest_id.clone());
173        }
174
175        let parsed = RustdocJsonParser::parse_async(json, options).await?;
176
177        if let Some(ref crate_name) = parsed.crate_name {
178            let _ = self
179                .upsert_project(ProjectUpsertRequest {
180                    project_id: project_id.clone(),
181                    name: None,
182                    language: Some("rust".to_string()),
183                    root_path: None,
184                    description: None,
185                    aliases: vec![crate_name.clone()],
186                })
187                .await?;
188        }
189
190        let stored_symbols = self.store_symbols(parsed.symbols).await?;
191        let stored_blocks = self.store.create_doc_blocks(parsed.doc_blocks).await?;
192        let doc_source_id = self
193            .create_doc_source_if_needed(DocSourceInput {
194                project_id: project_id.clone(),
195                ingest_id: ingest_id.clone(),
196                language: "rust".to_string(),
197                source_kind: SOURCE_KIND_RUSTDOC_JSON.to_string(),
198                source_path,
199                tool_version,
200                source_hash,
201                source_modified_at,
202            })
203            .await?;
204        let documents_edge_count = self
205            .persist_relations(&stored_symbols, &stored_blocks, &project_id, ingest_id.as_deref())
206            .await?;
207
208        Ok(RustdocIngestReport {
209            crate_name: parsed.crate_name,
210            symbol_count: stored_symbols.len(),
211            doc_block_count: stored_blocks.len(),
212            documents_edge_count,
213            doc_source_id,
214        })
215    }
216
217    async fn store_symbols(&self, symbols: Vec<Symbol>) -> Result<Vec<Symbol>, ControlError> {
218        let mut stored = Vec::with_capacity(symbols.len());
219        for symbol in symbols {
220            stored.push(self.store.upsert_symbol(symbol).await?);
221        }
222        Ok(stored)
223    }
224
225    async fn create_doc_source_if_needed(
226        &self,
227        input: DocSourceInput,
228    ) -> Result<Option<String>, ControlError> {
229        let has_source = input.source_path.is_some()
230            || input.tool_version.is_some()
231            || input.source_hash.is_some()
232            || input.source_modified_at.is_some();
233        if !has_source {
234            return Ok(None);
235        }
236
237        let source = DocSource {
238            id: None,
239            project_id: input.project_id,
240            ingest_id: input.ingest_id,
241            language: Some(input.language),
242            source_kind: Some(input.source_kind),
243            path: input.source_path,
244            tool_version: input.tool_version,
245            hash: input.source_hash,
246            source_modified_at: input.source_modified_at,
247            extra: None,
248        };
249        let created = self.store.create_doc_source(source).await?;
250        Ok(created.id)
251    }
252
253    async fn persist_relations(
254        &self,
255        stored_symbols: &[Symbol],
256        stored_blocks: &[DocBlock],
257        project_id: &str,
258        ingest_id: Option<&str>,
259    ) -> Result<usize, ControlError> {
260        let documents = build_documents_edges(stored_symbols, stored_blocks, project_id, ingest_id);
261        let documents_edge_count = documents.len();
262        if !documents.is_empty() {
263            let _ = self.store.create_relations(REL_DOCUMENTS, documents).await?;
264        }
265
266        let relations = build_symbol_relations(stored_symbols, project_id, ingest_id);
267        if !relations.is_empty() {
268            let _ = self.store.create_relations(REL_MEMBER_OF, relations.member_of).await?;
269            let _ = self.store.create_relations(REL_CONTAINS, relations.contains).await?;
270            let _ = self.store.create_relations(REL_RETURNS, relations.returns).await?;
271            let _ = self.store.create_relations(REL_PARAM_TYPE, relations.param_types).await?;
272        }
273
274        let doc_relations = build_doc_block_relations(stored_symbols, stored_blocks, project_id, ingest_id);
275        if !doc_relations.is_empty() {
276            let _ = self
277                .store
278                .create_relations(REL_SEE_ALSO, doc_relations.see_also)
279                .await?;
280            let _ = self
281                .store
282                .create_relations(REL_INHERITS, doc_relations.inherits)
283                .await?;
284            let _ = self
285                .store
286                .create_relations(REL_REFERENCES, doc_relations.references)
287                .await?;
288        }
289
290        Ok(documents_edge_count)
291    }
292}
293
/// Provenance details passed to `create_doc_source_if_needed`.
///
/// A doc source record is only created when at least one of `source_path`, `tool_version`,
/// `source_hash`, or `source_modified_at` is present.
struct DocSourceInput {
    /// Project the source belongs to.
    project_id: String,
    /// Optional ingest identifier copied onto the record.
    ingest_id: Option<String>,
    /// Language label stored on the record (the ingest paths pass "csharp" or "rust").
    language: String,
    /// Source kind label stored on the record.
    source_kind: String,
    /// Optional input location, stored as the record's `path`.
    source_path: Option<String>,
    /// Optional version of the producing tool.
    tool_version: Option<String>,
    /// Optional hash of the input, stored as the record's `hash`.
    source_hash: Option<String>,
    /// Optional modification timestamp of the input.
    source_modified_at: Option<String>,
}
304
305/// Builds `documents` relation edges between doc blocks and symbols.
306fn build_documents_edges(
307    symbols: &[Symbol],
308    blocks: &[DocBlock],
309    project_id: &str,
310    ingest_id: Option<&str>,
311) -> Vec<RelationRecord> {
312    let mut symbol_map = HashMap::new();
313    for symbol in symbols {
314        if let Some(id) = symbol.id.as_ref() {
315            let record_id = make_record_id(TABLE_SYMBOL, id);
316            symbol_map.insert(symbol.symbol_key.as_str(), record_id);
317        }
318    }
319
320    let mut relations = Vec::new();
321    for block in blocks {
322        let Some(block_id) = block.id.as_ref() else {
323            continue;
324        };
325        let Some(symbol_key) = block.symbol_key.as_ref() else {
326            continue;
327        };
328        let Some(symbol_id) = symbol_map.get(symbol_key.as_str()) else {
329            continue;
330        };
331        let block_record_id = make_record_id(TABLE_DOC_BLOCK, block_id);
332        relations.push(RelationRecord {
333            id: None,
334            in_id: block_record_id,
335            out_id: symbol_id.clone(),
336            project_id: project_id.to_string(),
337            ingest_id: ingest_id.map(str::to_string),
338            kind: None,
339            extra: None,
340        });
341    }
342    relations
343}
344
345/// Bundles relation edges derived from symbol metadata.
346#[derive(Default)]
347struct SymbolRelations {
348    member_of: Vec<RelationRecord>,
349    contains: Vec<RelationRecord>,
350    returns: Vec<RelationRecord>,
351    param_types: Vec<RelationRecord>,
352}
353
354impl SymbolRelations {
355    /// Returns true when all relation collections are empty.
356    const fn is_empty(&self) -> bool {
357        self.member_of.is_empty()
358            && self.contains.is_empty()
359            && self.returns.is_empty()
360            && self.param_types.is_empty()
361    }
362}
363
364/// Builds relation edges for symbol membership, containment, and type references.
365fn build_symbol_relations(
366    symbols: &[Symbol],
367    project_id: &str,
368    ingest_id: Option<&str>,
369) -> SymbolRelations {
370    let mut relations = SymbolRelations::default();
371    let mut symbol_by_qualified = HashMap::new();
372    let mut symbol_by_key = HashMap::new();
373
374    for symbol in symbols {
375        if let (Some(id), Some(qualified_name)) = (symbol.id.as_ref(), symbol.qualified_name.as_ref()) {
376            symbol_by_qualified.insert(qualified_name.as_str(), id.as_str());
377        }
378        if let Some(id) = symbol.id.as_ref() {
379            symbol_by_key.insert(symbol.symbol_key.as_str(), id.as_str());
380        }
381    }
382
383    for symbol in symbols {
384        let Some(symbol_id) = symbol.id.as_ref() else {
385            continue;
386        };
387        let symbol_record = make_record_id(TABLE_SYMBOL, symbol_id);
388        let ingest_id = ingest_id.map(str::to_string);
389
390        if let Some(parent) = symbol
391            .qualified_name
392            .as_ref()
393            .and_then(|qualified| qualified.rsplit_once("::").map(|pair| pair.0.to_string()))
394            .and_then(|parent| symbol_by_qualified.get(parent.as_str()).copied())
395        {
396            let parent_record = make_record_id(TABLE_SYMBOL, parent);
397            relations.member_of.push(RelationRecord {
398                id: None,
399                in_id: symbol_record.clone(),
400                out_id: parent_record.clone(),
401                project_id: project_id.to_string(),
402                ingest_id: ingest_id.clone(),
403                kind: None,
404                extra: None,
405            });
406            relations.contains.push(RelationRecord {
407                id: None,
408                in_id: parent_record,
409                out_id: symbol_record.clone(),
410                project_id: project_id.to_string(),
411                ingest_id: ingest_id.clone(),
412                kind: None,
413                extra: None,
414            });
415        }
416
417        if let Some(return_key) = symbol
418            .return_type
419            .as_ref()
420            .and_then(|ty| ty.symbol_key.as_ref())
421            .and_then(|key| symbol_by_key.get(key.as_str()).copied())
422        {
423            relations.returns.push(RelationRecord {
424                id: None,
425                in_id: symbol_record.clone(),
426                out_id: make_record_id(TABLE_SYMBOL, return_key),
427                project_id: project_id.to_string(),
428                ingest_id: ingest_id.clone(),
429                kind: None,
430                extra: None,
431            });
432        }
433
434        for param in &symbol.params {
435            let Some(param_key) = param
436                .type_ref
437                .as_ref()
438                .and_then(|ty| ty.symbol_key.as_ref())
439                .and_then(|key| symbol_by_key.get(key.as_str()).copied())
440            else {
441                continue;
442            };
443            relations.param_types.push(RelationRecord {
444                id: None,
445                in_id: symbol_record.clone(),
446                out_id: make_record_id(TABLE_SYMBOL, param_key),
447                project_id: project_id.to_string(),
448                ingest_id: ingest_id.clone(),
449                kind: Some(param.name.clone()),
450                extra: None,
451            });
452        }
453    }
454
455    relations
456}
457
458/// Bundles relation edges derived from documentation metadata.
459#[derive(Default)]
460struct DocBlockRelations {
461    see_also: Vec<RelationRecord>,
462    inherits: Vec<RelationRecord>,
463    references: Vec<RelationRecord>,
464}
465
466impl DocBlockRelations {
467    /// Returns true when all relation collections are empty.
468    const fn is_empty(&self) -> bool {
469        self.see_also.is_empty() && self.inherits.is_empty() && self.references.is_empty()
470    }
471}
472
473/// Builds relation edges for `see also`, inheritance, and reference metadata on doc blocks.
474fn build_doc_block_relations(
475    symbols: &[Symbol],
476    blocks: &[DocBlock],
477    project_id: &str,
478    ingest_id: Option<&str>,
479) -> DocBlockRelations {
480    let mut relations = DocBlockRelations::default();
481    let mut symbol_by_key = HashMap::new();
482    for symbol in symbols {
483        if let Some(id) = symbol.id.as_ref() {
484            symbol_by_key.insert(symbol.symbol_key.as_str(), id.as_str());
485        }
486    }
487
488    for block in blocks {
489        let Some(symbol_key) = block.symbol_key.as_ref() else {
490            continue;
491        };
492        let Some(symbol_id) = symbol_by_key.get(symbol_key.as_str()).copied() else {
493            continue;
494        };
495        let symbol_record = make_record_id(TABLE_SYMBOL, symbol_id);
496        let ingest_id = ingest_id.map(str::to_string);
497        let language = block.language.as_deref();
498
499        for link in &block.see_also {
500            if let Some(target_id) = resolve_symbol_reference(
501                &link.target,
502                language,
503                project_id,
504                &symbol_by_key,
505            ) {
506                relations.see_also.push(RelationRecord {
507                    id: None,
508                    in_id: symbol_record.clone(),
509                    out_id: make_record_id(TABLE_SYMBOL, target_id),
510                    project_id: project_id.to_string(),
511                    ingest_id: ingest_id.clone(),
512                    kind: link.target_kind.clone(),
513                    extra: None,
514                });
515            }
516        }
517
518        if let Some(inherit) = block.inherit_doc.as_ref() {
519            let target = inherit.cref.as_deref().or(inherit.path.as_deref());
520            if let Some(target) = target
521                && let Some(target_id) =
522                    resolve_symbol_reference(target, language, project_id, &symbol_by_key)
523            {
524                relations.inherits.push(RelationRecord {
525                    id: None,
526                    in_id: symbol_record.clone(),
527                    out_id: make_record_id(TABLE_SYMBOL, target_id),
528                    project_id: project_id.to_string(),
529                    ingest_id: ingest_id.clone(),
530                    kind: Some("inheritdoc".to_string()),
531                    extra: None,
532                });
533            }
534        }
535
536        for exception in &block.exceptions {
537            let Some(target_id) = exception
538                .type_ref
539                .as_ref()
540                .and_then(|ty| ty.symbol_key.as_ref())
541                .and_then(|key| symbol_by_key.get(key.as_str()).copied())
542            else {
543                continue;
544            };
545            relations.references.push(RelationRecord {
546                id: None,
547                in_id: symbol_record.clone(),
548                out_id: make_record_id(TABLE_SYMBOL, target_id),
549                project_id: project_id.to_string(),
550                ingest_id: ingest_id.clone(),
551                kind: Some("exception".to_string()),
552                extra: None,
553            });
554        }
555    }
556
557    relations
558}
559
560fn resolve_symbol_reference<'a>(
561    target: &str,
562    language: Option<&str>,
563    project_id: &str,
564    symbol_by_key: &'a HashMap<&'a str, &'a str>,
565) -> Option<&'a str> {
566    if let Some(id) = symbol_by_key.get(target).copied() {
567        return Some(id);
568    }
569    match language {
570        Some("csharp") => {
571            let key = make_csharp_symbol_key(project_id, target);
572            symbol_by_key.get(key.as_str()).copied()
573        }
574        Some("rust") => {
575            let key = make_symbol_key("rust", project_id, target);
576            symbol_by_key.get(key.as_str()).copied()
577        }
578        _ => None,
579    }
580}
581
#[cfg(test)]
mod tests {
    use super::*;
    use docx_store::models::{DocException, DocInherit, SeeAlso, TypeRef};

    /// Builds a minimal C# symbol with the given id and symbol key; every other field is
    /// left empty.
    fn build_symbol(project_id: &str, id: &str, key: &str) -> Symbol {
        Symbol {
            id: Some(id.to_string()),
            project_id: project_id.to_string(),
            language: Some("csharp".to_string()),
            symbol_key: key.to_string(),
            kind: None,
            name: None,
            qualified_name: None,
            display_name: None,
            signature: None,
            signature_hash: None,
            visibility: None,
            is_static: None,
            is_async: None,
            is_const: None,
            is_deprecated: None,
            since: None,
            stability: None,
            source_path: None,
            line: None,
            col: None,
            return_type: None,
            params: Vec::new(),
            type_params: Vec::new(),
            attributes: Vec::new(),
            source_ids: Vec::new(),
            doc_summary: None,
            extra: None,
        }
    }

    /// Builds a minimal C# doc block (id "block-1") attached to `symbol_key`, with no
    /// links, exceptions, or inherited docs.
    fn build_doc_block(project_id: &str, symbol_key: &str) -> DocBlock {
        DocBlock {
            id: Some("block-1".to_string()),
            project_id: project_id.to_string(),
            ingest_id: None,
            symbol_key: Some(symbol_key.to_string()),
            language: Some("csharp".to_string()),
            source_kind: Some(SOURCE_KIND_CSHARP_XML.to_string()),
            doc_hash: None,
            summary: None,
            remarks: None,
            returns: None,
            value: None,
            params: Vec::new(),
            type_params: Vec::new(),
            exceptions: Vec::new(),
            examples: Vec::new(),
            notes: Vec::new(),
            warnings: Vec::new(),
            safety: None,
            panics: None,
            errors: None,
            see_also: Vec::new(),
            deprecated: None,
            inherit_doc: None,
            sections: Vec::new(),
            raw: None,
            extra: None,
        }
    }

    /// A block on `Foo` that links to, inherits from, and throws `Bar` must yield exactly
    /// one edge of each kind, all running from `Foo`'s record to `Bar`'s record.
    #[test]
    fn build_doc_block_relations_extracts_csharp_references() {
        let project_id = "docx";
        let foo_key = make_csharp_symbol_key(project_id, "T:Foo");
        let bar_key = make_csharp_symbol_key(project_id, "T:Bar");

        let symbols = vec![
            build_symbol(project_id, "foo", &foo_key),
            build_symbol(project_id, "bar", &bar_key),
        ];

        let mut block = build_doc_block(project_id, &foo_key);
        // The raw cref is not a symbol key, so these two exercise the C# key fallback.
        block.see_also.push(SeeAlso {
            label: Some("Bar".to_string()),
            target: "T:Bar".to_string(),
            target_kind: Some("cref".to_string()),
        });
        block.inherit_doc = Some(DocInherit {
            cref: Some("T:Bar".to_string()),
            path: None,
        });
        // Exceptions resolve through the type's symbol key directly.
        block.exceptions.push(DocException {
            type_ref: Some(TypeRef {
                display: Some("Bar".to_string()),
                canonical: Some("Bar".to_string()),
                language: Some("csharp".to_string()),
                symbol_key: Some(bar_key),
                generics: Vec::new(),
                modifiers: Vec::new(),
            }),
            description: None,
        });

        let relations = build_doc_block_relations(&symbols, &[block], project_id, None);

        assert_eq!(relations.see_also.len(), 1);
        assert_eq!(relations.inherits.len(), 1);
        assert_eq!(relations.references.len(), 1);

        let source_record = make_record_id(TABLE_SYMBOL, "foo");
        let target_record = make_record_id(TABLE_SYMBOL, "bar");

        assert_eq!(relations.see_also[0].in_id, source_record);
        assert_eq!(relations.see_also[0].out_id, target_record);
        assert_eq!(relations.see_also[0].kind.as_deref(), Some("cref"));

        assert_eq!(relations.inherits[0].in_id, source_record);
        assert_eq!(relations.inherits[0].out_id, target_record);
        assert_eq!(relations.inherits[0].kind.as_deref(), Some("inheritdoc"));

        assert_eq!(relations.references[0].in_id, source_record);
        assert_eq!(relations.references[0].out_id, target_record);
        assert_eq!(relations.references[0].kind.as_deref(), Some("exception"));
    }
}