Skip to main content

docx_core/control/
ingest.rs

1use std::collections::{HashMap, HashSet};
2use std::io::ErrorKind;
3
4use docx_store::models::{DocBlock, DocSource, Ingest, RelationRecord, Symbol};
5use docx_store::schema::{
6    REL_CONTAINS, REL_DOCUMENTS, REL_IMPLEMENTS, REL_INHERITS, REL_MEMBER_OF, REL_OBSERVED_IN,
7    REL_PARAM_TYPE, REL_REFERENCES, REL_RETURNS, REL_SEE_ALSO, SOURCE_KIND_CSHARP_XML,
8    SOURCE_KIND_RUSTDOC_JSON, TABLE_DOC_BLOCK, TABLE_DOC_SOURCE, TABLE_SYMBOL,
9    make_csharp_symbol_key, make_record_id, make_symbol_key,
10};
11use serde::{Deserialize, Serialize};
12use surrealdb::Connection;
13use tokio::fs;
14
15use crate::parsers::{CsharpParseOptions, CsharpXmlParser, RustdocJsonParser, RustdocParseOptions};
16use crate::store::StoreError;
17
18use super::metadata::ProjectUpsertRequest;
19use super::{ControlError, DocxControlPlane};
20
21/// Input payload for ingesting C# XML documentation.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct CsharpIngestRequest {
24    pub project_id: String,
25    pub xml: Option<String>,
26    pub xml_path: Option<String>,
27    pub ingest_id: Option<String>,
28    pub source_path: Option<String>,
29    pub source_modified_at: Option<String>,
30    pub tool_version: Option<String>,
31    pub source_hash: Option<String>,
32}
33
34/// Summary of a C# XML ingest operation.
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct CsharpIngestReport {
37    pub assembly_name: Option<String>,
38    pub symbol_count: usize,
39    pub doc_block_count: usize,
40    pub documents_edge_count: usize,
41    pub doc_source_id: Option<String>,
42}
43
44/// Input payload for ingesting rustdoc JSON output.
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct RustdocIngestRequest {
47    pub project_id: String,
48    pub json: Option<String>,
49    pub json_path: Option<String>,
50    pub ingest_id: Option<String>,
51    pub source_path: Option<String>,
52    pub source_modified_at: Option<String>,
53    pub tool_version: Option<String>,
54    pub source_hash: Option<String>,
55}
56
57/// Summary of a rustdoc JSON ingest operation.
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct RustdocIngestReport {
60    pub crate_name: Option<String>,
61    pub symbol_count: usize,
62    pub doc_block_count: usize,
63    pub documents_edge_count: usize,
64    pub doc_source_id: Option<String>,
65}
66
67impl<C: Connection> DocxControlPlane<C> {
68    /// Ingests C# XML documentation into the store.
69    ///
70    /// # Errors
71    /// Returns `ControlError` if validation fails, parsing fails, or store writes fail.
72    pub async fn ingest_csharp_xml(
73        &self,
74        request: CsharpIngestRequest,
75    ) -> Result<CsharpIngestReport, ControlError> {
76        let CsharpIngestRequest {
77            project_id,
78            xml,
79            xml_path,
80            ingest_id,
81            source_path,
82            source_modified_at,
83            tool_version,
84            source_hash,
85        } = request;
86
87        if project_id.trim().is_empty() {
88            return Err(ControlError::Store(StoreError::InvalidInput(
89                "project_id is required".to_string(),
90            )));
91        }
92
93        let xml = resolve_ingest_payload(xml, xml_path, "xml")
94            .await
95            .map_err(ControlError::Store)?;
96
97        let mut options = CsharpParseOptions::new(project_id.clone());
98        if let Some(ref ingest_id) = ingest_id {
99            options = options.with_ingest_id(ingest_id.clone());
100        }
101
102        let parsed = CsharpXmlParser::parse_async(xml, options).await?;
103        let ingest_source_modified_at = source_modified_at.clone();
104
105        if let Some(ref assembly_name) = parsed.assembly_name {
106            let _ = self
107                .upsert_project(ProjectUpsertRequest {
108                    project_id: project_id.clone(),
109                    name: None,
110                    language: Some("csharp".to_string()),
111                    root_path: None,
112                    description: None,
113                    aliases: vec![assembly_name.clone()],
114                })
115                .await?;
116        }
117
118        let stored_symbols = self.store_symbols(parsed.symbols).await?;
119        let stored_blocks = self.store.create_doc_blocks(parsed.doc_blocks).await?;
120        let doc_source_id = self
121            .create_doc_source_if_needed(DocSourceInput {
122                project_id: project_id.clone(),
123                ingest_id: ingest_id.clone(),
124                language: "csharp".to_string(),
125                source_kind: SOURCE_KIND_CSHARP_XML.to_string(),
126                source_path,
127                tool_version,
128                source_hash,
129                source_modified_at,
130                extra: None,
131            })
132            .await?;
133        let documents_edge_count = self
134            .persist_relations(
135                &stored_symbols,
136                &stored_blocks,
137                &project_id,
138                ingest_id.as_deref(),
139                doc_source_id.as_deref(),
140                &HashMap::new(),
141            )
142            .await?;
143        let _ = self
144            .create_ingest_record(
145                &project_id,
146                ingest_id.as_deref(),
147                ingest_source_modified_at,
148                None,
149            )
150            .await?;
151
152        Ok(CsharpIngestReport {
153            assembly_name: parsed.assembly_name,
154            symbol_count: stored_symbols.len(),
155            doc_block_count: stored_blocks.len(),
156            documents_edge_count,
157            doc_source_id,
158        })
159    }
160
161    /// Ingests rustdoc JSON documentation into the store.
162    ///
163    /// # Errors
164    /// Returns `ControlError` if validation fails, parsing fails, or store writes fail.
165    pub async fn ingest_rustdoc_json(
166        &self,
167        request: RustdocIngestRequest,
168    ) -> Result<RustdocIngestReport, ControlError> {
169        let RustdocIngestRequest {
170            project_id,
171            json,
172            json_path,
173            ingest_id,
174            source_path,
175            source_modified_at,
176            tool_version,
177            source_hash,
178        } = request;
179
180        if project_id.trim().is_empty() {
181            return Err(ControlError::Store(StoreError::InvalidInput(
182                "project_id is required".to_string(),
183            )));
184        }
185
186        let json = resolve_ingest_payload(json, json_path, "json")
187            .await
188            .map_err(ControlError::Store)?;
189
190        let mut options = RustdocParseOptions::new(project_id.clone());
191        if let Some(ref ingest_id) = ingest_id {
192            options = options.with_ingest_id(ingest_id.clone());
193        }
194
195        let parsed = RustdocJsonParser::parse_async(json, options).await?;
196        let ingest_source_modified_at = source_modified_at.clone();
197
198        if let Some(ref crate_name) = parsed.crate_name {
199            let _ = self
200                .upsert_project(ProjectUpsertRequest {
201                    project_id: project_id.clone(),
202                    name: None,
203                    language: Some("rust".to_string()),
204                    root_path: None,
205                    description: None,
206                    aliases: vec![crate_name.clone()],
207                })
208                .await?;
209        }
210
211        let stored_symbols = self.store_symbols(parsed.symbols).await?;
212        let stored_blocks = self.store.create_doc_blocks(parsed.doc_blocks).await?;
213        let doc_source_extra = serde_json::json!({
214            "format_version": parsed.format_version,
215            "includes_private": parsed.includes_private,
216        });
217        let doc_source_id = self
218            .create_doc_source_if_needed(DocSourceInput {
219                project_id: project_id.clone(),
220                ingest_id: ingest_id.clone(),
221                language: "rust".to_string(),
222                source_kind: SOURCE_KIND_RUSTDOC_JSON.to_string(),
223                source_path,
224                tool_version,
225                source_hash,
226                source_modified_at,
227                extra: Some(doc_source_extra),
228            })
229            .await?;
230        let documents_edge_count = self
231            .persist_relations(
232                &stored_symbols,
233                &stored_blocks,
234                &project_id,
235                ingest_id.as_deref(),
236                doc_source_id.as_deref(),
237                &parsed.trait_impls,
238            )
239            .await?;
240        let _ = self
241            .create_ingest_record(
242                &project_id,
243                ingest_id.as_deref(),
244                ingest_source_modified_at,
245                parsed.crate_version.clone(),
246            )
247            .await?;
248
249        Ok(RustdocIngestReport {
250            crate_name: parsed.crate_name,
251            symbol_count: stored_symbols.len(),
252            doc_block_count: stored_blocks.len(),
253            documents_edge_count,
254            doc_source_id,
255        })
256    }
257
258    async fn store_symbols(&self, symbols: Vec<Symbol>) -> Result<Vec<Symbol>, ControlError> {
259        let mut stored = Vec::new();
260        for symbol in dedupe_symbols(symbols) {
261            stored.push(self.store.upsert_symbol(symbol).await?);
262        }
263        Ok(stored)
264    }
265
266    async fn create_doc_source_if_needed(
267        &self,
268        input: DocSourceInput,
269    ) -> Result<Option<String>, ControlError> {
270        let has_source = input.source_path.is_some()
271            || input.tool_version.is_some()
272            || input.source_hash.is_some()
273            || input.source_modified_at.is_some()
274            || input.extra.is_some();
275        if !has_source {
276            return Ok(None);
277        }
278
279        let source = DocSource {
280            id: None,
281            project_id: input.project_id,
282            ingest_id: input.ingest_id,
283            language: Some(input.language),
284            source_kind: Some(input.source_kind),
285            path: input.source_path,
286            tool_version: input.tool_version,
287            hash: input.source_hash,
288            source_modified_at: input.source_modified_at,
289            extra: input.extra,
290        };
291        let created = self.store.create_doc_source(source).await?;
292        Ok(created.id)
293    }
294
295    async fn create_ingest_record(
296        &self,
297        project_id: &str,
298        ingest_id: Option<&str>,
299        source_modified_at: Option<String>,
300        project_version: Option<String>,
301    ) -> Result<Option<String>, ControlError> {
302        let ingest = Ingest {
303            id: ingest_id.map(str::to_string),
304            project_id: project_id.to_string(),
305            git_commit: None,
306            git_branch: None,
307            git_tag: None,
308            project_version,
309            source_modified_at,
310            ingested_at: Some(chrono::Utc::now().to_rfc3339()),
311            extra: None,
312        };
313        let created = self.store.create_ingest(ingest).await?;
314        Ok(created.id)
315    }
316
317    async fn persist_relations(
318        &self,
319        stored_symbols: &[Symbol],
320        stored_blocks: &[DocBlock],
321        project_id: &str,
322        ingest_id: Option<&str>,
323        doc_source_id: Option<&str>,
324        trait_impls: &HashMap<String, Vec<String>>,
325    ) -> Result<usize, ControlError> {
326        let documents = build_documents_edges(stored_symbols, stored_blocks, project_id, ingest_id);
327        let documents_edge_count = documents.len();
328        if !documents.is_empty() {
329            let _ = self
330                .store
331                .create_relations(REL_DOCUMENTS, documents)
332                .await?;
333        }
334
335        let relations = build_symbol_relations(stored_symbols, project_id, ingest_id, trait_impls);
336        if !relations.is_empty() {
337            let _ = self
338                .store
339                .create_relations(REL_MEMBER_OF, relations.member_of)
340                .await?;
341            let _ = self
342                .store
343                .create_relations(REL_CONTAINS, relations.contains)
344                .await?;
345            let _ = self
346                .store
347                .create_relations(REL_RETURNS, relations.returns)
348                .await?;
349            let _ = self
350                .store
351                .create_relations(REL_PARAM_TYPE, relations.param_types)
352                .await?;
353            if !relations.implements.is_empty() {
354                let _ = self
355                    .store
356                    .create_relations(REL_IMPLEMENTS, relations.implements)
357                    .await?;
358            }
359        }
360
361        let doc_relations =
362            build_doc_block_relations(stored_symbols, stored_blocks, project_id, ingest_id);
363        if !doc_relations.is_empty() {
364            let _ = self
365                .store
366                .create_relations(REL_SEE_ALSO, doc_relations.see_also)
367                .await?;
368            let _ = self
369                .store
370                .create_relations(REL_INHERITS, doc_relations.inherits)
371                .await?;
372            let _ = self
373                .store
374                .create_relations(REL_REFERENCES, doc_relations.references)
375                .await?;
376        }
377
378        if let Some(doc_source_id) = doc_source_id {
379            let observed_in =
380                build_observed_in_edges(stored_symbols, project_id, ingest_id, doc_source_id);
381            if !observed_in.is_empty() {
382                let _ = self
383                    .store
384                    .create_relations(REL_OBSERVED_IN, observed_in)
385                    .await?;
386            }
387        }
388
389        Ok(documents_edge_count)
390    }
391}
392
393async fn resolve_ingest_payload(
394    raw: Option<String>,
395    path: Option<String>,
396    field: &str,
397) -> Result<String, StoreError> {
398    if let Some(value) = normalize_payload(raw) {
399        return Ok(strip_bom(&value));
400    }
401    if let Some(path) = normalize_payload(path) {
402        let contents = fs::read_to_string(&path).await.map_err(|err| {
403            let mut message = format!("failed to read {field}_path '{path}': {err}");
404            if err.kind() == ErrorKind::NotFound {
405                message.push_str(
406                    "; file not found on server host. If running in Docker, mount the file into the container or send raw contents instead.",
407                );
408            }
409            StoreError::InvalidInput(message)
410        })?;
411        return Ok(strip_bom(&contents));
412    }
413    Err(StoreError::InvalidInput(format!(
414        "{field} is required (provide {field} or {field}_path)"
415    )))
416}
417
/// Treats a missing, empty, or whitespace-only value as absent.
///
/// A usable value is handed back unchanged; surrounding whitespace is only ignored for the
/// blank check and is not stripped from the result.
fn normalize_payload(value: Option<String>) -> Option<String> {
    value.filter(|payload| !payload.trim().is_empty())
}
428
/// Returns an owned copy of `value` without a single leading U+FEFF byte-order mark.
///
/// Only one mark at the very start is removed; any later occurrence is kept.
fn strip_bom(value: &str) -> String {
    match value.strip_prefix('\u{feff}') {
        Some(rest) => rest.to_owned(),
        None => value.to_owned(),
    }
}
432
433fn dedupe_symbols(symbols: Vec<Symbol>) -> Vec<Symbol> {
434    let mut seen = HashSet::new();
435    let mut deduped = Vec::with_capacity(symbols.len());
436    for symbol in symbols {
437        if seen.insert(symbol.symbol_key.clone()) {
438            deduped.push(symbol);
439        }
440    }
441    deduped
442}
443
444struct DocSourceInput {
445    project_id: String,
446    ingest_id: Option<String>,
447    language: String,
448    source_kind: String,
449    source_path: Option<String>,
450    tool_version: Option<String>,
451    source_hash: Option<String>,
452    source_modified_at: Option<String>,
453    extra: Option<serde_json::Value>,
454}
455
456/// Builds `documents` relation edges between doc blocks and symbols.
457fn build_documents_edges(
458    symbols: &[Symbol],
459    blocks: &[DocBlock],
460    project_id: &str,
461    ingest_id: Option<&str>,
462) -> Vec<RelationRecord> {
463    let mut symbol_map = HashMap::new();
464    for symbol in symbols {
465        if let Some(id) = symbol.id.as_ref() {
466            let record_id = make_record_id(TABLE_SYMBOL, id);
467            symbol_map.insert(symbol.symbol_key.as_str(), record_id);
468        }
469    }
470
471    let mut relations = Vec::new();
472    for block in blocks {
473        let Some(block_id) = block.id.as_ref() else {
474            continue;
475        };
476        let Some(symbol_key) = block.symbol_key.as_ref() else {
477            continue;
478        };
479        let Some(symbol_id) = symbol_map.get(symbol_key.as_str()) else {
480            continue;
481        };
482        let block_record_id = make_record_id(TABLE_DOC_BLOCK, block_id);
483        relations.push(RelationRecord {
484            id: None,
485            in_id: block_record_id,
486            out_id: symbol_id.clone(),
487            project_id: project_id.to_string(),
488            ingest_id: ingest_id.map(str::to_string),
489            kind: None,
490            extra: None,
491        });
492    }
493    relations
494}
495
496/// Builds `observed_in` relation edges between symbols and the ingested doc source.
497fn build_observed_in_edges(
498    symbols: &[Symbol],
499    project_id: &str,
500    ingest_id: Option<&str>,
501    doc_source_id: &str,
502) -> Vec<RelationRecord> {
503    let doc_source_record = make_record_id(TABLE_DOC_SOURCE, doc_source_id);
504    symbols
505        .iter()
506        .filter_map(|symbol| {
507            symbol.id.as_ref().map(|symbol_id| RelationRecord {
508                id: None,
509                in_id: make_record_id(TABLE_SYMBOL, symbol_id),
510                out_id: doc_source_record.clone(),
511                project_id: project_id.to_string(),
512                ingest_id: ingest_id.map(str::to_string),
513                kind: Some("doc_source".to_string()),
514                extra: None,
515            })
516        })
517        .collect()
518}
519
520/// Bundles relation edges derived from symbol metadata.
521#[derive(Default)]
522struct SymbolRelations {
523    member_of: Vec<RelationRecord>,
524    contains: Vec<RelationRecord>,
525    returns: Vec<RelationRecord>,
526    param_types: Vec<RelationRecord>,
527    implements: Vec<RelationRecord>,
528}
529
530impl SymbolRelations {
531    /// Returns true when all relation collections are empty.
532    const fn is_empty(&self) -> bool {
533        self.member_of.is_empty()
534            && self.contains.is_empty()
535            && self.returns.is_empty()
536            && self.param_types.is_empty()
537            && self.implements.is_empty()
538    }
539}
540
541/// Builds relation edges for symbol membership, containment, type references, and trait impls.
542fn build_symbol_relations(
543    symbols: &[Symbol],
544    project_id: &str,
545    ingest_id: Option<&str>,
546    trait_impls: &HashMap<String, Vec<String>>,
547) -> SymbolRelations {
548    let mut relations = SymbolRelations::default();
549    let mut symbol_by_qualified = HashMap::new();
550    let mut symbol_by_key = HashMap::new();
551
552    for symbol in symbols {
553        if let (Some(id), Some(qualified_name)) =
554            (symbol.id.as_ref(), symbol.qualified_name.as_ref())
555        {
556            symbol_by_qualified.insert(qualified_name.as_str(), id.as_str());
557        }
558        if let Some(id) = symbol.id.as_ref() {
559            symbol_by_key.insert(symbol.symbol_key.as_str(), id.as_str());
560        }
561    }
562
563    for symbol in symbols {
564        let Some(symbol_id) = symbol.id.as_ref() else {
565            continue;
566        };
567        let symbol_record = make_record_id(TABLE_SYMBOL, symbol_id);
568        let ingest_id = ingest_id.map(str::to_string);
569
570        if let Some(parent) = symbol
571            .qualified_name
572            .as_ref()
573            .and_then(|qualified| qualified.rsplit_once("::").map(|pair| pair.0.to_string()))
574            .and_then(|parent| symbol_by_qualified.get(parent.as_str()).copied())
575        {
576            let parent_record = make_record_id(TABLE_SYMBOL, parent);
577            relations.member_of.push(RelationRecord {
578                id: None,
579                in_id: symbol_record.clone(),
580                out_id: parent_record.clone(),
581                project_id: project_id.to_string(),
582                ingest_id: ingest_id.clone(),
583                kind: None,
584                extra: None,
585            });
586            relations.contains.push(RelationRecord {
587                id: None,
588                in_id: parent_record,
589                out_id: symbol_record.clone(),
590                project_id: project_id.to_string(),
591                ingest_id: ingest_id.clone(),
592                kind: None,
593                extra: None,
594            });
595        }
596
597        if let Some(return_key) = symbol
598            .return_type
599            .as_ref()
600            .and_then(|ty| ty.symbol_key.as_ref())
601            .and_then(|key| symbol_by_key.get(key.as_str()).copied())
602        {
603            relations.returns.push(RelationRecord {
604                id: None,
605                in_id: symbol_record.clone(),
606                out_id: make_record_id(TABLE_SYMBOL, return_key),
607                project_id: project_id.to_string(),
608                ingest_id: ingest_id.clone(),
609                kind: None,
610                extra: None,
611            });
612        }
613
614        for param in &symbol.params {
615            let Some(param_key) = param
616                .type_ref
617                .as_ref()
618                .and_then(|ty| ty.symbol_key.as_ref())
619                .and_then(|key| symbol_by_key.get(key.as_str()).copied())
620            else {
621                continue;
622            };
623            relations.param_types.push(RelationRecord {
624                id: None,
625                in_id: symbol_record.clone(),
626                out_id: make_record_id(TABLE_SYMBOL, param_key),
627                project_id: project_id.to_string(),
628                ingest_id: ingest_id.clone(),
629                kind: Some(param.name.clone()),
630                extra: None,
631            });
632        }
633
634        // Build implements edges from trait_impls map
635        if let Some(qualified_name) = symbol.qualified_name.as_ref()
636            && let Some(trait_paths) = trait_impls.get(qualified_name.as_str())
637        {
638            for trait_path in trait_paths {
639                let trait_key = make_symbol_key("rust", project_id, trait_path);
640                if let Some(trait_id) = symbol_by_key.get(trait_key.as_str()).copied() {
641                    relations.implements.push(RelationRecord {
642                        id: None,
643                        in_id: symbol_record.clone(),
644                        out_id: make_record_id(TABLE_SYMBOL, trait_id),
645                        project_id: project_id.to_string(),
646                        ingest_id: ingest_id.clone(),
647                        kind: Some("trait_impl".to_string()),
648                        extra: None,
649                    });
650                }
651            }
652        }
653    }
654
655    relations
656}
657
658/// Bundles relation edges derived from documentation metadata.
659#[derive(Default)]
660struct DocBlockRelations {
661    see_also: Vec<RelationRecord>,
662    inherits: Vec<RelationRecord>,
663    references: Vec<RelationRecord>,
664}
665
666impl DocBlockRelations {
667    /// Returns true when all relation collections are empty.
668    const fn is_empty(&self) -> bool {
669        self.see_also.is_empty() && self.inherits.is_empty() && self.references.is_empty()
670    }
671}
672
673/// Builds relation edges for `see also`, inheritance, and reference metadata on doc blocks.
674fn build_doc_block_relations(
675    symbols: &[Symbol],
676    blocks: &[DocBlock],
677    project_id: &str,
678    ingest_id: Option<&str>,
679) -> DocBlockRelations {
680    let mut relations = DocBlockRelations::default();
681    let mut symbol_by_key = HashMap::new();
682    for symbol in symbols {
683        if let Some(id) = symbol.id.as_ref() {
684            symbol_by_key.insert(symbol.symbol_key.as_str(), id.as_str());
685        }
686    }
687
688    for block in blocks {
689        let Some(symbol_key) = block.symbol_key.as_ref() else {
690            continue;
691        };
692        let Some(symbol_id) = symbol_by_key.get(symbol_key.as_str()).copied() else {
693            continue;
694        };
695        let symbol_record = make_record_id(TABLE_SYMBOL, symbol_id);
696        let ingest_id = ingest_id.map(str::to_string);
697        let language = block.language.as_deref();
698
699        for link in &block.see_also {
700            if let Some(target_id) =
701                resolve_symbol_reference(&link.target, language, project_id, &symbol_by_key)
702            {
703                relations.see_also.push(RelationRecord {
704                    id: None,
705                    in_id: symbol_record.clone(),
706                    out_id: make_record_id(TABLE_SYMBOL, target_id),
707                    project_id: project_id.to_string(),
708                    ingest_id: ingest_id.clone(),
709                    kind: link.target_kind.clone(),
710                    extra: None,
711                });
712            }
713        }
714
715        if let Some(inherit) = block.inherit_doc.as_ref() {
716            let target = inherit.cref.as_deref().or(inherit.path.as_deref());
717            if let Some(target) = target
718                && let Some(target_id) =
719                    resolve_symbol_reference(target, language, project_id, &symbol_by_key)
720            {
721                relations.inherits.push(RelationRecord {
722                    id: None,
723                    in_id: symbol_record.clone(),
724                    out_id: make_record_id(TABLE_SYMBOL, target_id),
725                    project_id: project_id.to_string(),
726                    ingest_id: ingest_id.clone(),
727                    kind: Some("inheritdoc".to_string()),
728                    extra: None,
729                });
730            }
731        }
732
733        for exception in &block.exceptions {
734            let Some(target_id) = exception
735                .type_ref
736                .as_ref()
737                .and_then(|ty| ty.symbol_key.as_ref())
738                .and_then(|key| symbol_by_key.get(key.as_str()).copied())
739            else {
740                continue;
741            };
742            relations.references.push(RelationRecord {
743                id: None,
744                in_id: symbol_record.clone(),
745                out_id: make_record_id(TABLE_SYMBOL, target_id),
746                project_id: project_id.to_string(),
747                ingest_id: ingest_id.clone(),
748                kind: Some("exception".to_string()),
749                extra: None,
750            });
751        }
752    }
753
754    relations
755}
756
757fn resolve_symbol_reference<'a>(
758    target: &str,
759    language: Option<&str>,
760    project_id: &str,
761    symbol_by_key: &'a HashMap<&'a str, &'a str>,
762) -> Option<&'a str> {
763    if let Some(id) = symbol_by_key.get(target).copied() {
764        return Some(id);
765    }
766    match language {
767        Some("csharp") => {
768            let key = make_csharp_symbol_key(project_id, target);
769            symbol_by_key.get(key.as_str()).copied()
770        }
771        Some("rust") => {
772            let key = make_symbol_key("rust", project_id, target);
773            symbol_by_key.get(key.as_str()).copied()
774        }
775        _ => None,
776    }
777}
778
779#[cfg(test)]
780mod tests {
781    use super::*;
782    use docx_store::models::{DocException, DocInherit, SeeAlso, TypeRef};
783
784    fn build_symbol(project_id: &str, id: &str, key: &str) -> Symbol {
785        Symbol {
786            id: Some(id.to_string()),
787            project_id: project_id.to_string(),
788            language: Some("csharp".to_string()),
789            symbol_key: key.to_string(),
790            kind: None,
791            name: None,
792            qualified_name: None,
793            display_name: None,
794            signature: None,
795            signature_hash: None,
796            visibility: None,
797            is_static: None,
798            is_async: None,
799            is_const: None,
800            is_deprecated: None,
801            since: None,
802            stability: None,
803            source_path: None,
804            line: None,
805            col: None,
806            return_type: None,
807            params: Vec::new(),
808            type_params: Vec::new(),
809            attributes: Vec::new(),
810            source_ids: Vec::new(),
811            doc_summary: None,
812            extra: None,
813        }
814    }
815
816    fn build_doc_block(project_id: &str, symbol_key: &str) -> DocBlock {
817        DocBlock {
818            id: Some("block-1".to_string()),
819            project_id: project_id.to_string(),
820            ingest_id: None,
821            symbol_key: Some(symbol_key.to_string()),
822            language: Some("csharp".to_string()),
823            source_kind: Some(SOURCE_KIND_CSHARP_XML.to_string()),
824            doc_hash: None,
825            summary: None,
826            remarks: None,
827            returns: None,
828            value: None,
829            params: Vec::new(),
830            type_params: Vec::new(),
831            exceptions: Vec::new(),
832            examples: Vec::new(),
833            notes: Vec::new(),
834            warnings: Vec::new(),
835            safety: None,
836            panics: None,
837            errors: None,
838            see_also: Vec::new(),
839            deprecated: None,
840            inherit_doc: None,
841            sections: Vec::new(),
842            raw: None,
843            extra: None,
844        }
845    }
846
847    #[test]
848    fn build_observed_in_edges_links_symbols_to_doc_source() {
849        let symbols = vec![
850            build_symbol("docx", "foo", "csharp|docx|T:Foo"),
851            build_symbol("docx", "bar", "csharp|docx|T:Bar"),
852        ];
853
854        let edges = build_observed_in_edges(&symbols, "docx", Some("ing-1"), "source-1");
855        assert_eq!(edges.len(), 2);
856        assert_eq!(edges[0].in_id, make_record_id(TABLE_SYMBOL, "foo"));
857        assert_eq!(
858            edges[0].out_id,
859            make_record_id(TABLE_DOC_SOURCE, "source-1")
860        );
861        assert_eq!(edges[0].ingest_id.as_deref(), Some("ing-1"));
862        assert_eq!(edges[0].kind.as_deref(), Some("doc_source"));
863    }
864
865    #[test]
866    fn build_doc_block_relations_extracts_csharp_references() {
867        let project_id = "docx";
868        let foo_key = make_csharp_symbol_key(project_id, "T:Foo");
869        let bar_key = make_csharp_symbol_key(project_id, "T:Bar");
870
871        let symbols = vec![
872            build_symbol(project_id, "foo", &foo_key),
873            build_symbol(project_id, "bar", &bar_key),
874        ];
875
876        let mut block = build_doc_block(project_id, &foo_key);
877        block.see_also.push(SeeAlso {
878            label: Some("Bar".to_string()),
879            target: "T:Bar".to_string(),
880            target_kind: Some("cref".to_string()),
881        });
882        block.inherit_doc = Some(DocInherit {
883            cref: Some("T:Bar".to_string()),
884            path: None,
885        });
886        block.exceptions.push(DocException {
887            type_ref: Some(TypeRef {
888                display: Some("Bar".to_string()),
889                canonical: Some("Bar".to_string()),
890                language: Some("csharp".to_string()),
891                symbol_key: Some(bar_key),
892                generics: Vec::new(),
893                modifiers: Vec::new(),
894            }),
895            description: None,
896        });
897
898        let relations = build_doc_block_relations(&symbols, &[block], project_id, None);
899
900        assert_eq!(relations.see_also.len(), 1);
901        assert_eq!(relations.inherits.len(), 1);
902        assert_eq!(relations.references.len(), 1);
903
904        let target_record = make_record_id(TABLE_SYMBOL, "bar");
905        assert_eq!(relations.see_also[0].out_id, target_record);
906        assert_eq!(relations.see_also[0].kind.as_deref(), Some("cref"));
907        assert_eq!(relations.inherits[0].kind.as_deref(), Some("inheritdoc"));
908        assert_eq!(relations.references[0].kind.as_deref(), Some("exception"));
909    }
910
911    #[test]
912    fn dedupe_symbols_keeps_first_symbol_per_key() {
913        let mut first = build_symbol("docx", "first", "csharp|docx|T:Foo");
914        first.name = Some("first".to_string());
915        let mut duplicate = build_symbol("docx", "second", "csharp|docx|T:Foo");
916        duplicate.name = Some("second".to_string());
917        let other = build_symbol("docx", "third", "csharp|docx|T:Bar");
918
919        let deduped = dedupe_symbols(vec![first.clone(), duplicate, other.clone()]);
920
921        assert_eq!(deduped.len(), 2);
922        assert_eq!(deduped[0].symbol_key, first.symbol_key);
923        assert_eq!(deduped[0].name.as_deref(), Some("first"));
924        assert_eq!(deduped[1].symbol_key, other.symbol_key);
925    }
926}