1use std::collections::{HashMap, HashSet};
4use std::{error::Error, fmt, path::Path};
5
6use docx_store::models::{
7 AttributeRef, DocBlock, DocExample, DocParam, DocSection, DocTypeParam, Param, SeeAlso,
8 SourceId, Symbol, TypeParam, TypeRef,
9};
10use docx_store::schema::{SOURCE_KIND_RUSTDOC_JSON, make_symbol_key};
11use serde::Deserialize;
12use serde_json::Value;
13
/// Caller-supplied settings that are stamped onto everything the parser emits.
#[derive(Debug, Clone)]
pub struct RustdocParseOptions {
    /// Project identifier copied onto every emitted symbol and doc block.
    pub project_id: String,
    /// Optional ingest identifier copied onto emitted doc blocks.
    pub ingest_id: Option<String>,
    /// Language label recorded on symbols, doc blocks, and type references.
    pub language: String,
    /// Source-kind label recorded on emitted doc blocks.
    pub source_kind: String,
}
22
23impl RustdocParseOptions {
24 pub fn new(project_id: impl Into<String>) -> Self {
25 Self {
26 project_id: project_id.into(),
27 ingest_id: None,
28 language: "rust".to_string(),
29 source_kind: SOURCE_KIND_RUSTDOC_JSON.to_string(),
30 }
31 }
32
33 #[must_use]
34 pub fn with_ingest_id(mut self, ingest_id: impl Into<String>) -> Self {
35 self.ingest_id = Some(ingest_id.into());
36 self
37 }
38}
39
40#[derive(Debug, Clone)]
42pub struct RustdocParseOutput {
43 pub crate_name: Option<String>,
44 pub crate_version: Option<String>,
45 pub format_version: u32,
46 pub includes_private: bool,
47 pub symbols: Vec<Symbol>,
48 pub doc_blocks: Vec<DocBlock>,
49 pub trait_impls: HashMap<String, Vec<String>>,
51}
52
/// Error returned when rustdoc JSON cannot be read or parsed.
#[derive(Debug)]
pub struct RustdocParseError {
    /// Human-readable description of what went wrong.
    message: String,
}

impl RustdocParseError {
    /// Wraps `message` in a new error value.
    fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl fmt::Display for RustdocParseError {
    /// Writes the stored message prefixed with `rustdoc JSON parse error: `.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("rustdoc JSON parse error: ")?;
        f.write_str(&self.message)
    }
}

impl Error for RustdocParseError {}
74
75impl From<serde_json::Error> for RustdocParseError {
76 fn from(err: serde_json::Error) -> Self {
77 Self::new(err.to_string())
78 }
79}
80
81impl From<std::io::Error> for RustdocParseError {
82 fn from(err: std::io::Error) -> Self {
83 Self::new(err.to_string())
84 }
85}
86
87impl From<tokio::task::JoinError> for RustdocParseError {
88 fn from(err: tokio::task::JoinError) -> Self {
89 Self::new(err.to_string())
90 }
91}
92
/// Stateless entry point for rustdoc JSON parsing; all functionality is exposed
/// through associated functions.
pub struct RustdocJsonParser;
95
96impl RustdocJsonParser {
97 #[allow(clippy::too_many_lines)]
102 pub fn parse(
103 json: &str,
104 options: &RustdocParseOptions,
105 ) -> Result<RustdocParseOutput, RustdocParseError> {
106 let crate_doc: RustdocCrate = serde_json::from_str(json)?;
107 let crate_version = crate_doc.crate_version.clone();
108 let format_version = crate_doc.format_version;
109 let includes_private = crate_doc.includes_private;
110 let root_id = crate_doc.root;
111 let root_item = crate_doc
112 .index
113 .get(&root_id.to_string())
114 .ok_or_else(|| RustdocParseError::new("missing root item"))?;
115
116 let crate_name = root_item.name.clone();
117 let root_crate_id = root_item.crate_id;
118 let mut id_to_path = build_id_path_map(&crate_doc, root_crate_id);
119
120 let mut state = ParserState {
121 crate_doc: &crate_doc,
122 options,
123 root_crate_id,
124 id_to_path: &mut id_to_path,
125 symbols: Vec::new(),
126 doc_blocks: Vec::new(),
127 seen: HashSet::new(),
128 used_symbol_keys: HashSet::new(),
129 trait_impls: HashMap::new(),
130 };
131
132 let mut module_path = Vec::new();
133 if let Some(name) = crate_name.clone() {
134 module_path.push(name);
135 }
136 state.visit_module(root_id, &module_path);
137
138 Ok(RustdocParseOutput {
139 crate_name,
140 crate_version,
141 format_version,
142 includes_private,
143 symbols: state.symbols,
144 doc_blocks: state.doc_blocks,
145 trait_impls: state.trait_impls,
146 })
147 }
148 pub async fn parse_async(
153 json: String,
154 options: RustdocParseOptions,
155 ) -> Result<RustdocParseOutput, RustdocParseError> {
156 tokio::task::spawn_blocking(move || Self::parse(&json, &options)).await?
157 }
158
159 pub async fn parse_file(
164 path: impl AsRef<Path>,
165 options: RustdocParseOptions,
166 ) -> Result<RustdocParseOutput, RustdocParseError> {
167 let path = path.as_ref().to_path_buf();
168 let json = tokio::task::spawn_blocking(move || std::fs::read_to_string(path)).await??;
169 Self::parse_async(json, options).await
170 }
171}
172
173#[derive(Debug, Deserialize)]
174struct RustdocCrate {
175 root: u64,
176 crate_version: Option<String>,
177 #[serde(default)]
178 format_version: u32,
179 #[serde(default)]
180 includes_private: bool,
181 index: HashMap<String, RustdocItem>,
182 #[serde(default)]
183 paths: HashMap<String, RustdocPath>,
184}
185
186#[derive(Debug, Deserialize, Clone)]
187struct RustdocItem {
188 id: u64,
189 crate_id: u64,
190 name: Option<String>,
191 span: Option<RustdocSpan>,
192 visibility: Option<Value>,
193 docs: Option<String>,
194 deprecation: Option<RustdocDeprecation>,
195 #[serde(default)]
196 attrs: Vec<Value>,
197 inner: HashMap<String, Value>,
198}
199
200#[derive(Debug, Deserialize, Clone)]
201struct RustdocSpan {
202 filename: String,
203 begin: [u32; 2],
204}
205
206#[derive(Debug, Deserialize, Clone)]
207struct RustdocPath {
208 crate_id: u64,
209 path: Vec<String>,
210}
211
212#[derive(Debug, Deserialize, Clone)]
213struct RustdocDeprecation {
214 since: Option<String>,
215}
216
217struct ParserState<'a> {
218 crate_doc: &'a RustdocCrate,
219 options: &'a RustdocParseOptions,
220 root_crate_id: u64,
221 id_to_path: &'a mut HashMap<u64, String>,
222 symbols: Vec<Symbol>,
223 doc_blocks: Vec<DocBlock>,
224 seen: HashSet<u64>,
225 used_symbol_keys: HashSet<String>,
226 trait_impls: HashMap<String, Vec<String>>,
227}
228impl ParserState<'_> {
229 fn visit_module(&mut self, module_id: u64, module_path: &[String]) {
230 if self.seen.contains(&module_id) {
231 return;
232 }
233 let Some(item) = self.get_item(module_id) else {
234 return;
235 };
236 if item.crate_id != self.root_crate_id {
237 return;
238 }
239 self.seen.insert(module_id);
240
241 self.add_symbol(&item, module_path, None, Some("module"));
242 let items = module_items(&item);
243 for child_id in items {
244 if let Some(child) = self.get_item(child_id) {
245 if child.crate_id != self.root_crate_id {
246 continue;
247 }
248 if is_inner_kind(&child, "module") {
249 let mut child_path = module_path.to_vec();
250 if let Some(name) = child.name.as_ref()
251 && !name.is_empty()
252 {
253 child_path.push(name.clone());
254 }
255 self.visit_module(child_id, &child_path);
256 } else {
257 self.visit_item(child_id, module_path);
258 }
259 }
260 }
261 }
262
263 fn visit_item(&mut self, item_id: u64, module_path: &[String]) {
264 if self.seen.contains(&item_id) {
265 return;
266 }
267 let Some(item) = self.get_item(item_id) else {
268 return;
269 };
270 if item.crate_id != self.root_crate_id {
271 return;
272 }
273 self.seen.insert(item_id);
274
275 let inner_kind = inner_kind(&item);
276 match inner_kind {
277 Some("struct") => {
278 let qualified = self.add_symbol(&item, module_path, None, Some("struct"));
279 self.visit_struct_fields(&item, &qualified);
280 self.visit_impls(&item, &qualified);
281 }
282 Some("enum") => {
283 let qualified = self.add_symbol(&item, module_path, None, Some("enum"));
284 self.visit_enum_variants(&item, &qualified);
285 self.visit_impls(&item, &qualified);
286 }
287 Some("trait") => {
288 let qualified = self.add_symbol(&item, module_path, None, Some("trait"));
289 self.visit_trait_items(&item, &qualified);
290 self.visit_impls(&item, &qualified);
291 }
292 Some("function") => {
293 self.add_symbol(&item, module_path, None, Some("function"));
294 }
295 Some("type_alias") => {
296 self.add_symbol(&item, module_path, None, Some("type_alias"));
297 }
298 Some("constant") => {
299 self.add_symbol(&item, module_path, None, Some("const"));
300 }
301 Some("static") => {
302 self.add_symbol(&item, module_path, None, Some("static"));
303 }
304 Some("union") => {
305 self.add_symbol(&item, module_path, None, Some("union"));
306 }
307 Some("macro") => {
308 self.add_symbol(&item, module_path, None, Some("macro"));
309 }
310 Some("module") => {
311 let mut child_path = module_path.to_vec();
312 if let Some(name) = item.name.as_ref()
313 && !name.is_empty()
314 {
315 child_path.push(name.clone());
316 }
317 self.visit_module(item_id, &child_path);
318 }
319 _ => {}
320 }
321 }
322
323 fn visit_struct_fields(&mut self, item: &RustdocItem, owner_name: &str) {
324 let Some(inner) = item.inner.get("struct") else {
325 return;
326 };
327 let Some(kind) = inner.get("kind") else {
328 return;
329 };
330 let field_ids = struct_kind_fields(kind);
331 for field_id in field_ids {
332 if let Some(field_item) = self.get_item(field_id) {
333 if field_item.crate_id != self.root_crate_id {
334 continue;
335 }
336 self.add_symbol(&field_item, &[], Some(owner_name), Some("field"));
337 }
338 }
339 }
340
341 fn visit_enum_variants(&mut self, item: &RustdocItem, owner_name: &str) {
342 let Some(inner) = item.inner.get("enum") else {
343 return;
344 };
345 let Some(variants) = inner.get("variants").and_then(Value::as_array) else {
346 return;
347 };
348 for variant_id in variants.iter().filter_map(Value::as_u64) {
349 if let Some(variant_item) = self.get_item(variant_id) {
350 if variant_item.crate_id != self.root_crate_id {
351 continue;
352 }
353 self.add_symbol(&variant_item, &[], Some(owner_name), Some("variant"));
354 }
355 }
356 }
357
358 fn visit_trait_items(&mut self, item: &RustdocItem, owner_name: &str) {
359 let Some(inner) = item.inner.get("trait") else {
360 return;
361 };
362 let Some(items) = inner.get("items").and_then(Value::as_array) else {
363 return;
364 };
365 for assoc_id in items.iter().filter_map(Value::as_u64) {
366 if let Some(assoc_item) = self.get_item(assoc_id) {
367 if assoc_item.crate_id != self.root_crate_id {
368 continue;
369 }
370 self.add_symbol(&assoc_item, &[], Some(owner_name), Some("trait_item"));
371 }
372 }
373 }
374
375 fn visit_impls(&mut self, item: &RustdocItem, owner_name: &str) {
376 let impl_ids = match inner_kind(item) {
377 Some("struct") => item
378 .inner
379 .get("struct")
380 .and_then(|value| value.get("impls"))
381 .and_then(Value::as_array)
382 .map(|items| extract_ids(items)),
383 Some("enum") => item
384 .inner
385 .get("enum")
386 .and_then(|value| value.get("impls"))
387 .and_then(Value::as_array)
388 .map(|items| extract_ids(items)),
389 Some("trait") => item
390 .inner
391 .get("trait")
392 .and_then(|value| value.get("impls"))
393 .and_then(Value::as_array)
394 .map(|items| extract_ids(items)),
395 _ => None,
396 };
397
398 let Some(impl_ids) = impl_ids else {
399 return;
400 };
401
402 for impl_id in impl_ids {
403 let Some(impl_item) = self.get_item(impl_id) else {
404 continue;
405 };
406 if impl_item.crate_id != self.root_crate_id {
407 continue;
408 }
409 let Some(impl_inner) = impl_item.inner.get("impl") else {
410 continue;
411 };
412
413 if let Some(trait_ref) = impl_inner.get("trait")
415 && let Some(trait_path) = trait_ref.get("path").and_then(Value::as_str)
416 {
417 self.trait_impls
418 .entry(owner_name.to_string())
419 .or_default()
420 .push(trait_path.to_string());
421 }
422
423 let Some(items) = impl_inner.get("items").and_then(Value::as_array) else {
424 continue;
425 };
426 for assoc_id in items.iter().filter_map(Value::as_u64) {
427 if let Some(assoc_item) = self.get_item(assoc_id) {
428 if assoc_item.crate_id != self.root_crate_id {
429 continue;
430 }
431 self.add_symbol(&assoc_item, &[], Some(owner_name), Some("method"));
432 }
433 }
434 }
435 }
436
437 fn add_symbol(
438 &mut self,
439 item: &RustdocItem,
440 module_path: &[String],
441 owner_name: Option<&str>,
442 kind_override: Option<&str>,
443 ) -> String {
444 let name = item.name.clone().unwrap_or_default();
445 let qualified_name = qualified_name_for_item(&name, module_path, owner_name);
446
447 let symbol_key = make_unique_symbol_key(
448 &mut self.used_symbol_keys,
449 &self.options.project_id,
450 &qualified_name,
451 item.id,
452 );
453 let doc_symbol_key = symbol_key.clone();
454 self.id_to_path.insert(item.id, qualified_name.clone());
455
456 let docs = item.docs.as_deref().unwrap_or("").trim();
457 let parsed_docs = (!docs.is_empty()).then(|| parse_markdown_docs(docs));
458
459 let (params, return_type, signature) = parse_signature(item, self, &name);
460 let type_params = parse_type_params(item);
461 let (source_path, line, col) = span_location(item);
462
463 let parts = SymbolParts {
464 name,
465 qualified_name: qualified_name.clone(),
466 symbol_key,
467 signature,
468 params,
469 return_type,
470 type_params,
471 source_path,
472 line,
473 col,
474 };
475
476 let symbol = build_symbol(
477 item,
478 self.options,
479 parts,
480 kind_override,
481 parsed_docs.as_ref(),
482 );
483 self.symbols.push(symbol);
484
485 if let Some(parsed_docs) = parsed_docs {
486 let doc_block = build_doc_block(self.options, doc_symbol_key, parsed_docs, docs);
487 self.doc_blocks.push(doc_block);
488 }
489
490 qualified_name
491 }
492
493 fn get_item(&self, item_id: u64) -> Option<RustdocItem> {
494 self.crate_doc.index.get(&item_id.to_string()).cloned()
495 }
496}
497
498fn make_unique_symbol_key(
499 used_symbol_keys: &mut HashSet<String>,
500 project_id: &str,
501 qualified_name: &str,
502 item_id: u64,
503) -> String {
504 let base_key = make_symbol_key("rust", project_id, qualified_name);
505 if used_symbol_keys.insert(base_key.clone()) {
506 return base_key;
507 }
508
509 let mut candidate = format!("{base_key}#{item_id}");
510 if used_symbol_keys.insert(candidate.clone()) {
511 return candidate;
512 }
513
514 let mut ordinal: u32 = 1;
515 loop {
516 candidate = format!("{base_key}#{item_id}_{ordinal}");
517 if used_symbol_keys.insert(candidate.clone()) {
518 return candidate;
519 }
520 ordinal += 1;
521 }
522}
523
/// Joins `name` onto its owner or module path with `::`.
///
/// When `owner_name` is given it takes precedence over `module_path`. An empty
/// `name` yields the owner or the joined module path on its own, and an empty
/// `module_path` without an owner yields `name` unchanged.
fn qualified_name_for_item(name: &str, module_path: &[String], owner_name: Option<&str>) -> String {
    match owner_name {
        Some(owner) if name.is_empty() => owner.to_string(),
        Some(owner) => format!("{owner}::{name}"),
        None if module_path.is_empty() => name.to_string(),
        None if name.is_empty() => module_path.join("::"),
        None => format!("{}::{name}", module_path.join("::")),
    }
}
544
545fn span_location(item: &RustdocItem) -> (Option<String>, Option<u32>, Option<u32>) {
546 item.span.as_ref().map_or((None, None, None), |span| {
547 (
548 Some(span.filename.clone()),
549 Some(span.begin[0]),
550 Some(span.begin[1]),
551 )
552 })
553}
554
555struct SymbolParts {
556 name: String,
557 qualified_name: String,
558 symbol_key: String,
559 signature: Option<String>,
560 params: Vec<Param>,
561 return_type: Option<TypeRef>,
562 type_params: Vec<TypeParam>,
563 source_path: Option<String>,
564 line: Option<u32>,
565 col: Option<u32>,
566}
567
568fn build_symbol(
569 item: &RustdocItem,
570 options: &RustdocParseOptions,
571 parts: SymbolParts,
572 kind_override: Option<&str>,
573 parsed_docs: Option<&ParsedDocs>,
574) -> Symbol {
575 let SymbolParts {
576 name,
577 qualified_name,
578 symbol_key,
579 signature,
580 params,
581 return_type,
582 type_params,
583 source_path,
584 line,
585 col,
586 } = parts;
587
588 let name_value = if name.is_empty() { None } else { Some(name) };
589 let qualified_value = if qualified_name.is_empty() {
590 None
591 } else {
592 Some(qualified_name)
593 };
594
595 Symbol {
596 id: None,
597 project_id: options.project_id.clone(),
598 language: Some(options.language.clone()),
599 symbol_key,
600 kind: kind_override
601 .map(str::to_string)
602 .or_else(|| inner_kind(item).map(str::to_string)),
603 name: name_value.clone(),
604 qualified_name: qualified_value,
605 display_name: name_value,
606 signature,
607 signature_hash: None,
608 visibility: normalize_visibility(item.visibility.as_ref()),
609 is_static: item_is_static(item),
610 is_async: item_is_async(item),
611 is_const: item_is_const(item),
612 is_deprecated: item.deprecation.is_some().then_some(true),
613 since: item.deprecation.as_ref().and_then(|dep| dep.since.clone()),
614 stability: None,
615 source_path,
616 line,
617 col,
618 return_type,
619 params,
620 type_params,
621 attributes: parse_attrs(&item.attrs),
622 source_ids: vec![SourceId {
623 kind: "rustdoc_id".to_string(),
624 value: item.id.to_string(),
625 }],
626 doc_summary: parsed_docs.and_then(|docs| docs.summary.clone()),
627 extra: None,
628 }
629}
630
631fn parse_attrs(attrs: &[Value]) -> Vec<AttributeRef> {
632 attrs
633 .iter()
634 .filter_map(|attr| {
635 let raw = attr
637 .get("other")
638 .and_then(Value::as_str)
639 .unwrap_or_else(|| attr.as_str().unwrap_or(""));
640 let raw = raw.trim();
641 if raw.is_empty() {
642 return None;
643 }
644 let inner = raw
646 .strip_prefix("#![")
647 .or_else(|| raw.strip_prefix("#["))
648 .and_then(|rest| rest.strip_suffix(']'))
649 .unwrap_or(raw)
650 .trim();
651 if inner.is_empty() {
652 return None;
653 }
654 let name = inner
656 .find(['(', '='])
657 .map_or(inner, |pos| inner[..pos].trim());
658 Some(AttributeRef {
659 name: name.to_string(),
660 args: Vec::new(),
661 target: None,
662 })
663 })
664 .collect()
665}
666
667fn normalize_visibility(visibility: Option<&Value>) -> Option<String> {
668 let value = visibility?;
669 match value {
670 Value::String(text) => Some(text.clone()),
671 Value::Object(map) => {
672 if map.contains_key("public") {
673 Some("public".to_string())
674 } else if map.contains_key("default") {
675 Some("default".to_string())
676 } else if map.contains_key("crate") {
677 Some("crate".to_string())
678 } else if let Some(restricted) = map.get("restricted") {
679 Some(restricted.get("path").and_then(Value::as_str).map_or_else(
680 || "restricted".to_string(),
681 |path| format!("restricted({path})"),
682 ))
683 } else {
684 Some(value.to_string())
685 }
686 }
687 _ => Some(value.to_string()),
688 }
689}
690
691fn build_doc_block(
692 options: &RustdocParseOptions,
693 symbol_key: String,
694 parsed_docs: ParsedDocs,
695 raw_docs: &str,
696) -> DocBlock {
697 DocBlock {
698 id: None,
699 project_id: options.project_id.clone(),
700 ingest_id: options.ingest_id.clone(),
701 symbol_key: Some(symbol_key),
702 language: Some(options.language.clone()),
703 source_kind: Some(options.source_kind.clone()),
704 doc_hash: None,
705 summary: parsed_docs.summary,
706 remarks: parsed_docs.remarks,
707 returns: parsed_docs.returns,
708 value: parsed_docs.value,
709 params: parsed_docs.params,
710 type_params: parsed_docs.type_params,
711 exceptions: Vec::new(),
712 examples: parsed_docs.examples,
713 notes: parsed_docs.notes,
714 warnings: parsed_docs.warnings,
715 safety: parsed_docs.safety,
716 panics: parsed_docs.panics,
717 errors: parsed_docs.errors,
718 see_also: parsed_docs.see_also,
719 deprecated: parsed_docs.deprecated,
720 inherit_doc: None,
721 sections: parsed_docs.sections,
722 raw: Some(raw_docs.to_string()),
723 extra: None,
724 }
725}
726
727#[derive(Debug)]
728struct ParsedDocs {
729 summary: Option<String>,
730 remarks: Option<String>,
731 returns: Option<String>,
732 value: Option<String>,
733 errors: Option<String>,
734 panics: Option<String>,
735 safety: Option<String>,
736 deprecated: Option<String>,
737 params: Vec<DocParam>,
738 type_params: Vec<DocTypeParam>,
739 examples: Vec<DocExample>,
740 notes: Vec<String>,
741 warnings: Vec<String>,
742 see_also: Vec<SeeAlso>,
743 sections: Vec<DocSection>,
744}
745
746fn build_id_path_map(crate_doc: &RustdocCrate, root_crate_id: u64) -> HashMap<u64, String> {
747 let mut map = HashMap::new();
748 for (id, path) in &crate_doc.paths {
749 if path.crate_id != root_crate_id {
750 continue;
751 }
752 if let Ok(parsed_id) = id.parse::<u64>() {
753 let joined = path.path.join("::");
754 map.insert(parsed_id, joined);
755 }
756 }
757 map
758}
759
760fn inner_kind(item: &RustdocItem) -> Option<&str> {
761 item.inner.keys().next().map(String::as_str)
762}
763
764fn is_inner_kind(item: &RustdocItem, kind: &str) -> bool {
765 matches!(inner_kind(item), Some(found) if found == kind)
766}
767
768fn module_items(item: &RustdocItem) -> Vec<u64> {
769 item.inner
770 .get("module")
771 .and_then(|value| value.get("items"))
772 .and_then(Value::as_array)
773 .map(|items| extract_ids(items))
774 .unwrap_or_default()
775}
776
777fn struct_kind_fields(kind: &Value) -> Vec<u64> {
778 if let Some(plain) = kind.get("plain") {
779 return plain
780 .get("fields")
781 .and_then(Value::as_array)
782 .map(|items| extract_ids(items))
783 .unwrap_or_default();
784 }
785 if let Some(tuple) = kind.get("tuple") {
786 return tuple
787 .get("fields")
788 .and_then(Value::as_array)
789 .map(|items| extract_ids(items))
790 .unwrap_or_default();
791 }
792 Vec::new()
793}
794
795fn extract_ids(items: &[Value]) -> Vec<u64> {
796 items.iter().filter_map(Value::as_u64).collect()
797}
798
799fn parse_signature(
800 item: &RustdocItem,
801 state: &ParserState<'_>,
802 name: &str,
803) -> (Vec<Param>, Option<TypeRef>, Option<String>) {
804 let Some(inner) = item.inner.get("function") else {
805 let return_type = match inner_kind(item) {
806 Some("constant") => item
807 .inner
808 .get("constant")
809 .and_then(|value| value.get("type"))
810 .map(|ty| type_to_ref(ty, state)),
811 Some("static") => item
812 .inner
813 .get("static")
814 .and_then(|value| value.get("type"))
815 .map(|ty| type_to_ref(ty, state)),
816 Some("struct_field") => item
817 .inner
818 .get("struct_field")
819 .map(|ty| type_to_ref(ty, state)),
820 Some("type_alias") => item
821 .inner
822 .get("type_alias")
823 .and_then(|value| value.get("type"))
824 .map(|ty| type_to_ref(ty, state)),
825 _ => None,
826 };
827 return (Vec::new(), return_type, None);
828 };
829
830 let Some(sig) = inner.get("sig") else {
831 return (Vec::new(), None, None);
832 };
833
834 let mut params = Vec::new();
835 if let Some(inputs) = sig.get("inputs").and_then(Value::as_array) {
836 for input in inputs {
837 let Some(pair) = input.as_array() else {
838 continue;
839 };
840 if pair.len() != 2 {
841 continue;
842 }
843 let name = pair[0].as_str().unwrap_or("").to_string();
844 let ty = type_to_ref(&pair[1], state);
845 params.push(Param {
846 name,
847 type_ref: Some(ty),
848 default_value: None,
849 is_optional: None,
850 });
851 }
852 }
853
854 let return_type = sig.get("output").and_then(|output| {
855 if output.is_null() {
856 None
857 } else {
858 Some(type_to_ref(output, state))
859 }
860 });
861
862 let signature = format_function_signature(name, ¶ms, return_type.as_ref());
863 (params, return_type, Some(signature))
864}
865
866fn parse_type_params(item: &RustdocItem) -> Vec<TypeParam> {
867 let Some(kind) = inner_kind(item) else {
868 return Vec::new();
869 };
870 let generics = match kind {
871 "function" => item
872 .inner
873 .get("function")
874 .and_then(|value| value.get("generics")),
875 "struct" => item
876 .inner
877 .get("struct")
878 .and_then(|value| value.get("generics")),
879 "enum" => item
880 .inner
881 .get("enum")
882 .and_then(|value| value.get("generics")),
883 "trait" => item
884 .inner
885 .get("trait")
886 .and_then(|value| value.get("generics")),
887 "type_alias" => item
888 .inner
889 .get("type_alias")
890 .and_then(|value| value.get("generics")),
891 _ => None,
892 };
893
894 let Some(generics) = generics else {
895 return Vec::new();
896 };
897 let Some(params) = generics.get("params").and_then(Value::as_array) else {
898 return Vec::new();
899 };
900
901 let mut output = Vec::new();
902 for param in params {
903 let Some(name) = param.get("name").and_then(Value::as_str) else {
904 continue;
905 };
906 let mut constraints = Vec::new();
907 if let Some(bounds) = param
908 .get("kind")
909 .and_then(|kind| kind.get("type"))
910 .and_then(|type_info| type_info.get("bounds"))
911 .and_then(Value::as_array)
912 {
913 for bound in bounds {
914 if let Some(path) = bound
915 .get("trait_bound")
916 .and_then(|trait_bound| trait_bound.get("trait"))
917 .and_then(|trait_path| trait_path.get("path"))
918 .and_then(Value::as_str)
919 {
920 constraints.push(path.to_string());
921 }
922 }
923 }
924 output.push(TypeParam {
925 name: name.to_string(),
926 constraints,
927 });
928 }
929 output
930}
931
932fn item_is_async(item: &RustdocItem) -> Option<bool> {
933 item.inner
934 .get("function")
935 .and_then(|value| value.get("header"))
936 .and_then(|value| value.get("is_async"))
937 .and_then(Value::as_bool)
938 .filter(|is_async| *is_async)
939 .map(|_| true)
940}
941
942fn item_is_const(item: &RustdocItem) -> Option<bool> {
943 if matches!(inner_kind(item), Some("constant")) {
944 return Some(true);
945 }
946 item.inner
947 .get("function")
948 .and_then(|value| value.get("header"))
949 .and_then(|value| value.get("is_const"))
950 .and_then(Value::as_bool)
951 .filter(|is_const| *is_const)
952 .map(|_| true)
953}
954
955fn item_is_static(item: &RustdocItem) -> Option<bool> {
956 matches!(inner_kind(item), Some("static")).then_some(true)
957}
958fn format_function_signature(name: &str, params: &[Param], output: Option<&TypeRef>) -> String {
959 let params = params
960 .iter()
961 .map(
962 |param| match param.type_ref.as_ref().and_then(|ty| ty.display.as_ref()) {
963 Some(ty) if !param.name.is_empty() => format!("{}: {ty}", param.name),
964 Some(ty) => ty.clone(),
965 None => param.name.clone(),
966 },
967 )
968 .collect::<Vec<_>>()
969 .join(", ");
970 let mut sig = format!("fn {name}({params})");
971 if let Some(output) = output.and_then(|ty| ty.display.as_ref())
972 && output != "()"
973 {
974 sig.push_str(" -> ");
975 sig.push_str(output);
976 }
977 sig
978}
979
980fn type_to_ref(value: &Value, state: &ParserState<'_>) -> TypeRef {
981 let display = type_to_string(value, state).unwrap_or_else(|| "<unknown>".to_string());
982 let symbol_key = type_symbol_key(value, state);
983 TypeRef {
984 display: Some(display.clone()),
985 canonical: Some(display),
986 language: Some(state.options.language.clone()),
987 symbol_key,
988 generics: Vec::new(),
989 modifiers: Vec::new(),
990 }
991}
992
993fn type_symbol_key(value: &Value, state: &ParserState<'_>) -> Option<String> {
994 let resolved = value.get("resolved_path")?;
995 let id = resolved.get("id").and_then(Value::as_u64)?;
996 let path = state.id_to_path.get(&id)?.clone();
997 Some(make_symbol_key("rust", &state.options.project_id, &path))
998}
999
1000fn type_to_string(value: &Value, state: &ParserState<'_>) -> Option<String> {
1001 primitive_type(value)
1002 .or_else(|| generic_type(value))
1003 .or_else(|| resolved_path_type(value, state))
1004 .or_else(|| borrowed_ref_type(value, state))
1005 .or_else(|| raw_pointer_type(value, state))
1006 .or_else(|| tuple_type(value, state))
1007 .or_else(|| slice_type(value, state))
1008 .or_else(|| array_type(value, state))
1009 .or_else(|| impl_trait_type(value, state))
1010 .or_else(|| dyn_trait_type(value, state))
1011 .or_else(|| qualified_path_type(value, state))
1012 .or_else(|| function_pointer_type(value, state))
1013}
1014
1015fn primitive_type(value: &Value) -> Option<String> {
1016 value
1017 .get("primitive")
1018 .and_then(Value::as_str)
1019 .map(str::to_string)
1020}
1021
1022fn generic_type(value: &Value) -> Option<String> {
1023 value
1024 .get("generic")
1025 .and_then(Value::as_str)
1026 .map(str::to_string)
1027}
1028
1029fn resolved_path_type(value: &Value, state: &ParserState<'_>) -> Option<String> {
1030 let resolved = value.get("resolved_path")?;
1031 let path = resolved.get("path").and_then(Value::as_str)?;
1032 let args = resolved.get("args");
1033 Some(format!("{}{}", path, format_type_args(args, state)))
1034}
1035
1036fn borrowed_ref_type(value: &Value, state: &ParserState<'_>) -> Option<String> {
1037 let borrowed = value.get("borrowed_ref")?;
1038 let is_mut = borrowed
1039 .get("is_mutable")
1040 .and_then(Value::as_bool)
1041 .unwrap_or(false);
1042 let inner = borrowed
1043 .get("type")
1044 .and_then(|inner| type_to_string(inner, state))?;
1045 Some(if is_mut {
1046 format!("&mut {inner}")
1047 } else {
1048 format!("&{inner}")
1049 })
1050}
1051
1052fn raw_pointer_type(value: &Value, state: &ParserState<'_>) -> Option<String> {
1053 let raw = value.get("raw_pointer")?;
1054 let is_mut = raw
1055 .get("is_mutable")
1056 .and_then(Value::as_bool)
1057 .unwrap_or(false);
1058 let inner = raw
1059 .get("type")
1060 .and_then(|inner| type_to_string(inner, state))?;
1061 Some(if is_mut {
1062 format!("*mut {inner}")
1063 } else {
1064 format!("*const {inner}")
1065 })
1066}
1067
1068fn tuple_type(value: &Value, state: &ParserState<'_>) -> Option<String> {
1069 let tuple = value.get("tuple").and_then(Value::as_array)?;
1070 let parts = tuple
1071 .iter()
1072 .filter_map(|inner| type_to_string(inner, state))
1073 .collect::<Vec<_>>()
1074 .join(", ");
1075 Some(format!("({parts})"))
1076}
1077
1078fn slice_type(value: &Value, state: &ParserState<'_>) -> Option<String> {
1079 let slice = value.get("slice")?;
1080 let inner = type_to_string(slice, state)?;
1081 Some(format!("[{inner}]"))
1082}
1083
1084fn array_type(value: &Value, state: &ParserState<'_>) -> Option<String> {
1085 let array = value.get("array")?;
1086 let inner = array
1087 .get("type")
1088 .and_then(|inner| type_to_string(inner, state))?;
1089 let len = array.get("len").and_then(Value::as_str).unwrap_or("");
1090 if len.is_empty() {
1091 Some(format!("[{inner}]"))
1092 } else {
1093 Some(format!("[{inner}; {len}]"))
1094 }
1095}
1096
1097fn impl_trait_type(value: &Value, state: &ParserState<'_>) -> Option<String> {
1098 let impl_trait = value.get("impl_trait").and_then(Value::as_array)?;
1099 let bounds = impl_trait
1100 .iter()
1101 .filter_map(|bound| trait_bound_to_string(bound, state))
1102 .collect::<Vec<_>>()
1103 .join(" + ");
1104 if bounds.is_empty() {
1105 Some("impl".to_string())
1106 } else {
1107 Some(format!("impl {bounds}"))
1108 }
1109}
1110
1111fn dyn_trait_type(value: &Value, state: &ParserState<'_>) -> Option<String> {
1112 let dyn_trait = value.get("dyn_trait")?;
1113 let traits = dyn_trait
1114 .get("traits")
1115 .and_then(Value::as_array)
1116 .map(|items| {
1117 items
1118 .iter()
1119 .filter_map(|bound| trait_bound_to_string(bound, state))
1120 .collect::<Vec<_>>()
1121 .join(" + ")
1122 })
1123 .unwrap_or_default();
1124 if traits.is_empty() {
1125 Some("dyn".to_string())
1126 } else {
1127 Some(format!("dyn {traits}"))
1128 }
1129}
1130
1131fn qualified_path_type(value: &Value, state: &ParserState<'_>) -> Option<String> {
1132 let qualified = value.get("qualified_path")?;
1133 let name = qualified.get("name").and_then(Value::as_str).unwrap_or("");
1134 let self_type = qualified
1135 .get("self_type")
1136 .and_then(|inner| type_to_string(inner, state))
1137 .unwrap_or_default();
1138 let trait_name = qualified
1139 .get("trait")
1140 .and_then(|inner| inner.get("path"))
1141 .and_then(Value::as_str)
1142 .unwrap_or("");
1143 if !trait_name.is_empty() {
1144 Some(format!("<{self_type} as {trait_name}>::{name}"))
1145 } else if !self_type.is_empty() {
1146 Some(format!("{self_type}::{name}"))
1147 } else {
1148 None
1149 }
1150}
1151
1152fn function_pointer_type(value: &Value, state: &ParserState<'_>) -> Option<String> {
1153 let fn_pointer = value.get("function_pointer")?;
1154 let decl = fn_pointer.get("decl")?;
1155 let params = decl
1156 .get("inputs")
1157 .and_then(Value::as_array)
1158 .map(|inputs| {
1159 inputs
1160 .iter()
1161 .filter_map(|pair| pair.as_array())
1162 .filter_map(|pair| pair.get(1))
1163 .filter_map(|param| type_to_string(param, state))
1164 .collect::<Vec<_>>()
1165 .join(", ")
1166 })
1167 .unwrap_or_default();
1168 let output = decl
1169 .get("output")
1170 .and_then(|output| type_to_string(output, state))
1171 .unwrap_or_else(|| "()".to_string());
1172 Some(format!("fn({params}) -> {output}"))
1173}
1174
1175fn format_type_args(args: Option<&Value>, state: &ParserState<'_>) -> String {
1176 let Some(args) = args else {
1177 return String::new();
1178 };
1179 let Some(angle) = args.get("angle_bracketed") else {
1180 return String::new();
1181 };
1182 let Some(items) = angle.get("args").and_then(Value::as_array) else {
1183 return String::new();
1184 };
1185 let mut rendered = Vec::new();
1186 for item in items {
1187 if let Some(ty) = item
1188 .get("type")
1189 .and_then(|inner| type_to_string(inner, state))
1190 {
1191 rendered.push(ty);
1192 } else if let Some(lifetime) = item.get("lifetime").and_then(Value::as_str) {
1193 rendered.push(lifetime.to_string());
1194 } else if let Some(const_val) = item.get("const").and_then(Value::as_str) {
1195 rendered.push(const_val.to_string());
1196 }
1197 }
1198 if rendered.is_empty() {
1199 String::new()
1200 } else {
1201 format!("<{}>", rendered.join(", "))
1202 }
1203}
1204
1205fn trait_bound_to_string(value: &Value, state: &ParserState<'_>) -> Option<String> {
1206 let trait_bound = value.get("trait_bound")?;
1207 let trait_path = trait_bound.get("trait")?;
1208 let path = trait_path.get("path").and_then(Value::as_str)?;
1209 let args = trait_path.get("args");
1210 Some(format!("{}{}", path, format_type_args(args, state)))
1211}
1212fn parse_markdown_docs(raw: &str) -> ParsedDocs {
1213 let normalized = raw.replace("\r\n", "\n");
1214 let (preamble, sections) = split_sections(&normalized);
1215 let (summary, remarks) = split_summary_remarks(&preamble);
1216
1217 let mut parsed = ParsedDocs {
1218 summary,
1219 remarks,
1220 returns: None,
1221 value: None,
1222 errors: None,
1223 panics: None,
1224 safety: None,
1225 deprecated: None,
1226 params: Vec::new(),
1227 type_params: Vec::new(),
1228 examples: Vec::new(),
1229 notes: Vec::new(),
1230 warnings: Vec::new(),
1231 see_also: Vec::new(),
1232 sections: Vec::new(),
1233 };
1234
1235 for (title, body) in sections {
1236 let normalized_title = title.trim().to_ascii_lowercase();
1237 let trimmed_body = body.trim();
1238 if trimmed_body.is_empty() {
1239 continue;
1240 }
1241 match normalized_title.as_str() {
1242 "errors" => parsed.errors = Some(trimmed_body.to_string()),
1243 "panics" => parsed.panics = Some(trimmed_body.to_string()),
1244 "safety" => parsed.safety = Some(trimmed_body.to_string()),
1245 "returns" => parsed.returns = Some(trimmed_body.to_string()),
1246 "value" => parsed.value = Some(trimmed_body.to_string()),
1247 "deprecated" => parsed.deprecated = Some(trimmed_body.to_string()),
1248 "examples" | "example" => parsed.examples = extract_examples(trimmed_body),
1249 "notes" | "note" => parsed.notes.push(trimmed_body.to_string()),
1250 "warnings" | "warning" => parsed.warnings.push(trimmed_body.to_string()),
1251 "see also" | "seealso" | "see-also" => {
1252 parsed.see_also = parse_see_also_section(trimmed_body);
1253 }
1254 "arguments" | "args" | "parameters" | "params" => {
1255 parsed.params = parse_param_section(trimmed_body);
1256 }
1257 "type parameters" | "type params" | "typeparam" | "typeparams" => {
1258 parsed.type_params = parse_type_param_section(trimmed_body);
1259 }
1260 _ => parsed.sections.push(DocSection {
1261 title,
1262 body: trimmed_body.to_string(),
1263 }),
1264 }
1265 }
1266
1267 parsed
1268}
1269
1270fn parse_see_also_section(body: &str) -> Vec<SeeAlso> {
1271 let mut entries = Vec::new();
1272 for line in body.lines() {
1273 let trimmed = line.trim();
1274 if trimmed.is_empty() {
1275 continue;
1276 }
1277 let item = trimmed
1278 .strip_prefix("- ")
1279 .or_else(|| trimmed.strip_prefix("* "))
1280 .unwrap_or(trimmed);
1281 if let Some(see) = parse_see_also_line(item) {
1282 entries.push(see);
1283 }
1284 }
1285 if entries.is_empty()
1286 && let Some(see) = parse_see_also_line(body.trim())
1287 {
1288 entries.push(see);
1289 }
1290 entries
1291}
1292
1293fn parse_see_also_line(text: &str) -> Option<SeeAlso> {
1294 let trimmed = text.trim();
1295 if trimmed.is_empty() {
1296 return None;
1297 }
1298 if let Some((label, target)) = parse_markdown_link(trimmed) {
1299 return Some(SeeAlso {
1300 label: Some(label),
1301 target,
1302 target_kind: Some("markdown".to_string()),
1303 });
1304 }
1305 Some(SeeAlso {
1306 label: None,
1307 target: trimmed.to_string(),
1308 target_kind: Some("text".to_string()),
1309 })
1310}
1311
/// Extracts the first inline markdown link `[label](target)` found in `text`.
///
/// The label runs from the first `[` to the first following `](`. The target
/// ends at the `)` that closes the link, with nested parentheses balanced so
/// that targets such as `Foo::bar()` are kept whole. If the parentheses never
/// balance, the target ends at the first `)` instead.
///
/// Returns the trimmed `(label, target)` pair, or `None` when no link is
/// present or either part is empty.
fn parse_markdown_link(text: &str) -> Option<(String, String)> {
    let (_, after_open) = text.split_once('[')?;
    let (label, tail) = after_open.split_once("](")?;

    // Find the `)` that closes the link, skipping over balanced inner pairs.
    let mut depth = 0usize;
    let balanced_end = tail.char_indices().find_map(|(idx, ch)| match ch {
        '(' => {
            depth += 1;
            None
        }
        ')' if depth == 0 => Some(idx),
        ')' => {
            depth -= 1;
            None
        }
        _ => None,
    });
    let end = balanced_end.or_else(|| tail.find(')'))?;

    let label = label.trim();
    let target = tail[..end].trim();
    if label.is_empty() || target.is_empty() {
        return None;
    }
    Some((label.to_string(), target.to_string()))
}
1325
1326fn split_sections(doc: &str) -> (String, Vec<(String, String)>) {
1327 let mut preamble = Vec::new();
1328 let mut sections = Vec::new();
1329 let mut current_title: Option<String> = None;
1330 let mut current_body = Vec::new();
1331 let mut in_code = false;
1332
1333 for line in doc.lines() {
1334 let trimmed = line.trim_start();
1335 if trimmed.starts_with("```") {
1336 in_code = !in_code;
1337 if current_title.is_some() {
1338 current_body.push(line.to_string());
1339 } else {
1340 preamble.push(line.to_string());
1341 }
1342 continue;
1343 }
1344 if !in_code && let Some(title) = parse_heading(trimmed) {
1345 if let Some(active) = current_title.take() {
1346 sections.push((active, current_body.join("\n").trim().to_string()));
1347 current_body.clear();
1348 }
1349 current_title = Some(title);
1350 continue;
1351 }
1352 if current_title.is_some() {
1353 current_body.push(line.to_string());
1354 } else {
1355 preamble.push(line.to_string());
1356 }
1357 }
1358
1359 if let Some(active) = current_title.take() {
1360 sections.push((active, current_body.join("\n").trim().to_string()));
1361 }
1362
1363 (preamble.join("\n").trim().to_string(), sections)
1364}
1365
/// Recognizes an ATX-style markdown heading and returns its title text.
///
/// Following CommonMark, a heading is one to six `#` characters followed by
/// whitespace and a non-empty title. Requiring the whitespace keeps lines such
/// as `#[derive(Debug)]` or `#42` from being mistaken for section headings.
/// Surrounding whitespace on the line is ignored.
///
/// Returns `None` for anything that is not a heading, including a heading with
/// no title.
fn parse_heading(line: &str) -> Option<String> {
    let trimmed = line.trim();
    let after_hashes = trimmed.trim_start_matches('#');
    let hash_count = trimmed.len() - after_hashes.len();
    if !(1..=6).contains(&hash_count) {
        return None;
    }
    // The marker must be separated from the title by whitespace.
    if !after_hashes.starts_with(char::is_whitespace) {
        return None;
    }
    let title = after_hashes.trim_start();
    if title.is_empty() {
        None
    } else {
        Some(title.to_string())
    }
}
1382
/// Splits a preamble into `(summary, remarks)`.
///
/// Paragraphs are separated by `"\n\n"`; each is trimmed and blank ones are
/// dropped. The first remaining paragraph is the summary and the others,
/// re-joined with a blank line, form the remarks. Either part is `None` when
/// there is nothing to put in it.
fn split_summary_remarks(preamble: &str) -> (Option<String>, Option<String>) {
    let paragraphs: Vec<&str> = preamble
        .split("\n\n")
        .map(str::trim)
        .filter(|part| !part.is_empty())
        .collect();

    match paragraphs.split_first() {
        None => (None, None),
        Some((first, others)) => {
            let remarks = (!others.is_empty()).then(|| others.join("\n\n"));
            (Some((*first).to_string()), remarks)
        }
    }
}
1393
1394fn extract_examples(body: &str) -> Vec<DocExample> {
1395 let mut examples = Vec::new();
1396 let mut in_code = false;
1397 let mut current_lang: Option<String> = None;
1398 let mut current_code = Vec::new();
1399
1400 for line in body.lines() {
1401 let trimmed = line.trim_start();
1402 if trimmed.starts_with("```") {
1403 if in_code {
1404 let code = current_code.join("\n");
1405 if !code.trim().is_empty() {
1406 examples.push(DocExample {
1407 lang: current_lang.take(),
1408 code: Some(code),
1409 caption: None,
1410 });
1411 }
1412 current_code.clear();
1413 in_code = false;
1414 } else {
1415 let lang = trimmed.trim_start_matches("```").trim();
1416 current_lang = if lang.is_empty() {
1417 None
1418 } else {
1419 Some(lang.to_string())
1420 };
1421 in_code = true;
1422 }
1423 continue;
1424 }
1425 if in_code {
1426 current_code.push(line.to_string());
1427 }
1428 }
1429
1430 if !examples.is_empty() {
1431 return examples;
1432 }
1433 let trimmed = body.trim();
1434 if trimmed.is_empty() {
1435 Vec::new()
1436 } else {
1437 vec![DocExample {
1438 lang: None,
1439 code: Some(trimmed.to_string()),
1440 caption: None,
1441 }]
1442 }
1443}
1444
1445fn parse_param_section(body: &str) -> Vec<DocParam> {
1446 let mut params = Vec::new();
1447 for line in body.lines() {
1448 let trimmed = line.trim();
1449 if !(trimmed.starts_with('-') || trimmed.starts_with('*')) {
1450 continue;
1451 }
1452 let item = trimmed.trim_start_matches(['-', '*']).trim();
1453 if item.is_empty() {
1454 continue;
1455 }
1456 if let Some((name, description)) = split_param_item(item) {
1457 params.push(DocParam {
1458 name,
1459 description,
1460 type_ref: None,
1461 });
1462 }
1463 }
1464 params
1465}
1466
1467fn parse_type_param_section(body: &str) -> Vec<DocTypeParam> {
1468 let mut params = Vec::new();
1469 for line in body.lines() {
1470 let trimmed = line.trim();
1471 if !(trimmed.starts_with('-') || trimmed.starts_with('*')) {
1472 continue;
1473 }
1474 let item = trimmed.trim_start_matches(['-', '*']).trim();
1475 if item.is_empty() {
1476 continue;
1477 }
1478 if let Some((name, description)) = split_param_item(item) {
1479 params.push(DocTypeParam { name, description });
1480 }
1481 }
1482 params
1483}
1484
/// Splits one parameter bullet into `(name, description)`.
///
/// The name is separated from the description by whichever of `:` or ` - `
/// appears first, so a colon inside the description (for example
/// `` `path` - the file: must exist ``) does not swallow part of the
/// description into the name. Without a separator the whole item is the name.
///
/// The name is trimmed and stripped of surrounding backticks; the description
/// is trimmed and becomes `None` when empty. Returns `None` when the resulting
/// name is empty.
fn split_param_item(item: &str) -> Option<(String, Option<String>)> {
    // Each candidate is (byte offset, separator length).
    let colon = item.find(':').map(|idx| (idx, 1));
    let dash = item.find(" - ").map(|idx| (idx, 3));
    let separator = match (colon, dash) {
        (Some(c), Some(d)) => Some(if d.0 < c.0 { d } else { c }),
        (c, d) => c.or(d),
    };

    let (name, description) = match separator {
        Some((idx, len)) => (&item[..idx], Some(&item[idx + len..])),
        None => (item, None),
    };

    let name = name.trim().trim_matches('`');
    if name.is_empty() {
        return None;
    }
    let description = description
        .map(|rest| rest.trim().to_string())
        .filter(|text| !text.is_empty());
    Some((name.to_string(), description))
}
1503
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::{make_unique_symbol_key, parse_markdown_docs};

    /// A `# See Also` section yields one entry per bullet: a markdown link
    /// keeps its label and target, a plain entry only has a target.
    #[test]
    fn parse_markdown_docs_extracts_see_also() {
        let parsed = parse_markdown_docs("Summary.\n\n# See Also\n- [Foo](crate::Foo)\n- Bar");

        let entries: Vec<(Option<&str>, &str)> = parsed
            .see_also
            .iter()
            .map(|see| (see.label.as_deref(), see.target.as_str()))
            .collect();

        assert_eq!(entries, vec![(Some("Foo"), "crate::Foo"), (None, "Bar")]);
    }

    /// The first use of a qualified name gets the plain key; a second use of
    /// the same name gets the key suffixed with `#` and the numeric argument.
    #[test]
    fn make_unique_symbol_key_suffixes_collisions() {
        const QUALIFIED: &str = "docx_core::ControlError::from";
        let mut seen = HashSet::new();

        let first = make_unique_symbol_key(&mut seen, "docx_core", QUALIFIED, 10);
        let second = make_unique_symbol_key(&mut seen, "docx_core", QUALIFIED, 11);

        assert_eq!(first, "rust|docx_core|docx_core::ControlError::from");
        assert_eq!(second, "rust|docx_core|docx_core::ControlError::from#11");
    }
}