Skip to main content

code_moniker_core/lang/
extractor.rs

1//! Per-language extractor contract.
2//!
3//! Every supported language exposes a zero-sized `Lang` type implementing
4//! `LangExtractor`. The trait carries no dispatch overhead — it is the
5//! formal contract every extractor must satisfy.
6//!
7//! `assert_conformance::<Lang>(graph, anchor)` validates that a graph
8//! produced by an extractor respects the contract. Each extractor's
9//! `#[cfg(test)] extract_default` helper invokes it on every fixture.
10//!
11//! The trait also exposes `declare` / `to_spec` default impls that delegate
12//! to `declare::*` after validating the spec/graph carries this
13//! language's `LANG_TAG`. The dynamic-dispatch SQL entry points
14//! (`code_graph_declare`, `code_graph_to_spec`) keep using the free
15//! functions in `declare`; the typed methods on the trait give Rust
16//! callers a compile-time-typed handle to the same lifecycle.
17
18use crate::core::code_graph::CodeGraph;
19use crate::core::moniker::Moniker;
20use crate::declare::{DeclareError, SerializeError, declare_from_json_value, graph_to_spec};
21
22pub trait LangExtractor {
23	type Presets: Default;
24
25	const LANG_TAG: &'static str;
26
27	const ALLOWED_KINDS: &'static [&'static str];
28
29	const ALLOWED_VISIBILITIES: &'static [&'static str];
30
31	fn extract(
32		uri: &str,
33		source: &str,
34		anchor: &Moniker,
35		deep: bool,
36		presets: &Self::Presets,
37	) -> CodeGraph;
38
39	fn declare(spec: &serde_json::Value) -> Result<CodeGraph, DeclareError> {
40		check_spec_lang::<Self>(spec)?;
41		declare_from_json_value(spec)
42	}
43
44	fn to_spec(graph: &CodeGraph) -> Result<serde_json::Value, SerializeError> {
45		check_graph_lang::<Self>(graph)?;
46		graph_to_spec(graph)
47	}
48}
49
50fn check_spec_lang<E: LangExtractor + ?Sized>(
51	spec: &serde_json::Value,
52) -> Result<(), DeclareError> {
53	let actual = spec
54		.get("lang")
55		.and_then(|v| v.as_str())
56		.ok_or(DeclareError::MissingField {
57			path: "$".to_string(),
58			field: "lang",
59		})?;
60	if actual != E::LANG_TAG {
61		return Err(DeclareError::LangMismatch {
62			expected: E::LANG_TAG,
63			actual: actual.to_string(),
64		});
65	}
66	Ok(())
67}
68
69fn check_graph_lang<E: LangExtractor + ?Sized>(graph: &CodeGraph) -> Result<(), SerializeError> {
70	let root = graph.root();
71	let view = root.as_view();
72	let lang_bytes = view
73		.lang_segment()
74		.ok_or_else(|| SerializeError::RootHasNoLangSegment {
75			root: format!("{root:?}"),
76		})?;
77	let lang_str = std::str::from_utf8(lang_bytes).map_err(|_| SerializeError::Utf8 {
78		what: "lang segment",
79	})?;
80	if lang_str != E::LANG_TAG {
81		return Err(SerializeError::LangMismatch {
82			expected: E::LANG_TAG,
83			actual: lang_str.to_string(),
84		});
85	}
86	Ok(())
87}
88
89mod conformance {
90	use super::LangExtractor;
91	use crate::core::code_graph::{CodeGraph, assert_local_refs_closed};
92	use crate::core::kinds::{
93		BIND_IMPORT, BIND_INJECT, BIND_LOCAL, BIND_NONE, KIND_COMMENT, KIND_LOCAL, KIND_MODULE,
94		KIND_PARAM, ORIGIN_EXTRACTED, REF_ANNOTATES, REF_CALLS, REF_DI_REGISTER, REF_DI_REQUIRE,
95		REF_EXTENDS, REF_IMPLEMENTS, REF_IMPORTS_MODULE, REF_IMPORTS_SYMBOL, REF_INSTANTIATES,
96		REF_METHOD_CALL, REF_READS, REF_REEXPORTS, REF_USES_TYPE, VIS_NONE,
97	};
98	use crate::core::moniker::Moniker;
99
100	const INTERNAL_KINDS: &[&[u8]] = &[KIND_MODULE, KIND_LOCAL, KIND_PARAM, KIND_COMMENT];
101
102	pub fn assert_conformance<E: LangExtractor>(graph: &CodeGraph, anchor: &Moniker) {
103		assert_root_under_anchor::<E>(graph, anchor);
104		for d in graph.defs() {
105			assert_kind_in_profile::<E>(d.moniker.as_bytes(), &d.kind);
106			assert_visibility_in_profile::<E>(d.moniker.as_bytes(), &d.visibility);
107			assert_kind_matches_moniker_last_segment(&d.moniker, &d.kind);
108			assert_origin_extracted(&d.moniker, &d.origin);
109		}
110		for r in graph.refs() {
111			assert_ref_binding_consistent(&r.kind, &r.binding);
112		}
113		assert_local_refs_closed(graph);
114	}
115
116	fn assert_root_under_anchor<E: LangExtractor>(graph: &CodeGraph, anchor: &Moniker) {
117		let root = graph.root();
118		let root_view = root.as_view();
119		assert!(
120			anchor.as_view().is_ancestor_of(&root_view) || root.as_bytes() == anchor.as_bytes(),
121			"contract violation: root {root:?} is not anchored under {anchor:?}"
122		);
123		let lang = root_view.lang_segment().unwrap_or_else(|| {
124			panic!(
125				"contract violation: root {:?} has no `lang:` segment (lang={:?} expected)",
126				root,
127				E::LANG_TAG
128			)
129		});
130		assert_eq!(
131			lang,
132			E::LANG_TAG.as_bytes(),
133			"contract violation: root carries lang:{} but extractor LANG_TAG={}",
134			String::from_utf8_lossy(lang),
135			E::LANG_TAG
136		);
137	}
138
139	fn assert_kind_in_profile<E: LangExtractor>(moniker_bytes: &[u8], kind: &[u8]) {
140		if INTERNAL_KINDS.contains(&kind) {
141			return;
142		}
143		let kind_str = std::str::from_utf8(kind).unwrap_or_else(|_| {
144			panic!("contract violation: def kind is not UTF-8 ({kind:?})");
145		});
146		assert!(
147			E::ALLOWED_KINDS.contains(&kind_str),
148			"contract violation: def kind `{}` is not in {} profile (moniker bytes: {:?})",
149			kind_str,
150			E::LANG_TAG,
151			moniker_bytes
152		);
153	}
154
155	fn assert_visibility_in_profile<E: LangExtractor>(moniker_bytes: &[u8], vis: &[u8]) {
156		if vis == VIS_NONE {
157			return;
158		}
159		let vis_str = std::str::from_utf8(vis).unwrap_or_else(|_| {
160			panic!("contract violation: def visibility is not UTF-8 ({vis:?})");
161		});
162		assert!(
163			E::ALLOWED_VISIBILITIES.contains(&vis_str),
164			"contract violation: def visibility `{}` is not in {} profile (moniker bytes: {:?})",
165			vis_str,
166			E::LANG_TAG,
167			moniker_bytes
168		);
169	}
170
171	fn assert_kind_matches_moniker_last_segment(moniker: &Moniker, kind: &[u8]) {
172		if INTERNAL_KINDS.contains(&kind) {
173			return;
174		}
175		let last_kind = moniker.last_kind().unwrap_or_else(|| {
176			panic!("contract violation: def has no segments (kind={kind:?})");
177		});
178		assert_eq!(
179			last_kind.as_slice(),
180			kind,
181			"contract violation: def.kind {kind:?} does not match moniker last segment kind {last_kind:?}"
182		);
183	}
184
185	fn assert_origin_extracted(moniker: &Moniker, origin: &[u8]) {
186		assert_eq!(
187			origin, ORIGIN_EXTRACTED,
188			"contract violation: extractor produced def with origin={origin:?} (must be `extracted`); moniker={moniker:?}"
189		);
190	}
191
192	fn assert_ref_binding_consistent(kind: &[u8], binding: &[u8]) {
193		let expected: &[u8] =
194			if kind == REF_IMPORTS_SYMBOL || kind == REF_IMPORTS_MODULE || kind == REF_REEXPORTS {
195				BIND_IMPORT
196			} else if kind == REF_DI_REGISTER || kind == REF_DI_REQUIRE {
197				BIND_INJECT
198			} else if kind == REF_CALLS
199				|| kind == REF_METHOD_CALL
200				|| kind == REF_READS
201				|| kind == REF_USES_TYPE
202				|| kind == REF_INSTANTIATES
203				|| kind == REF_EXTENDS
204				|| kind == REF_IMPLEMENTS
205				|| kind == REF_ANNOTATES
206			{
207				BIND_LOCAL
208			} else {
209				BIND_NONE
210			};
211		assert_eq!(
212			binding,
213			expected,
214			"contract violation: ref kind={:?} got binding={:?} (expected {:?})",
215			std::str::from_utf8(kind).unwrap_or("<non-utf8>"),
216			std::str::from_utf8(binding).unwrap_or("<non-utf8>"),
217			std::str::from_utf8(expected).unwrap_or("<non-utf8>"),
218		);
219	}
220}
221
222#[doc(hidden)]
223pub use conformance::assert_conformance;
224
#[cfg(test)]
mod typed_lifecycle_tests {
	use super::*;
	use crate::declare::{DeclareError, declare_from_json_value};
	use serde_json::json;

	/// Spec shared by every test: a root tagged `lang:rs`, `"lang": "rs"`,
	/// and no symbols.
	fn rs_module_spec() -> serde_json::Value {
		json!({
			"root": "code+moniker://app/srcset:main/lang:rs/module:foo",
			"lang": "rs",
			"symbols": []
		})
	}

	#[test]
	fn typed_declare_rejects_lang_mismatch() {
		let spec = rs_module_spec();
		let err = <crate::lang::ts::Lang as LangExtractor>::declare(&spec).unwrap_err();
		// Pin both sides of the mismatch, not just the expected tag.
		assert!(matches!(
			err,
			DeclareError::LangMismatch { expected: "ts", ref actual } if actual == "rs"
		));
	}

	#[test]
	fn typed_declare_accepts_matching_lang() {
		let spec = rs_module_spec();
		assert!(<crate::lang::rs::Lang as LangExtractor>::declare(&spec).is_ok());
	}

	#[test]
	fn typed_to_spec_rejects_lang_mismatch() {
		let g = declare_from_json_value(&rs_module_spec()).unwrap();
		let err = <crate::lang::ts::Lang as LangExtractor>::to_spec(&g).unwrap_err();
		// Pin both sides of the mismatch, not just the expected tag.
		assert!(matches!(
			err,
			SerializeError::LangMismatch { expected: "ts", ref actual } if actual == "rs"
		));
	}

	#[test]
	fn typed_to_spec_accepts_matching_lang() {
		let g = declare_from_json_value(&rs_module_spec()).unwrap();
		assert!(<crate::lang::rs::Lang as LangExtractor>::to_spec(&g).is_ok());
	}
}
280}