Skip to main content

code_moniker_core/lang/
extractor.rs

1//! Per-language extractor contract.
2//!
3//! Every supported language exposes a zero-sized `Lang` type implementing
4//! `LangExtractor`. The trait carries no dispatch overhead — it is the
5//! formal contract every extractor must satisfy.
6//!
7//! `assert_conformance::<Lang>(graph, anchor)` validates that a graph
8//! produced by an extractor respects the contract. Each extractor's
9//! `#[cfg(test)] extract_default` helper invokes it on every fixture.
10//!
11//! The trait also exposes `declare` / `to_spec` default impls that delegate
12//! to `declare::*` after validating the spec/graph carries this
13//! language's `LANG_TAG`. The dynamic-dispatch SQL entry points
14//! (`code_graph_declare`, `code_graph_to_spec`) keep using the free
15//! functions in `declare`; the typed methods on the trait give Rust
16//! callers a compile-time-typed handle to the same lifecycle.
17
18use crate::core::code_graph::CodeGraph;
19use crate::core::moniker::Moniker;
20use crate::declare::{DeclareError, SerializeError, declare_from_json_value, graph_to_spec};
21
22pub trait LangExtractor {
23	type Presets: Default;
24
25	const LANG_TAG: &'static str;
26
27	const ALLOWED_KINDS: &'static [&'static str];
28
29	const ALLOWED_VISIBILITIES: &'static [&'static str];
30
31	fn extract(
32		uri: &str,
33		source: &str,
34		anchor: &Moniker,
35		deep: bool,
36		presets: &Self::Presets,
37	) -> CodeGraph;
38
39	fn declare(spec: &serde_json::Value) -> Result<CodeGraph, DeclareError> {
40		check_spec_lang::<Self>(spec)?;
41		declare_from_json_value(spec)
42	}
43
44	fn to_spec(graph: &CodeGraph) -> Result<serde_json::Value, SerializeError> {
45		check_graph_lang::<Self>(graph)?;
46		graph_to_spec(graph)
47	}
48}
49
50fn check_spec_lang<E: LangExtractor + ?Sized>(
51	spec: &serde_json::Value,
52) -> Result<(), DeclareError> {
53	let actual = spec
54		.get("lang")
55		.and_then(|v| v.as_str())
56		.ok_or(DeclareError::MissingField {
57			path: "$".to_string(),
58			field: "lang",
59		})?;
60	if actual != E::LANG_TAG {
61		return Err(DeclareError::LangMismatch {
62			expected: E::LANG_TAG,
63			actual: actual.to_string(),
64		});
65	}
66	Ok(())
67}
68
69fn check_graph_lang<E: LangExtractor + ?Sized>(graph: &CodeGraph) -> Result<(), SerializeError> {
70	let root = graph.root();
71	let view = root.as_view();
72	let lang_bytes = view
73		.lang_segment()
74		.ok_or_else(|| SerializeError::RootHasNoLangSegment {
75			root: format!("{root:?}"),
76		})?;
77	let lang_str = std::str::from_utf8(lang_bytes).map_err(|_| SerializeError::Utf8 {
78		what: "lang segment",
79	})?;
80	if lang_str != E::LANG_TAG {
81		return Err(SerializeError::LangMismatch {
82			expected: E::LANG_TAG,
83			actual: lang_str.to_string(),
84		});
85	}
86	Ok(())
87}
88
89#[cfg(test)]
90mod conformance {
91	use super::LangExtractor;
92	use crate::core::code_graph::{CodeGraph, assert_local_refs_closed};
93	use crate::core::kinds::{
94		BIND_IMPORT, BIND_INJECT, BIND_LOCAL, BIND_NONE, KIND_COMMENT, KIND_LOCAL, KIND_MODULE,
95		KIND_PARAM, ORIGIN_EXTRACTED, REF_ANNOTATES, REF_CALLS, REF_DI_REGISTER, REF_DI_REQUIRE,
96		REF_EXTENDS, REF_IMPLEMENTS, REF_IMPORTS_MODULE, REF_IMPORTS_SYMBOL, REF_INSTANTIATES,
97		REF_METHOD_CALL, REF_READS, REF_REEXPORTS, REF_USES_TYPE, VIS_NONE,
98	};
99	use crate::core::moniker::Moniker;
100
101	const INTERNAL_KINDS: &[&[u8]] = &[KIND_MODULE, KIND_LOCAL, KIND_PARAM, KIND_COMMENT];
102
103	pub fn assert_conformance<E: LangExtractor>(graph: &CodeGraph, anchor: &Moniker) {
104		assert_root_under_anchor::<E>(graph, anchor);
105		for d in graph.defs() {
106			assert_kind_in_profile::<E>(d.moniker.as_bytes(), &d.kind);
107			assert_visibility_in_profile::<E>(d.moniker.as_bytes(), &d.visibility);
108			assert_kind_matches_moniker_last_segment(&d.moniker, &d.kind);
109			assert_origin_extracted(&d.moniker, &d.origin);
110		}
111		for r in graph.refs() {
112			assert_ref_binding_consistent(&r.kind, &r.binding);
113		}
114		assert_local_refs_closed(graph);
115	}
116
117	fn assert_root_under_anchor<E: LangExtractor>(graph: &CodeGraph, anchor: &Moniker) {
118		let root = graph.root();
119		let root_view = root.as_view();
120		assert!(
121			anchor.as_view().is_ancestor_of(&root_view) || root.as_bytes() == anchor.as_bytes(),
122			"contract violation: root {root:?} is not anchored under {anchor:?}"
123		);
124		let lang = root_view.lang_segment().unwrap_or_else(|| {
125			panic!(
126				"contract violation: root {:?} has no `lang:` segment (lang={:?} expected)",
127				root,
128				E::LANG_TAG
129			)
130		});
131		assert_eq!(
132			lang,
133			E::LANG_TAG.as_bytes(),
134			"contract violation: root carries lang:{} but extractor LANG_TAG={}",
135			String::from_utf8_lossy(lang),
136			E::LANG_TAG
137		);
138	}
139
140	fn assert_kind_in_profile<E: LangExtractor>(moniker_bytes: &[u8], kind: &[u8]) {
141		if INTERNAL_KINDS.contains(&kind) {
142			return;
143		}
144		let kind_str = std::str::from_utf8(kind).unwrap_or_else(|_| {
145			panic!("contract violation: def kind is not UTF-8 ({kind:?})");
146		});
147		assert!(
148			E::ALLOWED_KINDS.contains(&kind_str),
149			"contract violation: def kind `{}` is not in {} profile (moniker bytes: {:?})",
150			kind_str,
151			E::LANG_TAG,
152			moniker_bytes
153		);
154	}
155
156	fn assert_visibility_in_profile<E: LangExtractor>(moniker_bytes: &[u8], vis: &[u8]) {
157		if vis == VIS_NONE {
158			return;
159		}
160		let vis_str = std::str::from_utf8(vis).unwrap_or_else(|_| {
161			panic!("contract violation: def visibility is not UTF-8 ({vis:?})");
162		});
163		assert!(
164			E::ALLOWED_VISIBILITIES.contains(&vis_str),
165			"contract violation: def visibility `{}` is not in {} profile (moniker bytes: {:?})",
166			vis_str,
167			E::LANG_TAG,
168			moniker_bytes
169		);
170	}
171
172	fn assert_kind_matches_moniker_last_segment(moniker: &Moniker, kind: &[u8]) {
173		if INTERNAL_KINDS.contains(&kind) {
174			return;
175		}
176		let last_kind = moniker.last_kind().unwrap_or_else(|| {
177			panic!("contract violation: def has no segments (kind={kind:?})");
178		});
179		assert_eq!(
180			last_kind.as_slice(),
181			kind,
182			"contract violation: def.kind {kind:?} does not match moniker last segment kind {last_kind:?}"
183		);
184	}
185
186	fn assert_origin_extracted(moniker: &Moniker, origin: &[u8]) {
187		assert_eq!(
188			origin, ORIGIN_EXTRACTED,
189			"contract violation: extractor produced def with origin={origin:?} (must be `extracted`); moniker={moniker:?}"
190		);
191	}
192
193	fn assert_ref_binding_consistent(kind: &[u8], binding: &[u8]) {
194		let expected: &[u8] =
195			if kind == REF_IMPORTS_SYMBOL || kind == REF_IMPORTS_MODULE || kind == REF_REEXPORTS {
196				BIND_IMPORT
197			} else if kind == REF_DI_REGISTER || kind == REF_DI_REQUIRE {
198				BIND_INJECT
199			} else if kind == REF_CALLS
200				|| kind == REF_METHOD_CALL
201				|| kind == REF_READS
202				|| kind == REF_USES_TYPE
203				|| kind == REF_INSTANTIATES
204				|| kind == REF_EXTENDS
205				|| kind == REF_IMPLEMENTS
206				|| kind == REF_ANNOTATES
207			{
208				BIND_LOCAL
209			} else {
210				BIND_NONE
211			};
212		assert_eq!(
213			binding,
214			expected,
215			"contract violation: ref kind={:?} got binding={:?} (expected {:?})",
216			std::str::from_utf8(kind).unwrap_or("<non-utf8>"),
217			std::str::from_utf8(binding).unwrap_or("<non-utf8>"),
218			std::str::from_utf8(expected).unwrap_or("<non-utf8>"),
219		);
220	}
221}
222
223#[cfg(test)]
224pub use conformance::assert_conformance;
225
#[cfg(test)]
mod typed_lifecycle_tests {
	use super::*;
	use crate::declare::{DeclareError, declare_from_json_value};
	use serde_json::json;

	/// Extractor whose tag matches the fixture spec.
	type RsLang = crate::lang::rs::Lang;
	/// Extractor whose tag does not match the fixture spec.
	type TsLang = crate::lang::ts::Lang;

	/// Spec shared by every test: declares `lang: "rs"`, is rooted at a
	/// `lang:rs` module, and has no symbols.
	fn rs_spec() -> serde_json::Value {
		json!({
			"root": "code+moniker://app/srcset:main/lang:rs/module:foo",
			"lang": "rs",
			"symbols": []
		})
	}

	#[test]
	fn typed_declare_rejects_lang_mismatch() {
		let spec = rs_spec();
		let err = <TsLang as LangExtractor>::declare(&spec).unwrap_err();
		assert!(matches!(
			err,
			DeclareError::LangMismatch { expected: "ts", .. }
		));
	}

	#[test]
	fn typed_declare_accepts_matching_lang() {
		let spec = rs_spec();
		assert!(<RsLang as LangExtractor>::declare(&spec).is_ok());
	}

	#[test]
	fn typed_to_spec_rejects_lang_mismatch() {
		let spec = rs_spec();
		let graph = declare_from_json_value(&spec).unwrap();
		let err = <TsLang as LangExtractor>::to_spec(&graph).unwrap_err();
		assert!(matches!(
			err,
			SerializeError::LangMismatch { expected: "ts", .. }
		));
	}

	#[test]
	fn typed_to_spec_accepts_matching_lang() {
		let spec = rs_spec();
		let graph = declare_from_json_value(&spec).unwrap();
		assert!(<RsLang as LangExtractor>::to_spec(&graph).is_ok());
	}
}
281}