Skip to main content

code_moniker_core/lang/
mod.rs

1pub mod build_manifest;
2pub mod callable;
3pub mod canonical_walker;
4pub mod cs;
5pub mod extractor;
6pub mod go;
7pub mod java;
8pub mod kinds;
9pub mod python;
10pub mod rs;
11pub mod sql;
12pub mod strategy;
13pub mod tree_util;
14pub mod ts;
15
16pub use extractor::LangExtractor;
17#[doc(hidden)]
18pub use extractor::assert_conformance;
19
/// Generates the `Lang` enum and its per-language lookup tables from a list
/// of `Variant => ExtractorType` rows.
///
/// Adding a row registers the language for `Lang::from_tag` / `tag` /
/// `allowed_kinds` / `allowed_visibilities` and the schema-sync test.
///
/// Each row's right-hand side must be a type implementing `LangExtractor`;
/// its `LANG_TAG`, `ALLOWED_KINDS` and `ALLOWED_VISIBILITIES` associated
/// constants are the single source of the generated data. A row may carry
/// attributes (for example a `#[cfg(...)]` gate); they are re-emitted on
/// every item generated for that row so all tables stay aligned.
macro_rules! define_languages {
	// Rows are comma-separated, each optionally preceded by attributes; a
	// trailing comma is accepted.
	($($(#[$attr:meta])* $variant:ident => $module:ty),* $(,)?) => {
		/// One variant per registered language row.
		#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
		pub enum Lang {
			$(
				$(#[$attr])*
				$variant,
			)*
		}

		impl Lang {
			/// Every registered language, in row order.
			pub const ALL: &'static [Lang] = &[
				$(
					$(#[$attr])*
					Self::$variant,
				)*
			];

			/// Returns the language whose `LANG_TAG` equals `s` exactly, or
			/// `None` when no row matches. Rows are tested in declaration
			/// order and the first match wins.
			pub fn from_tag(s: &str) -> Option<Self> {
				$(
					$(#[$attr])*
					if s == <$module as $crate::lang::LangExtractor>::LANG_TAG {
						return Some(Self::$variant);
					}
				)*
				None
			}

			/// Returns the `LANG_TAG` of this language's extractor type.
			pub fn tag(self) -> &'static str {
				match self {
					$(
						$(#[$attr])*
						Self::$variant => <$module as $crate::lang::LangExtractor>::LANG_TAG,
					)*
				}
			}

			/// Returns the `ALLOWED_KINDS` of this language's extractor type.
			pub fn allowed_kinds(self) -> &'static [&'static str] {
				match self {
					$(
						$(#[$attr])*
						Self::$variant => <$module as $crate::lang::LangExtractor>::ALLOWED_KINDS,
					)*
				}
			}

			/// Returns the `ALLOWED_VISIBILITIES` of this language's
			/// extractor type.
			pub fn allowed_visibilities(self) -> &'static [&'static str] {
				match self {
					$(
						$(#[$attr])*
						Self::$variant => <$module as $crate::lang::LangExtractor>::ALLOWED_VISIBILITIES,
					)*
				}
			}

			/// Returns `true` when the language declares no allowed
			/// visibilities at all.
			pub fn ignores_visibility(self) -> bool {
				self.allowed_visibilities().is_empty()
			}
		}

		// Test-only table that walks the same rows as the enum above, so
		// tests can check every registered language without listing them.
		#[cfg(test)]
		mod _conformance_dispatch {
			use $crate::lang::LangExtractor;

			/// Dispatches a closure that takes `(lang_tag, allowed_kinds, allowed_visibilities)`
			/// over every registered language. Used by the JSON Schema sync test.
			///
			/// The closure is called once per row, in row order.
			pub(crate) fn for_each_language(
				mut f: impl FnMut(&'static str, &'static [&'static str], &'static [&'static str]),
			) {
				$(
					$(#[$attr])*
					f(
						<$module as LangExtractor>::LANG_TAG,
						<$module as LangExtractor>::ALLOWED_KINDS,
						<$module as LangExtractor>::ALLOWED_VISIBILITIES,
					);
				)*
			}
		}
	};
}
103
// The registered languages. Each row maps a `Lang` variant to the type whose
// `LangExtractor` impl supplies that language's tag, allowed kinds and
// allowed visibilities. Row order is the order of `Lang::ALL` and of
// `for_each_language`.
define_languages! {
	Ts     => crate::lang::ts::Lang,
	Rs     => crate::lang::rs::Lang,
	Java   => crate::lang::java::Lang,
	Python => crate::lang::python::Lang,
	Go     => crate::lang::go::Lang,
	Cs     => crate::lang::cs::Lang,
	Sql    => crate::lang::sql::Lang,
}
113
114#[cfg(test)]
115pub(crate) use _conformance_dispatch::for_each_language;
116
#[cfg(test)]
mod schema_sync_tests {
	use super::for_each_language;
	use serde_json::Value;

	const SCHEMA_JSON: &str = include_str!("../../../../docs/postgres/declare-schema.json");

	/// Returns a sorted copy of `items` so two lists can be compared
	/// regardless of declaration order.
	fn sort<'a>(items: &[&'a str]) -> Vec<&'a str> {
		let mut ordered = Vec::with_capacity(items.len());
		ordered.extend_from_slice(items);
		ordered.sort_unstable();
		ordered
	}

	/// Builds the schema `$defs` key for a language tag: the tag with its
	/// first character upper-cased, followed by `Profile`.
	///
	/// Panics on an empty tag.
	fn profile_name_for(tag: &str) -> String {
		let mut rest = tag.chars();
		let initial = rest.next().unwrap();
		let mut name: String = initial.to_uppercase().collect();
		name.push_str(rest.as_str());
		name.push_str("Profile");
		name
	}

	/// Collects the string entries of the `enum` array declared for `field`
	/// on the symbol items of `profile`. Any missing step along the path, or
	/// a non-array `enum`, yields an empty list; non-string entries are
	/// skipped.
	fn enum_at<'a>(schema: &'a Value, profile: &str, field: &str) -> Vec<&'a str> {
		let path = [
			"$defs",
			profile,
			"properties",
			"symbols",
			"items",
			"properties",
			field,
			"enum",
		];

		let mut node = schema;
		for key in &path {
			match node.get(*key) {
				Some(child) => node = child,
				None => return Vec::new(),
			}
		}

		match node.as_array() {
			Some(entries) => entries.iter().filter_map(Value::as_str).collect(),
			None => Vec::new(),
		}
	}

	/// Checks, for every registered language, that the schema's `kind` and
	/// `visibility` enums hold exactly the values of the trait constants,
	/// and that the dispatch table covers as many languages as `Lang::ALL`.
	#[test]
	fn declare_schema_matches_trait_constants() {
		let schema: Value = serde_json::from_str(SCHEMA_JSON)
			.expect("docs/postgres/declare-schema.json must be valid JSON");

		let mut visited = 0usize;
		for_each_language(|tag, kinds, visibilities| {
			visited += 1;
			let profile = profile_name_for(tag);

			assert_eq!(
				sort(&enum_at(&schema, &profile, "kind")),
				sort(kinds),
				"declare-schema.json {profile}.kind enum drifted from `{tag}` trait ALLOWED_KINDS"
			);

			let schema_vis = enum_at(&schema, &profile, "visibility");
			if visibilities.is_empty() {
				// A language without visibilities must not declare any in the schema.
				assert!(
					schema_vis.is_empty(),
					"declare-schema.json {profile} declares visibilities but extractor profile is empty"
				);
				return;
			}

			assert_eq!(
				sort(&schema_vis),
				sort(visibilities),
				"declare-schema.json {profile}.visibility enum drifted from `{tag}` trait ALLOWED_VISIBILITIES"
			);
		});

		let registered = super::Lang::ALL.len();
		assert_eq!(
			visited,
			registered,
			"for_each_language visited {visited} languages but Lang::ALL contains {}; the cfg gates of the dispatch table and the macro variants are out of sync",
			registered
		);
	}
}
194
#[cfg(test)]
mod shape_coverage_tests {
	use super::for_each_language;
	use crate::core::shape::shape_of;

	/// Every kind listed in any language's allowed kinds must resolve
	/// through `shape_of`; all offenders are reported together.
	#[test]
	fn every_allowed_kind_has_a_shape() {
		let mut missing: Vec<(String, String)> = Vec::new();
		for_each_language(|tag, kinds, _| {
			let unshaped = kinds
				.iter()
				.copied()
				.filter(|kind| shape_of(kind.as_bytes()).is_none())
				.map(|kind| (tag.to_owned(), kind.to_owned()));
			missing.extend(unshaped);
		});
		assert!(
			missing.is_empty(),
			"kinds in ALLOWED_KINDS without an entry in core::shape::SHAPE_TABLE: {missing:?}"
		);
	}

	/// The kinds checked here are hard-coded rather than taken from a
	/// language's allowed kinds; each must still resolve through `shape_of`.
	#[test]
	fn internal_kinds_have_a_shape() {
		let internal: [&str; 4] = ["module", "comment", "local", "param"];
		for name in internal.iter().copied() {
			let shape = shape_of(name.as_bytes());
			assert!(
				shape.is_some(),
				"internal kind {:?} must have a shape entry",
				name
			);
		}
	}
}
227
#[cfg(test)]
mod comment_collapse_tests {
	use crate::core::code_graph::CodeGraph;
	use crate::core::moniker::{Moniker, MonikerBuilder};

	/// One language under test: its tag, the file name handed to the
	/// extractor, and a function running that extractor over a source text.
	struct Case {
		tag: &'static str,
		uri: &'static str,
		run: fn(&'static str) -> CodeGraph,
	}

	/// Anchor moniker shared by every extraction: project `app`.
	fn anchor() -> Moniker {
		MonikerBuilder::new().project(b"app").build()
	}

	/// Builds a `Case` whose runner calls `super::<module>::extract` with
	/// the case's file name, the shared anchor, `false`, and that module's
	/// default presets.
	macro_rules! case {
		($tag:literal, $uri:literal, $lang:ident) => {
			Case {
				tag: $tag,
				uri: $uri,
				run: |src| {
					super::$lang::extract(
						$uri,
						src,
						&anchor(),
						false,
						&super::$lang::Presets::default(),
					)
				},
			}
		};
	}

	/// The languages exercised by the tests below, in a fixed order.
	fn cases() -> Vec<Case> {
		vec![
			case!("rs", "test.rs", rs),
			case!("ts", "test.ts", ts),
			case!("python", "test.py", python),
			case!("go", "test.go", go),
			case!("java", "test.java", java),
			case!("cs", "test.cs", cs),
			case!("sql", "test.sql", sql),
		]
	}

	// Three line comments on consecutive lines, followed by a definition.
	const ADJACENT: &[(&str, &str)] = &[
		("rs", "// a\n// b\n// c\nstruct Foo;\n"),
		("ts", "// a\n// b\n// c\nclass Foo {}"),
		("python", "# a\n# b\n# c\nclass Foo: pass\n"),
		("go", "package x\n// a\n// b\n// c\nfunc Foo() {}\n"),
		("java", "// a\n// b\n// c\nclass Foo {}\n"),
		("cs", "// a\n// b\n// c\nclass Foo {}\n"),
		(
			"sql",
			"-- a\n-- b\n-- c\nCREATE FUNCTION f() RETURNS int LANGUAGE sql AS $$ SELECT 1 $$;\n",
		),
	];

	// The same three comments with a blank line after the second one.
	const SPLIT_BY_BLANK: &[(&str, &str)] = &[
		("rs", "// a\n// b\n\n// c\nstruct Foo;\n"),
		("ts", "// a\n// b\n\n// c\nclass Foo {}"),
		("python", "# a\n# b\n\n# c\nclass Foo: pass\n"),
		("go", "package x\n// a\n// b\n\n// c\nfunc Foo() {}\n"),
		("java", "// a\n// b\n\n// c\nclass Foo {}\n"),
		("cs", "// a\n// b\n\n// c\nclass Foo {}\n"),
		(
			"sql",
			"-- a\n-- b\n\n-- c\nCREATE FUNCTION f() RETURNS int LANGUAGE sql AS $$ SELECT 1 $$;\n",
		),
	];

	/// Number of defs in `g` whose kind is `comment`.
	fn count_comments(g: &CodeGraph) -> usize {
		g.defs()
			.map(|def| usize::from(def.kind == b"comment"))
			.sum()
	}

	#[test]
	fn each_language_collapses_three_adjacent_line_comments_into_one_def() {
		for Case { tag, uri, run } in cases() {
			let source = ADJACENT
				.iter()
				.find(|entry| entry.0 == tag)
				.map(|entry| entry.1)
				.expect("adjacent fixture");
			let graph = run(source);
			assert_eq!(
				count_comments(&graph),
				1,
				"lang={} ({}): three adjacent line comments must collapse to one def",
				tag,
				uri
			);
		}
	}

	#[test]
	fn each_language_splits_runs_on_blank_line() {
		for Case { tag, uri, run } in cases() {
			let source = SPLIT_BY_BLANK
				.iter()
				.find(|entry| entry.0 == tag)
				.map(|entry| entry.1)
				.expect("blank-line fixture");
			let graph = run(source);
			assert_eq!(
				count_comments(&graph),
				2,
				"lang={} ({}): blank line must break the run into two defs",
				tag,
				uri
			);
		}
	}
}