cabin_core/source_language.rs
1//! Source-file language classification.
2//!
3//! Cabin treats C/C++ as related but distinct source
4//! languages. The build planner consults this module to decide
5//! which compiler driver and which standard to use for each
6//! source file in a `library` / `executable` / `test` / `example`
7//! target. The same target may carry both `.c` and `.cc` sources;
8//! classification is per-file.
9//!
10//! This module is data and pure logic only. Filesystem traversal
11//! and process spawning live elsewhere.
12//!
13//! ## Recognized extensions
14//!
15//! | Extension | Language |
16//! | ---------------------------------- | -------- |
17//! | `.c` | [`SourceLanguage::C`] |
18//! | `.cc`, `.cpp`, `.cxx`, `.c++`, `.C` | [`SourceLanguage::Cxx`] |
19//!
20//! Headers (`.h`, `.hh`, `.hpp`) are not classified here — they
21//! are not compiled as standalone translation units. Anything
22//! outside the table above returns `None` so callers can surface
23//! a clear "unrecognized source extension" diagnostic instead of
24//! silently picking the wrong compiler.
25
26use camino::Utf8Path;
27
/// The language a compilable source file is written in, as seen
/// by the build planner.
///
/// Variant order matters for the derived ordering: `C` compares
/// less than `Cxx`.
#[derive(Debug, Clone, Copy, Hash)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
pub enum SourceLanguage {
    /// C translation unit, i.e. a file with the `.c` extension.
    C,
    /// C++ translation unit, i.e. a file with one of the `.cc`,
    /// `.cpp`, `.cxx`, `.c++`, or `.C` extensions.
    Cxx,
}
37
38impl SourceLanguage {
39 /// Stable lower-case identifier suitable for diagnostics,
40 /// JSON output, and rule names. `c` for C and `cxx` for C++ —
41 /// matching the [`crate::ToolKind`] keys.
42 pub const fn as_key(self) -> &'static str {
43 match self {
44 Self::C => "c",
45 Self::Cxx => "cxx",
46 }
47 }
48
49 /// Human-readable label used in error messages so the
50 /// language is unambiguous to the user.
51 pub const fn human_label(self) -> &'static str {
52 match self {
53 Self::C => "C",
54 Self::Cxx => "C++",
55 }
56 }
57}
58
59impl std::fmt::Display for SourceLanguage {
60 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61 f.write_str(self.as_key())
62 }
63}
64
65/// Classify a source file by its filename extension. Returns
66/// `None` when the extension is missing or unrecognized — the
67/// planner surfaces an explicit diagnostic in that case rather
68/// than silently picking a default compiler.
69///
70/// Extension matching is case-sensitive on the lower-case forms
71/// (`.c`, `.cc`, `.cpp`, `.cxx`, `.c++`) and accepts the
72/// upper-case `.C` extension that traditionally indicates a C++
73/// translation unit on POSIX systems.
74pub fn classify_source(path: &Utf8Path) -> Option<SourceLanguage> {
75 // We deliberately do not lower-case the extension: `.C` is
76 // the only non-lower-case spelling Cabin recognizes (POSIX
77 // C++ convention), and matching it explicitly avoids
78 // collapsing `.C` and `.c` into the same bucket on
79 // case-insensitive filesystems.
80 let ext = path.extension()?;
81 match ext {
82 "c" => Some(SourceLanguage::C),
83 "cc" | "cpp" | "cxx" | "c++" | "C" => Some(SourceLanguage::Cxx),
84 _ => None,
85 }
86}
87
88/// Pick the link-driver language for a target whose objects
89/// span the supplied set of source languages.
90///
91/// **Rule:** if any object came from a C++ source (or any
92/// transitively linked library declares any C++ object), the
93/// link driver is the C++ compiler. Otherwise the C compiler
94/// drives the link. The C++ driver pulls in the C++ runtime
95/// (`libstdc++` / `libc++`), which is required for any
96/// translation unit that uses C++; the C driver omits that
97/// runtime, which is correct for pure-C link lines.
98///
99/// Returns [`SourceLanguage::C`] for an empty input — that is
100/// the conservative choice for an empty link line, but in
101/// practice the planner rejects executables with no objects
102/// before this is consulted.
103///
104/// The slice form (rather than a generic `IntoIterator`) keeps
105/// the predicate cheap to call on the per-target language
106/// manifests the planner already collects, and lets callers
107/// reason about the input by reading the call site directly.
108pub fn link_driver_language(languages: &[SourceLanguage]) -> SourceLanguage {
109 if languages.contains(&SourceLanguage::Cxx) {
110 SourceLanguage::Cxx
111 } else {
112 SourceLanguage::C
113 }
114}
115
#[cfg(test)]
mod tests {
    use super::*;
    use camino::Utf8PathBuf;

    /// Classify a path given as a plain string.
    fn classify(raw: &str) -> Option<SourceLanguage> {
        let path = Utf8PathBuf::from(raw);
        classify_source(&path)
    }

    #[test]
    fn classifies_c_extension_as_c() {
        // Both a bare file name and a nested path.
        for raw in ["foo.c", "src/lib.c"] {
            assert_eq!(classify(raw), Some(SourceLanguage::C));
        }
    }

    #[test]
    fn classifies_cpp_extensions_as_cxx() {
        ["cc", "cpp", "cxx", "c++", "C"].into_iter().for_each(|ext| {
            let path = Utf8PathBuf::from(format!("src/file.{ext}"));
            assert_eq!(
                classify_source(&path),
                Some(SourceLanguage::Cxx),
                "extension `.{ext}` must classify as C++"
            );
        });
    }

    #[test]
    fn classification_is_case_sensitive_for_lower_case_only() {
        // Upper-case `.C` is the POSIX C++ spelling and is
        // accepted; every other upper-cased extension stays
        // unrecognized so the planner reports an error rather
        // than guessing.
        assert_eq!(classify("file.C"), Some(SourceLanguage::Cxx));
        assert_eq!(classify("file.CPP"), None);
    }

    #[test]
    fn classification_returns_none_for_unknown_or_missing_extension() {
        for raw in ["file", "file.h", "file.hpp", "file.txt"] {
            assert!(classify(raw).is_none());
        }
    }

    #[test]
    fn link_driver_is_cxx_when_any_source_is_cpp() {
        use SourceLanguage::{Cxx, C};

        // C++ alone, C++ last, and C++ first.
        let mixes: [&[SourceLanguage]; 3] = [&[Cxx], &[C, Cxx], &[Cxx, C]];
        for mix in mixes {
            assert_eq!(link_driver_language(mix), Cxx);
        }
    }

    #[test]
    fn link_driver_is_c_when_every_source_is_c() {
        use SourceLanguage::C;

        let pure_c: [&[SourceLanguage]; 2] = [&[C], &[C, C]];
        for objects in pure_c {
            assert_eq!(link_driver_language(objects), C);
        }
    }

    #[test]
    fn link_driver_falls_back_to_c_for_empty_input() {
        // The planner rejects empty targets up-front, so this
        // input does not occur in practice. The documented
        // fallback is still pinned to C so that no future caller
        // can come to rely on the C++ driver being chosen for an
        // empty link line.
        let no_objects: &[SourceLanguage] = &[];
        assert_eq!(link_driver_language(no_objects), SourceLanguage::C);
    }

    #[test]
    fn keys_are_stable_across_renames() {
        // These keys end up in JSON metadata and rule names.
        // Pinning both `as_key` and `Display` means an accidental
        // variant rename cannot change them unnoticed.
        let expected = [(SourceLanguage::C, "c"), (SourceLanguage::Cxx, "cxx")];
        for (language, _) in expected {
            assert_eq!(language.as_key(), expected_key(language));
        }
        for (language, key) in expected {
            assert_eq!(language.to_string(), key);
        }

        /// Expected stable key, spelled out independently of the
        /// implementation under test.
        fn expected_key(language: SourceLanguage) -> &'static str {
            match language {
                SourceLanguage::C => "c",
                SourceLanguage::Cxx => "cxx",
            }
        }
    }
}