cabin_core/source_language.rs
1//! Source-file language classification.
2//!
3//! Cabin treats C/C++ as related but distinct source
4//! languages. The build planner consults this module to decide
5//! which compiler driver and which standard to use for each
6//! source file in a `library` / `executable` / `test` / `example`
7//! target. The same target may carry both `.c` and `.cc` sources;
8//! classification is per-file.
9//!
10//! This module is data and pure logic only. Filesystem traversal
11//! and process spawning live elsewhere.
12//!
13//! ## Recognized extensions
14//!
15//! | Extension | Language |
16//! | ---------------------------------- | -------- |
17//! | `.c` | [`SourceLanguage::C`] |
18//! | `.cc`, `.cpp`, `.cxx`, `.c++`, `.C` | [`SourceLanguage::Cxx`] |
19//!
20//! Headers (`.h`, `.hh`, `.hpp`) are not classified here — they
21//! are not compiled as standalone translation units. Anything
22//! outside the table above returns `None` so callers can surface
23//! a clear "unrecognized source extension" diagnostic instead of
24//! silently picking the wrong compiler.
25
26use std::path::Path;
27
/// Language of a single source file, as seen by the build planner.
///
/// Variant order matters: the derived `PartialOrd` / `Ord`
/// implementations sort [`SourceLanguage::C`] before
/// [`SourceLanguage::Cxx`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SourceLanguage {
    /// C translation unit (extension `.c`).
    C,
    /// C++ translation unit (extensions `.cc`, `.cpp`, `.cxx`,
    /// `.c++`, and `.C`).
    Cxx,
}
37
38impl SourceLanguage {
39 /// Stable lower-case identifier suitable for diagnostics,
40 /// JSON output, and rule names. `c` for C and `cxx` for C++ —
41 /// matching the [`crate::ToolKind`] keys.
42 pub const fn as_key(self) -> &'static str {
43 match self {
44 Self::C => "c",
45 Self::Cxx => "cxx",
46 }
47 }
48
49 /// Human-readable label used in error messages so the
50 /// language is unambiguous to the user.
51 pub const fn human_label(self) -> &'static str {
52 match self {
53 Self::C => "C",
54 Self::Cxx => "C++",
55 }
56 }
57}
58
59impl std::fmt::Display for SourceLanguage {
60 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61 f.write_str(self.as_key())
62 }
63}
64
65/// Classify a source file by its filename extension. Returns
66/// `None` when the extension is missing or unrecognized — the
67/// planner surfaces an explicit diagnostic in that case rather
68/// than silently picking a default compiler.
69///
70/// Extension matching is case-sensitive on the lower-case forms
71/// (`.c`, `.cc`, `.cpp`, `.cxx`, `.c++`) and accepts the
72/// upper-case `.C` extension that traditionally indicates a C++
73/// translation unit on POSIX systems.
74pub fn classify_source(path: &Path) -> Option<SourceLanguage> {
75 // We deliberately do not lower-case the extension: `.C` is
76 // the only non-lower-case spelling Cabin recognizes (POSIX
77 // C++ convention), and matching it explicitly avoids
78 // collapsing `.C` and `.c` into the same bucket on
79 // case-insensitive filesystems.
80 let ext = path.extension()?;
81 let ext = ext.to_str()?;
82 match ext {
83 "c" => Some(SourceLanguage::C),
84 "cc" | "cpp" | "cxx" | "c++" | "C" => Some(SourceLanguage::Cxx),
85 _ => None,
86 }
87}
88
89/// Pick the link-driver language for a target whose objects
90/// span the supplied set of source languages.
91///
92/// **Rule:** if any object came from a C++ source (or any
93/// transitively linked library declares any C++ object), the
94/// link driver is the C++ compiler. Otherwise the C compiler
95/// drives the link. The C++ driver pulls in the C++ runtime
96/// (`libstdc++` / `libc++`), which is required for any
97/// translation unit that uses C++; the C driver omits that
98/// runtime, which is correct for pure-C link lines.
99///
100/// Returns [`SourceLanguage::C`] for an empty input — that is
101/// the conservative choice for an empty link line, but in
102/// practice the planner rejects executables with no objects
103/// before this is consulted.
104///
105/// The slice form (rather than a generic `IntoIterator`) keeps
106/// the predicate cheap to call on the per-target language
107/// manifests the planner already collects, and lets callers
108/// reason about the input by reading the call site directly.
109pub fn link_driver_language(languages: &[SourceLanguage]) -> SourceLanguage {
110 if languages.contains(&SourceLanguage::Cxx) {
111 SourceLanguage::Cxx
112 } else {
113 SourceLanguage::C
114 }
115}
116
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Classify a path given as a string, borrowing it as a
    /// `Path` so no `PathBuf` has to be allocated per assertion.
    fn classify(path: &str) -> Option<SourceLanguage> {
        classify_source(Path::new(path))
    }

    #[test]
    fn classifies_c_extension_as_c() {
        assert_eq!(classify("foo.c"), Some(SourceLanguage::C));
        assert_eq!(classify("src/lib.c"), Some(SourceLanguage::C));
    }

    #[test]
    fn classifies_cpp_extensions_as_cxx() {
        for ext in ["cc", "cpp", "cxx", "c++", "C"] {
            assert_eq!(
                classify(&format!("src/file.{ext}")),
                Some(SourceLanguage::Cxx),
                "extension `.{ext}` must classify as C++"
            );
        }
    }

    #[test]
    fn classification_is_case_sensitive_for_lower_case_only() {
        // `.C` is the legitimate POSIX upper-case C++ extension;
        // anything else upper-cased is unrecognized so the
        // planner can surface a clear error instead of guessing.
        assert_eq!(classify("file.C"), Some(SourceLanguage::Cxx));
        for ext in ["CPP", "CC", "CXX", "C++", "Cpp", "cPP"] {
            assert!(
                classify(&format!("file.{ext}")).is_none(),
                "extension `.{ext}` must not be recognized"
            );
        }
    }

    #[test]
    fn classification_returns_none_for_unknown_or_missing_extension() {
        for name in ["file", "file.h", "file.hh", "file.hpp", "file.txt", ""] {
            assert!(
                classify(name).is_none(),
                "`{name}` must not classify as a source file"
            );
        }
    }

    #[test]
    fn classification_uses_only_the_final_extension() {
        // `Path::extension` yields the text after the last dot of
        // the file name, so earlier dots never influence the
        // result.
        assert_eq!(classify("archive.tar.c"), Some(SourceLanguage::C));
        assert_eq!(classify("generated.c.cc"), Some(SourceLanguage::Cxx));
        assert!(classify("main.cpp.txt").is_none());
    }

    #[test]
    fn classification_ignores_dotfiles_and_directory_components() {
        // A file name that starts with its only dot has no
        // extension according to `Path::extension`.
        assert!(classify(".c").is_none());
        assert!(classify("src/.cpp").is_none());
        // Only the final path component is inspected.
        assert!(classify("pkg.cpp/README").is_none());
        assert_eq!(classify("pkg.c/main.cc"), Some(SourceLanguage::Cxx));
    }

    #[test]
    fn link_driver_is_cxx_when_any_source_is_cpp() {
        use SourceLanguage::{Cxx, C};

        assert_eq!(link_driver_language(&[Cxx]), Cxx);
        assert_eq!(link_driver_language(&[C, Cxx]), Cxx);
        assert_eq!(link_driver_language(&[Cxx, C]), Cxx);
    }

    #[test]
    fn link_driver_is_c_when_every_source_is_c() {
        use SourceLanguage::C;

        assert_eq!(link_driver_language(&[C]), C);
        assert_eq!(link_driver_language(&[C, C]), C);
    }

    #[test]
    fn link_driver_falls_back_to_c_for_empty_input() {
        // Empty inputs do not occur in practice (the planner
        // rejects empty targets up-front); the documented
        // fallback is C so a future caller cannot accidentally
        // depend on the C++ driver being selected for an empty
        // link line.
        assert_eq!(link_driver_language(&[]), SourceLanguage::C);
    }

    #[test]
    fn keys_are_stable_across_renames() {
        // The keys land in JSON metadata and rule names; lock
        // them down so a future contributor cannot rename the
        // variant accidentally.
        assert_eq!(SourceLanguage::C.as_key(), "c");
        assert_eq!(SourceLanguage::Cxx.as_key(), "cxx");
        assert_eq!(SourceLanguage::C.to_string(), "c");
        assert_eq!(SourceLanguage::Cxx.to_string(), "cxx");
    }

    #[test]
    fn human_labels_name_the_language_for_users() {
        // The labels appear in user-facing error messages, so a
        // wording change should be a deliberate decision.
        assert_eq!(SourceLanguage::C.human_label(), "C");
        assert_eq!(SourceLanguage::Cxx.human_label(), "C++");
    }
}