Skip to main content

cabin_core/
source_language.rs

1//! Source-file language classification.
2//!
3//! Cabin treats C/C++ as related but distinct source
4//! languages. The build planner consults this module to decide
5//! which compiler driver and which standard to use for each
6//! source file in a `library` / `executable` / `test` / `example`
7//! target. The same target may carry both `.c` and `.cc` sources;
8//! classification is per-file.
9//!
10//! This module is data and pure logic only. Filesystem traversal
11//! and process spawning live elsewhere.
12//!
13//! ## Recognized extensions
14//!
15//! | Extension                          | Language |
16//! | ---------------------------------- | -------- |
17//! | `.c`                               | [`SourceLanguage::C`]   |
18//! | `.cc`, `.cpp`, `.cxx`, `.c++`, `.C` | [`SourceLanguage::Cxx`] |
19//!
20//! Headers (`.h`, `.hh`, `.hpp`) are not classified here — they
21//! are not compiled as standalone translation units. Anything
22//! outside the table above returns `None` so callers can surface
23//! a clear "unrecognized source extension" diagnostic instead of
24//! silently picking the wrong compiler.
25
26use std::path::Path;
27
/// The language a source file is compiled as, from the build
/// planner's point of view.
///
/// The variants are declared in the order `C`, `Cxx`; the derived
/// `PartialOrd` / `Ord` follow that declaration order, so `C`
/// compares less than `Cxx`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SourceLanguage {
    /// C translation unit — the `.c` extension.
    C,
    /// C++ translation unit — any of `.cc`, `.cpp`, `.cxx`,
    /// `.c++`, or `.C`.
    Cxx,
}
37
38impl SourceLanguage {
39    /// Stable lower-case identifier suitable for diagnostics,
40    /// JSON output, and rule names. `c` for C and `cxx` for C++ —
41    /// matching the [`crate::ToolKind`] keys.
42    pub const fn as_key(self) -> &'static str {
43        match self {
44            Self::C => "c",
45            Self::Cxx => "cxx",
46        }
47    }
48
49    /// Human-readable label used in error messages so the
50    /// language is unambiguous to the user.
51    pub const fn human_label(self) -> &'static str {
52        match self {
53            Self::C => "C",
54            Self::Cxx => "C++",
55        }
56    }
57}
58
59impl std::fmt::Display for SourceLanguage {
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        f.write_str(self.as_key())
62    }
63}
64
65/// Classify a source file by its filename extension. Returns
66/// `None` when the extension is missing or unrecognized — the
67/// planner surfaces an explicit diagnostic in that case rather
68/// than silently picking a default compiler.
69///
70/// Extension matching is case-sensitive on the lower-case forms
71/// (`.c`, `.cc`, `.cpp`, `.cxx`, `.c++`) and accepts the
72/// upper-case `.C` extension that traditionally indicates a C++
73/// translation unit on POSIX systems.
74pub fn classify_source(path: &Path) -> Option<SourceLanguage> {
75    // We deliberately do not lower-case the extension: `.C` is
76    // the only non-lower-case spelling Cabin recognizes (POSIX
77    // C++ convention), and matching it explicitly avoids
78    // collapsing `.C` and `.c` into the same bucket on
79    // case-insensitive filesystems.
80    let ext = path.extension()?;
81    let ext = ext.to_str()?;
82    match ext {
83        "c" => Some(SourceLanguage::C),
84        "cc" | "cpp" | "cxx" | "c++" | "C" => Some(SourceLanguage::Cxx),
85        _ => None,
86    }
87}
88
89/// Pick the link-driver language for a target whose objects
90/// span the supplied set of source languages.
91///
92/// **Rule:** if any object came from a C++ source (or any
93/// transitively linked library declares any C++ object), the
94/// link driver is the C++ compiler. Otherwise the C compiler
95/// drives the link. The C++ driver pulls in the C++ runtime
96/// (`libstdc++` / `libc++`), which is required for any
97/// translation unit that uses C++; the C driver omits that
98/// runtime, which is correct for pure-C link lines.
99///
100/// Returns [`SourceLanguage::C`] for an empty input — that is
101/// the conservative choice for an empty link line, but in
102/// practice the planner rejects executables with no objects
103/// before this is consulted.
104///
105/// The slice form (rather than a generic `IntoIterator`) keeps
106/// the predicate cheap to call on the per-target language
107/// manifests the planner already collects, and lets callers
108/// reason about the input by reading the call site directly.
109pub fn link_driver_language(languages: &[SourceLanguage]) -> SourceLanguage {
110    if languages.contains(&SourceLanguage::Cxx) {
111        SourceLanguage::Cxx
112    } else {
113        SourceLanguage::C
114    }
115}
116
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Classify a path written as a string literal.
    fn classify(raw: &str) -> Option<SourceLanguage> {
        classify_source(&PathBuf::from(raw))
    }

    #[test]
    fn classifies_c_extension_as_c() {
        let bare_name = classify("foo.c");
        let nested_name = classify("src/lib.c");
        assert_eq!(bare_name, Some(SourceLanguage::C));
        assert_eq!(nested_name, Some(SourceLanguage::C));
    }

    #[test]
    fn classifies_cpp_extensions_as_cxx() {
        for ext in ["cc", "cpp", "cxx", "c++", "C"] {
            let observed = classify(&format!("src/file.{ext}"));
            assert_eq!(
                observed,
                Some(SourceLanguage::Cxx),
                "extension `.{ext}` must classify as C++"
            );
        }
    }

    #[test]
    fn classification_is_case_sensitive_for_lower_case_only() {
        // Upper-case `.C` is the one accepted non-lower-case
        // spelling (POSIX C++ convention). Any other upper-cased
        // extension stays unrecognized so the planner reports an
        // error instead of guessing.
        assert_eq!(classify("file.C"), Some(SourceLanguage::Cxx));
        assert_eq!(classify("file.CPP"), None);
    }

    #[test]
    fn classification_returns_none_for_unknown_or_missing_extension() {
        // No extension at all.
        assert_eq!(classify("file"), None);
        // Headers are not standalone translation units.
        assert_eq!(classify("file.h"), None);
        assert_eq!(classify("file.hpp"), None);
        // Not a source file.
        assert_eq!(classify("file.txt"), None);
    }

    #[test]
    fn link_driver_is_cxx_when_any_source_is_cpp() {
        let inputs: [&[SourceLanguage]; 3] = [
            &[SourceLanguage::Cxx],
            &[SourceLanguage::C, SourceLanguage::Cxx],
            &[SourceLanguage::Cxx, SourceLanguage::C],
        ];
        for languages in inputs {
            assert_eq!(link_driver_language(languages), SourceLanguage::Cxx);
        }
    }

    #[test]
    fn link_driver_is_c_when_every_source_is_c() {
        let inputs: [&[SourceLanguage]; 2] = [
            &[SourceLanguage::C],
            &[SourceLanguage::C, SourceLanguage::C],
        ];
        for languages in inputs {
            assert_eq!(link_driver_language(languages), SourceLanguage::C);
        }
    }

    #[test]
    fn link_driver_falls_back_to_c_for_empty_input() {
        // The planner rejects empty targets up-front, so this
        // input should not occur in practice. The documented
        // fallback is pinned to C so that no future caller can
        // come to rely on the C++ driver being chosen for an
        // empty link line.
        let no_objects: &[SourceLanguage] = &[];
        assert_eq!(link_driver_language(no_objects), SourceLanguage::C);
    }

    #[test]
    fn keys_are_stable_across_renames() {
        // These keys end up in JSON metadata and rule names.
        // Pinning them here means renaming a variant cannot
        // change them by accident.
        let expected = [(SourceLanguage::C, "c"), (SourceLanguage::Cxx, "cxx")];
        for (language, key) in expected {
            assert_eq!(language.as_key(), key);
            assert_eq!(language.to_string(), key);
        }
    }
}