Skip to main content

trueno_rag/eval/
domain.rs

1//! Domain classification for course directories
2
3/// Classify a course directory name into a domain.
4///
5/// Returns one of: `aws`, `ml`, `k8s`, `lang`, `devops`, `data`, `cloud`, `other`
6pub fn classify_domain(course_dir: &str) -> &'static str {
7    let lower = course_dir.to_lowercase();
8
9    // AWS
10    if matches_any(
11        &lower,
12        &[
13            "52-weeks-aws",
14            "aws-cloud-practitioner",
15            "aws-certified",
16            "aws-lambda",
17            "aws-llmops",
18            "bedrock",
19            "building-genai-aws",
20            "amazon-",
21            "sagemaker",
22        ],
23    ) {
24        return "aws";
25    }
26
27    // ML / AI
28    if matches_any(
29        &lower,
30        &[
31            "machine-learning",
32            "applied-ml",
33            "pytorch",
34            "mlops",
35            "hugging-face",
36            "data-science",
37            "deep-learning",
38            "neural",
39            "llm",
40            "advanced-rag",
41            "genai",
42            "ai-",
43            "openai",
44        ],
45    ) {
46        return "ml";
47    }
48
49    // Kubernetes / Containers
50    if matches_any(
51        &lower,
52        &["kubernetes", "docker", "assimilate-containers", "container", "minikube", "k8s"],
53    ) {
54        return "k8s";
55    }
56
57    // Programming Languages
58    if matches_any(
59        &lower,
60        &[
61            "rust",
62            "python",
63            "assimilate-go",
64            "golang",
65            "assimilate-java",
66            "java-",
67            "swift",
68            "julia",
69            "c-programming",
70            "cpp",
71            "javascript",
72            "typescript",
73        ],
74    ) {
75        return "lang";
76    }
77
78    // DevOps
79    if matches_any(
80        &lower,
81        &[
82            "devops",
83            "github-actions",
84            "makefile",
85            "linux",
86            "terraform",
87            "ci-cd",
88            "ansible",
89            "jenkins",
90        ],
91    ) {
92        return "devops";
93    }
94
95    // Data Engineering
96    if matches_any(
97        &lower,
98        &["data-engineering", "databricks", "pandas", "sql", "snowflake", "spark", "etl", "dbt"],
99    ) {
100        return "data";
101    }
102
103    // Cloud (non-AWS)
104    if matches_any(
105        &lower,
106        &[
107            "going-pro-cloud-computing",
108            "duke-cloud",
109            "assimilate-azure",
110            "assimilate-google",
111            "gcp",
112            "azure",
113        ],
114    ) {
115        return "cloud";
116    }
117
118    "other"
119}
120
/// Report whether at least one entry of `patterns` occurs as a substring of `haystack`.
///
/// The comparison is case-sensitive; an empty `patterns` slice yields `false`.
fn matches_any(haystack: &str, patterns: &[&str]) -> bool {
    for &needle in patterns {
        if haystack.contains(needle) {
            return true;
        }
    }
    false
}
124
/// Extract the course directory name from a source path.
///
/// The path is split on `/`; empty components (produced by a leading, trailing, or doubled
/// slash) are ignored so they can never be reported as a directory name. The result is chosen
/// in this order:
///
/// 1. The component immediately following the first `courses` component.
/// 2. Otherwise, the third component from the end — the directory two levels above the file,
///    which matches a `<course>/<subdir>/<file>` layout.
/// 3. Otherwise, the literal `"unknown"`.
///
/// The returned slice borrows from `source_path` (or is the static `"unknown"`).
///
/// Example: `/data/courses/52-weeks-aws/build/week1.srt` → `52-weeks-aws`
pub fn extract_course_dir(source_path: &str) -> &str {
    // Preferred: whatever follows the first "courses" component. If that first "courses" is the
    // final component there is nothing after it, and no later "courses" can exist either.
    let after_courses = source_path
        .split('/')
        .filter(|component| !component.is_empty())
        .skip_while(|component| *component != "courses")
        .nth(1);
    if let Some(course_dir) = after_courses {
        return course_dir;
    }

    // Fallback: walk from the end — index 0 is the file, 1 its parent, 2 the assumed course dir.
    source_path
        .rsplit('/')
        .filter(|component| !component.is_empty())
        .nth(2)
        .unwrap_or("unknown")
}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every name in `course_dirs` is classified as `expected`.
    fn assert_all_classify_as(expected: &str, course_dirs: &[&str]) {
        for &dir in course_dirs {
            assert_eq!(classify_domain(dir), expected, "course dir: {}", dir);
        }
    }

    #[test]
    fn test_classify_aws() {
        assert_all_classify_as(
            "aws",
            &[
                "52-weeks-aws",
                "aws-cloud-practitioner",
                "aws-certified-solutions-architect",
                "bedrock-genai",
            ],
        );
    }

    #[test]
    fn test_classify_ml() {
        // "Applied-ML" exercises the case-insensitive path.
        assert_all_classify_as("ml", &["machine-learning", "Applied-ML", "pytorch", "hugging-face"]);
    }

    #[test]
    fn test_classify_k8s() {
        assert_all_classify_as("k8s", &["kubernetes-basics", "docker", "assimilate-containers"]);
    }

    #[test]
    fn test_classify_lang() {
        // "python-for-devops" also contains a DevOps keyword; the language rule has priority.
        assert_all_classify_as("lang", &["rust-fundamentals", "python-for-devops", "assimilate-go"]);
    }

    #[test]
    fn test_classify_devops() {
        assert_all_classify_as("devops", &["github-actions", "terraform-iac", "linux-fundamentals"]);
    }

    #[test]
    fn test_classify_data() {
        assert_all_classify_as("data", &["data-engineering", "databricks-ml", "pandas-intro"]);
    }

    #[test]
    fn test_classify_cloud() {
        assert_all_classify_as("cloud", &["going-pro-cloud-computing", "assimilate-azure-basics"]);
    }

    #[test]
    fn test_classify_other() {
        assert_all_classify_as("other", &["random-course", "cooking-101"]);
    }

    #[test]
    fn test_extract_course_dir() {
        // (source path, expected course directory)
        let cases = [
            ("/data/courses/52-weeks-aws/build/week1.srt", "52-weeks-aws"),
            ("/home/noah/data/courses/pytorch/build/lesson1.srt", "pytorch"),
        ];
        for &(path, expected) in cases.iter() {
            assert_eq!(extract_course_dir(path), expected, "source path: {}", path);
        }
    }
}