1use crate::core::models::{Course, Degree, Plan, School};
11use std::collections::HashMap;
12use std::error::Error;
13use std::fs;
14use std::path::Path;
15
/// Descriptive header values read from the top of a curriculum CSV.
///
/// Each field holds the second cell of the metadata row whose first cell
/// (compared case-insensitively) matches the label noted on the field.
#[derive(Debug, Clone)]
pub struct CurriculumMetadata {
    /// Value of the `Curriculum` row; parsing fails when it is empty.
    pub name: String,
    /// Value of the `Institution` row (the misspelling `Insitution` is also
    /// accepted); parsing fails when it is empty.
    pub institution: String,
    /// Value of the `Degree Type` row; empty when the row is absent.
    pub degree_type: String,
    /// Value of the `System Type` row; empty when the row is absent.
    pub system_type: String,
    /// Value of the `CIP` row; empty when the row is absent.
    pub cip_code: String,
}
30
31struct CourseParseContext {
36 course_id_to_natural_key: HashMap<String, String>,
38 natural_key_to_ids: HashMap<String, Vec<String>>,
40 courses_by_id: HashMap<String, Course>,
42 course_ids_in_order: Vec<String>,
44}
45
46impl CourseParseContext {
47 fn new() -> Self {
49 Self {
50 course_id_to_natural_key: HashMap::new(),
51 natural_key_to_ids: HashMap::new(),
52 courses_by_id: HashMap::new(),
53 course_ids_in_order: Vec::new(),
54 }
55 }
56
57 fn add_course(&mut self, course_id: String, mut course: Course) {
63 let natural_key = course.key();
64 course.csv_id = Some(course_id.clone());
65
66 self.course_id_to_natural_key
67 .insert(course_id.clone(), natural_key.clone());
68 self.natural_key_to_ids
69 .entry(natural_key)
70 .or_default()
71 .push(course_id.clone());
72 self.courses_by_id.insert(course_id.clone(), course);
73 self.course_ids_in_order.push(course_id);
74 }
75
76 fn compute_storage_keys(&self) -> Result<HashMap<String, String>, Box<dyn Error>> {
84 let mut course_id_to_storage_key = HashMap::new();
85
86 for (course_id, natural_key) in &self.course_id_to_natural_key {
87 let ids_with_same_key = self.natural_key_to_ids.get(natural_key).ok_or_else(|| {
88 format!(
89 "Internal consistency error: natural key '{natural_key}' not found in key mapping"
90 )
91 })?;
92
93 let storage_key = if ids_with_same_key.len() > 1 {
94 format!("{natural_key}_{course_id}")
95 } else {
96 natural_key.clone()
97 };
98
99 course_id_to_storage_key.insert(course_id.clone(), storage_key);
100 }
101
102 Ok(course_id_to_storage_key)
103 }
104}
105
106pub fn parse_curriculum_csv<P: AsRef<Path>>(path: P) -> Result<School, Box<dyn Error>> {
122 let content = fs::read_to_string(path)?;
123 let lines: Vec<&str> = content.lines().collect();
124
125 let metadata = parse_metadata(&lines)?;
127 let mut school = create_school_from_metadata(&metadata);
128
129 let (courses_start, headers) = find_courses_section(&lines)?;
131
132 let mut ctx = CourseParseContext::new();
134 first_pass_load_courses(&lines, courses_start, &headers, &mut ctx);
135
136 let storage_keys = ctx.compute_storage_keys()?;
138
139 third_pass_add_dependencies(&lines, courses_start, &headers, &mut ctx, &storage_keys);
141
142 finalize_school(&mut school, ctx, &storage_keys, &metadata.name)?;
144
145 Ok(school)
146}
147
148fn create_school_from_metadata(metadata: &CurriculumMetadata) -> School {
150 let mut school = School::new(metadata.institution.clone());
151 let degree = Degree::new(
152 metadata.name.clone(),
153 metadata.degree_type.clone(),
154 metadata.cip_code.clone(),
155 metadata.system_type.clone(),
156 );
157 school.add_degree(degree);
158 school
159}
160
161fn find_courses_section(lines: &[&str]) -> Result<(usize, Vec<String>), Box<dyn Error>> {
169 let courses_start = lines
170 .iter()
171 .position(|line| line.to_lowercase().contains("courses"))
172 .ok_or("No 'Courses' section found in CSV")?;
173
174 if courses_start + 1 >= lines.len() {
175 return Err("No course header found".into());
176 }
177
178 let header_line = lines[courses_start + 1];
179 let headers = parse_csv_line(header_line);
180
181 Ok((courses_start, headers))
182}
183
184fn first_pass_load_courses(
186 lines: &[&str],
187 courses_start: usize,
188 headers: &[String],
189 ctx: &mut CourseParseContext,
190) {
191 for line in lines.iter().skip(courses_start + 2) {
192 if line.trim().is_empty() {
193 continue;
194 }
195
196 if let Ok(course) = parse_course_line(line, headers) {
197 if let Some(course_id) = get_field(line, "Course ID", headers) {
198 ctx.add_course(course_id, course);
199 }
200 }
201 }
202}
203
204fn third_pass_add_dependencies(
206 lines: &[&str],
207 courses_start: usize,
208 headers: &[String],
209 ctx: &mut CourseParseContext,
210 storage_keys: &HashMap<String, String>,
211) {
212 for line in lines.iter().skip(courses_start + 2) {
213 if line.trim().is_empty() {
214 continue;
215 }
216
217 let Some(course_id) = get_field(line, "Course ID", headers) else {
218 continue;
219 };
220
221 let Some(course) = ctx.courses_by_id.get_mut(&course_id) else {
222 continue;
223 };
224
225 if let Some(prereq_str) = get_field(line, "Prerequisites", headers) {
227 if !prereq_str.trim().is_empty() {
228 add_prerequisites_with_mapping(course, &prereq_str, storage_keys);
229 }
230 }
231
232 if let Some(coreq_str) = get_field(line, "Corequisites", headers) {
234 if !coreq_str.trim().is_empty() {
235 add_corequisites_with_mapping(course, &coreq_str, storage_keys);
236 }
237 }
238
239 if let Some(strict_coreq_str) = get_field(line, "Strict-Corequisites", headers) {
241 if !strict_coreq_str.trim().is_empty() {
242 add_strict_corequisites_with_mapping(course, &strict_coreq_str, storage_keys);
243 }
244 }
245 }
246}
247
248fn finalize_school(
253 school: &mut School,
254 mut ctx: CourseParseContext,
255 storage_keys: &HashMap<String, String>,
256 curriculum_name: &str,
257) -> Result<(), Box<dyn Error>> {
258 for course_id in &ctx.course_ids_in_order {
260 if let Some(course) = ctx.courses_by_id.remove(course_id) {
261 if let Some(storage_key) = storage_keys.get(course_id) {
262 school.add_course_with_key(storage_key.clone(), course);
263 }
264 }
265 }
266
267 let mut plan = Plan::new(
269 curriculum_name.to_string(),
270 school.degrees.first().ok_or("No degree found")?.id(),
271 );
272 plan.institution = Some(school.name.clone());
273
274 for course_id in &ctx.course_ids_in_order {
276 if let Some(storage_key) = storage_keys.get(course_id) {
277 plan.add_course(storage_key.clone());
278 }
279 }
280
281 school.add_plan(plan);
282 Ok(())
283}
284
/// Returns `field` with surrounding noise removed from both ends.
///
/// Noise is whitespace, double quotes, the byte-order mark (`U+FEFF`) and the
/// zero-width space (`U+200B`). Characters inside the field are untouched.
fn clean_field(field: &str) -> String {
    let is_noise =
        |c: char| c.is_whitespace() || matches!(c, '"' | '\u{feff}' | '\u{200b}');
    field.trim_matches(is_noise).to_owned()
}
297
298fn parse_metadata(lines: &[&str]) -> Result<CurriculumMetadata, Box<dyn Error>> {
310 let mut metadata = CurriculumMetadata {
311 name: String::new(),
312 institution: String::new(),
313 degree_type: String::new(),
314 system_type: String::new(),
315 cip_code: String::new(),
316 };
317
318 for line in lines.iter().take(10) {
319 let parts = parse_csv_line(line);
320 if parts.len() < 2 {
321 continue;
322 }
323
324 let key = parts[0].to_lowercase();
325 let value = parts[1].clone();
326
327 match key.as_str() {
328 "curriculum" => metadata.name = value,
329 "institution" | "insitution" => metadata.institution = value, "degree type" => metadata.degree_type = value,
331 "system type" => metadata.system_type = value,
332 "cip" => metadata.cip_code = value,
333 _ => {}
334 }
335 }
336
337 if metadata.name.is_empty() {
339 return Err("Missing Curriculum name".into());
340 }
341 if metadata.institution.is_empty() {
342 return Err("Missing Institution".into());
343 }
344
345 Ok(metadata)
346}
347
348fn parse_csv_line(line: &str) -> Vec<String> {
353 line.split(',').map(clean_field).collect()
354}
355
356fn parse_course_line(line: &str, headers: &[String]) -> Result<Course, Box<dyn Error>> {
364 let _fields = parse_csv_line(line);
365
366 let name = get_field(line, "Course Name", headers).unwrap_or_default();
367 let prefix = get_field(line, "Prefix", headers).unwrap_or_default();
368 let number = get_field(line, "Number", headers).unwrap_or_default();
369
370 let credit_hours_str =
371 get_field(line, "Credit Hours", headers).unwrap_or_else(|| "0".to_string());
372 let credit_hours = credit_hours_str.parse::<f32>().unwrap_or(0.0);
373
374 if prefix.is_empty() || number.is_empty() {
375 return Err("Missing prefix or number".into());
376 }
377
378 let mut course = Course::new(name, prefix, number, credit_hours);
379
380 if let Some(canonical) = get_field(line, "Canonical Name", headers) {
382 if !canonical.is_empty() {
383 course.set_canonical_name(canonical);
384 }
385 }
386
387 Ok(course)
388}
389
390fn get_field(line: &str, header_name: &str, headers: &[String]) -> Option<String> {
403 let fields = parse_csv_line(line);
404
405 headers
406 .iter()
407 .position(|h| h.eq_ignore_ascii_case(header_name))
408 .and_then(|idx| fields.get(idx))
409 .cloned()
410}
411
412fn add_prerequisites_with_mapping(
422 course: &mut Course,
423 prereq_str: &str,
424 course_id_to_key: &HashMap<String, String>,
425) {
426 for prereq in prereq_str.split(';') {
427 let trimmed = prereq.trim();
428 if !trimmed.is_empty() {
429 if let Some(key) = course_id_to_key.get(trimmed) {
431 course.add_prerequisite(key.clone());
432 } else {
433 let normalized = normalize_course_key(trimmed);
435 if !normalized.is_empty() {
436 course.add_prerequisite(normalized);
437 }
438 }
439 }
440 }
441}
442
443fn add_corequisites_with_mapping(
454 course: &mut Course,
455 coreq_str: &str,
456 course_id_to_key: &HashMap<String, String>,
457) {
458 for coreq in coreq_str.split(';') {
459 let trimmed = coreq.trim();
460 if !trimmed.is_empty() {
461 if let Some(key) = course_id_to_key.get(trimmed) {
464 course.add_corequisite(key.clone());
465 }
466 }
467 }
468}
469
470fn add_strict_corequisites_with_mapping(
480 course: &mut Course,
481 coreq_str: &str,
482 course_id_to_key: &HashMap<String, String>,
483) {
484 for coreq in coreq_str.split(';') {
485 let trimmed = coreq.trim();
486 if !trimmed.is_empty() {
487 if let Some(key) = course_id_to_key.get(trimmed) {
488 course.add_strict_corequisite(key.clone());
489 }
490 }
491 }
492}
493
/// Turns a free-form course reference into a compact key.
///
/// Everything from the first `(` onward is discarded. Of what remains, the
/// first two whitespace-separated words are joined without a separator
/// (`"CS 1800"` becomes `"CS1800"`); a single word is returned unchanged and
/// blank input yields an empty string. Words after the second are ignored.
fn normalize_course_key(input: &str) -> String {
    let before_paren = input.split('(').next().unwrap_or(input);
    let mut words = before_paren.split_whitespace();

    match (words.next(), words.next()) {
        (Some(prefix), Some(number)) => format!("{prefix}{number}"),
        (Some(only), None) => only.to_string(),
        (None, _) => String::new(),
    }
}
522
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for `Course::new` that accepts string slices.
    fn make_course(name: &str, prefix: &str, number: &str, credit_hours: f32) -> Course {
        Course::new(
            name.to_string(),
            prefix.to_string(),
            number.to_string(),
            credit_hours,
        )
    }

    /// Converts string slices into the owned header list `get_field` expects.
    fn header_row(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    // ---- normalize_course_key -------------------------------------------

    #[test]
    fn test_normalize_course_key() {
        let cases = [
            ("CS 1800", "CS1800"),
            ("CS1800", "CS1800"),
            ("MATH 1342", "MATH1342"),
            ("CS 1800 (or coreq)", "CS1800"),
            (" PHYS 1151 ", "PHYS1151"),
        ];

        for &(input, expected) in &cases {
            assert_eq!(normalize_course_key(input), expected);
        }
    }

    #[test]
    fn test_normalize_course_key_empty() {
        for &blank in &["", " "] {
            assert_eq!(normalize_course_key(blank), "");
        }
    }

    // ---- parse_csv_line / clean_field -----------------------------------

    #[test]
    fn test_parse_csv_line() {
        let fields =
            parse_csv_line("CS1800,Discrete Structures,CS,1800,CS1700,CS1801,false,4.0,");

        assert_eq!(fields.len(), 9);
        assert_eq!(
            fields[..4],
            ["CS1800", "Discrete Structures", "CS", "1800"]
        );
    }

    #[test]
    fn test_parse_csv_line_with_quotes() {
        let fields = parse_csv_line("1,\"Course With, Comma\",CS,101,,,3.0");
        assert!(fields.len() >= 7);
    }

    #[test]
    fn test_clean_field_removes_bom() {
        assert_eq!(clean_field("\u{feff}Curriculum"), "Curriculum");
    }

    #[test]
    fn test_clean_field_removes_quotes() {
        let cases = [("\"quoted\"", "quoted"), (" \"spaced\" ", "spaced")];

        for &(raw, expected) in &cases {
            assert_eq!(clean_field(raw), expected);
        }
    }

    #[test]
    fn test_clean_field_removes_zero_width_space() {
        assert_eq!(clean_field("\u{200b}Test"), "Test");
    }

    // ---- get_field -------------------------------------------------------

    #[test]
    fn test_get_field_case_insensitive() {
        let headers = header_row(&["Course ID", "Course Name", "Credit Hours"]);
        let line = "1,Intro to CS,3.0";

        let lookups = [
            ("course id", "1"),
            ("COURSE NAME", "Intro to CS"),
            ("Credit hours", "3.0"),
        ];

        for &(header, expected) in &lookups {
            assert_eq!(
                get_field(line, header, &headers),
                Some(expected.to_string())
            );
        }
    }

    #[test]
    fn test_get_field_missing_header() {
        let headers = header_row(&["Course ID"]);

        assert_eq!(get_field("1", "Missing Header", &headers), None);
    }

    // ---- parse_metadata --------------------------------------------------

    #[test]
    fn test_parse_metadata_valid() {
        let lines = [
            "Curriculum,Test Program",
            "Institution,Test University",
            "Degree Type,BS",
            "System Type,semester",
            "CIP,11.0701",
        ];

        let metadata = parse_metadata(&lines).unwrap();

        assert_eq!(metadata.name, "Test Program");
        assert_eq!(metadata.institution, "Test University");
        assert_eq!(metadata.degree_type, "BS");
        assert_eq!(metadata.system_type, "semester");
        assert_eq!(metadata.cip_code, "11.0701");
    }

    #[test]
    fn test_parse_metadata_handles_typo() {
        let lines = ["Curriculum,Test Program", "Insitution,Test University"];

        let metadata = parse_metadata(&lines).unwrap();

        assert_eq!(metadata.institution, "Test University");
    }

    #[test]
    fn test_parse_metadata_missing_curriculum() {
        let lines = ["Institution,Test University"];

        let result = parse_metadata(&lines);

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Curriculum"));
    }

    #[test]
    fn test_parse_metadata_missing_institution() {
        let lines = ["Curriculum,Test Program"];

        let result = parse_metadata(&lines);

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Institution"));
    }

    // ---- CourseParseContext ----------------------------------------------

    #[test]
    fn test_course_parse_context_add_course() {
        let mut ctx = CourseParseContext::new();

        ctx.add_course("1".to_string(), make_course("Intro to CS", "CS", "101", 3.0));

        assert!(ctx.courses_by_id.contains_key("1"));
        assert_eq!(ctx.course_ids_in_order, vec!["1"]);
        assert_eq!(
            ctx.course_id_to_natural_key.get("1").map(String::as_str),
            Some("CS101")
        );
    }

    #[test]
    fn test_course_parse_context_duplicate_keys() {
        let mut ctx = CourseParseContext::new();

        // Two different CSV IDs whose courses share prefix and number.
        ctx.add_course("1".to_string(), make_course("Intro A", "CS", "101", 3.0));
        ctx.add_course("2".to_string(), make_course("Intro B", "CS", "101", 3.0));

        let storage_keys = ctx.compute_storage_keys().unwrap();

        assert_eq!(storage_keys.get("1").map(String::as_str), Some("CS101_1"));
        assert_eq!(storage_keys.get("2").map(String::as_str), Some("CS101_2"));
    }

    #[test]
    fn test_course_parse_context_unique_keys() {
        let mut ctx = CourseParseContext::new();

        ctx.add_course("1".to_string(), make_course("Intro", "CS", "101", 3.0));
        ctx.add_course(
            "2".to_string(),
            make_course("Data Structures", "CS", "201", 4.0),
        );

        let storage_keys = ctx.compute_storage_keys().unwrap();

        assert_eq!(storage_keys.get("1").map(String::as_str), Some("CS101"));
        assert_eq!(storage_keys.get("2").map(String::as_str), Some("CS201"));
    }
}