1use crate::error::{CsvError, Result};
21use hedl_core::{Document, Item, MatrixList, Node, Value};
22use hedl_core::lex::parse_expression_token;
23use hedl_core::lex::parse_tensor;
24use std::io::Read;
25
/// Default cap on the number of CSV records a single import will accept.
///
/// This is the `max_rows` value produced by `FromCsvConfig::default()`; the
/// reader returns `CsvError::SecurityLimit` once a record index reaches the
/// configured cap.
pub const DEFAULT_MAX_ROWS: usize = 1_000_000;
55
/// Options controlling how CSV input is read and converted into a document.
///
/// Build one with struct-update syntax on top of `FromCsvConfig::default()`
/// and pass it to `from_csv_with_config` or `from_csv_reader_with_config`.
#[derive(Clone, Debug)]
pub struct FromCsvConfig {
    /// Field separator byte handed to the CSV reader (e.g. `b','` or `b'\t'`).
    pub delimiter: u8,

    /// Whether the CSV reader treats the first record as a header row.
    pub has_headers: bool,

    /// When `true` the CSV reader trims all fields and headers
    /// (`csv::Trim::All`); when `false` nothing is trimmed by the reader.
    pub trim: bool,

    /// Maximum number of records accepted; reaching this index while reading
    /// aborts the import with `CsvError::SecurityLimit`.
    pub max_rows: usize,

    /// When `true`, all records are buffered first and a per-column type is
    /// inferred before values are converted.
    pub infer_schema: bool,

    /// Number of leading records examined during type inference
    /// (only used when `infer_schema` is `true`).
    pub sample_rows: usize,

    /// Key under which the resulting list is stored in the document root.
    /// `None` falls back to the lower-cased type name with an `s` appended.
    pub list_key: Option<String>,
}
270
271impl Default for FromCsvConfig {
272 fn default() -> Self {
273 Self {
274 delimiter: b',',
275 has_headers: true,
276 trim: true,
277 max_rows: DEFAULT_MAX_ROWS,
278 infer_schema: false,
279 sample_rows: 100,
280 list_key: None,
281 }
282 }
283}
284
285pub fn from_csv(csv: &str, type_name: &str, schema: &[&str]) -> Result<Document> {
392 from_csv_with_config(csv, type_name, schema, FromCsvConfig::default())
393}
394
395pub fn from_csv_with_config(
456 csv: &str,
457 type_name: &str,
458 schema: &[&str],
459 config: FromCsvConfig,
460) -> Result<Document> {
461 from_csv_reader_with_config(csv.as_bytes(), type_name, schema, config)
462}
463
464pub fn from_csv_reader<R: Read>(
516 reader: R,
517 type_name: &str,
518 schema: &[&str],
519) -> Result<Document> {
520 from_csv_reader_with_config(reader, type_name, schema, FromCsvConfig::default())
521}
522
/// Value type inferred for a whole CSV column.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ColumnType {
    /// Every sampled cell was a null marker (or there were no cells).
    Null,
    /// Every non-null sampled cell was `true` or `false`.
    Bool,
    /// Every non-null sampled cell parsed as `i64`.
    Int,
    /// Every non-null sampled cell parsed as `f64`.
    Float,
    /// Anything else; cells are converted by the general value parser.
    String,
}
537
538fn infer_column_type<'a, I>(values: I) -> ColumnType
562where
563 I: Iterator<Item = &'a str>,
564{
565 let mut all_null = true;
566 let mut all_bool = true;
567 let mut all_int = true;
568 let mut all_float = true;
569
570 for value in values {
571 let trimmed = value.trim();
572
573 if trimmed.is_empty() || trimmed == "~" || trimmed == "null" {
575 continue;
576 }
577
578 all_null = false;
579
580 if trimmed != "true" && trimmed != "false" {
582 all_bool = false;
583 }
584
585 if trimmed.parse::<i64>().is_err() {
587 all_int = false;
588 }
589
590 if trimmed.parse::<f64>().is_err() {
592 all_float = false;
593 }
594
595 if !all_bool && !all_int && !all_float {
597 return ColumnType::String;
598 }
599 }
600
601 if all_null {
603 ColumnType::Null
604 } else if all_bool {
605 ColumnType::Bool
606 } else if all_int {
607 ColumnType::Int
608 } else if all_float {
609 ColumnType::Float
610 } else {
611 ColumnType::String
612 }
613}
614
615fn infer_column_types(records: &[Vec<String>], sample_size: usize) -> Vec<ColumnType> {
637 if records.is_empty() {
638 return Vec::new();
639 }
640
641 let num_columns = records[0].len();
642 let sample_count = sample_size.min(records.len());
643
644 (0..num_columns)
645 .map(|col_idx| {
646 let column_values = records
647 .iter()
648 .take(sample_count)
649 .filter_map(|row| row.get(col_idx).map(|s| s.as_str()));
650
651 infer_column_type(column_values)
652 })
653 .collect()
654}
655
656fn parse_csv_value_with_type(field: &str, col_type: ColumnType) -> Result<Value> {
670 let trimmed = field.trim();
671
672 if trimmed.is_empty() || trimmed == "~" {
674 return Ok(Value::Null);
675 }
676
677 match col_type {
678 ColumnType::Null => Ok(Value::Null),
679 ColumnType::Bool => {
680 if trimmed == "true" {
681 Ok(Value::Bool(true))
682 } else if trimmed == "false" {
683 Ok(Value::Bool(false))
684 } else {
685 Ok(Value::String(field.to_string()))
687 }
688 }
689 ColumnType::Int => {
690 if let Ok(n) = trimmed.parse::<i64>() {
691 Ok(Value::Int(n))
692 } else {
693 Ok(Value::String(field.to_string()))
695 }
696 }
697 ColumnType::Float => {
698 if let Ok(f) = trimmed.parse::<f64>() {
699 Ok(Value::Float(f))
700 } else {
701 Ok(Value::String(field.to_string()))
703 }
704 }
705 ColumnType::String => {
706 parse_csv_value(field)
709 }
710 }
711}
712
713pub fn from_csv_reader_with_config<R: Read>(
775 reader: R,
776 type_name: &str,
777 schema: &[&str],
778 config: FromCsvConfig,
779) -> Result<Document> {
780 let mut csv_reader = csv::ReaderBuilder::new()
781 .delimiter(config.delimiter)
782 .has_headers(config.has_headers)
783 .trim(if config.trim {
784 csv::Trim::All
785 } else {
786 csv::Trim::None
787 })
788 .from_reader(reader);
789
790 let mut doc = Document::new((1, 0));
791
792 let mut full_schema = vec!["id".to_string()];
794 full_schema.extend(schema.iter().map(|s| s.to_string()));
795
796 doc.structs
798 .insert(type_name.to_string(), full_schema.clone());
799
800 let mut matrix_list = MatrixList::new(type_name, full_schema.clone());
802
803 let _inferred_types = if config.infer_schema {
805 let mut all_records = Vec::new();
807 for (record_idx, result) in csv_reader.records().enumerate() {
808 if record_idx >= config.max_rows {
810 return Err(CsvError::SecurityLimit {
811 limit: config.max_rows,
812 actual: record_idx + 1,
813 });
814 }
815
816 let record = result.map_err(|e| {
817 CsvError::ParseError {
818 line: record_idx + 1,
819 message: e.to_string(),
820 }
821 })?;
822
823 if record.is_empty() {
824 continue;
825 }
826
827 let row: Vec<String> = record.iter().map(|s| s.to_string()).collect();
829 all_records.push(row);
830 }
831
832 let types = infer_column_types(&all_records, config.sample_rows);
834
835 for (record_idx, row) in all_records.iter().enumerate() {
837 let id = row.first().ok_or_else(|| {
839 CsvError::MissingColumn("id".to_string())
840 })?;
841
842 if id.is_empty() {
843 return Err(CsvError::EmptyId {
844 row: record_idx + 1,
845 });
846 }
847
848 let mut fields = Vec::new();
850 for (field_idx, field) in row.iter().enumerate() {
851 let col_type = types.get(field_idx).copied().unwrap_or(ColumnType::String);
852 let value = parse_csv_value_with_type(field, col_type).map_err(|e| {
853 e.with_context(format!(
854 "in column '{}' at line {}",
855 full_schema.get(field_idx).unwrap_or(&"unknown".to_string()),
856 record_idx + 1
857 ))
858 })?;
859 fields.push(value);
860 }
861
862 if fields.len() != full_schema.len() {
864 return Err(CsvError::WidthMismatch {
865 expected: full_schema.len(),
866 actual: fields.len(),
867 row: record_idx + 1,
868 });
869 }
870
871 let node = Node::new(type_name, id, fields);
872 matrix_list.add_row(node);
873 }
874
875 types
876 } else {
877 for (record_idx, result) in csv_reader.records().enumerate() {
879 if record_idx >= config.max_rows {
881 return Err(CsvError::SecurityLimit {
882 limit: config.max_rows,
883 actual: record_idx + 1,
884 });
885 }
886
887 let record = result.map_err(|e| {
888 CsvError::ParseError {
889 line: record_idx + 1,
890 message: e.to_string(),
891 }
892 })?;
893
894 if record.is_empty() {
895 continue;
896 }
897
898 let id = record.get(0).ok_or_else(|| {
900 CsvError::MissingColumn("id".to_string())
901 })?;
902
903 if id.is_empty() {
904 return Err(CsvError::EmptyId {
905 row: record_idx + 1,
906 });
907 }
908
909 let mut fields = Vec::new();
911 for (field_idx, field) in record.iter().enumerate() {
912 let value = parse_csv_value(field).map_err(|e| {
913 e.with_context(format!(
914 "in column '{}' at line {}",
915 full_schema.get(field_idx).unwrap_or(&"unknown".to_string()),
916 record_idx + 1
917 ))
918 })?;
919 fields.push(value);
920 }
921
922 if fields.len() != full_schema.len() {
924 return Err(CsvError::WidthMismatch {
925 expected: full_schema.len(),
926 actual: fields.len(),
927 row: record_idx + 1,
928 });
929 }
930
931 let node = Node::new(type_name, id, fields);
932 matrix_list.add_row(node);
933 }
934
935 Vec::new()
936 };
937
938 let list_key = config
940 .list_key
941 .unwrap_or_else(|| format!("{}s", type_name.to_lowercase()));
942
943 doc.root.insert(list_key, Item::List(matrix_list));
944
945 Ok(doc)
946}
947
948fn parse_csv_value(field: &str) -> Result<Value> {
959 let trimmed = field.trim();
960
961 if trimmed.is_empty() || trimmed == "~" {
963 return Ok(Value::Null);
964 }
965
966 if trimmed == "true" {
968 return Ok(Value::Bool(true));
969 }
970 if trimmed == "false" {
971 return Ok(Value::Bool(false));
972 }
973
974 match trimmed {
976 "NaN" => return Ok(Value::Float(f64::NAN)),
977 "Infinity" => return Ok(Value::Float(f64::INFINITY)),
978 "-Infinity" => return Ok(Value::Float(f64::NEG_INFINITY)),
979 _ => {}
980 }
981
982 if trimmed.starts_with('@') {
984 return parse_reference(trimmed);
985 }
986
987 if trimmed.starts_with("$(") && trimmed.ends_with(')') {
989 let expr = parse_expression_token(trimmed).map_err(|e| {
990 CsvError::ParseError {
991 line: 0,
992 message: format!("Invalid expression: {}", e),
993 }
994 })?;
995 return Ok(Value::Expression(expr));
996 }
997
998 if let Ok(n) = trimmed.parse::<i64>() {
1000 return Ok(Value::Int(n));
1001 }
1002
1003 if let Ok(f) = trimmed.parse::<f64>() {
1005 return Ok(Value::Float(f));
1006 }
1007
1008 if trimmed.starts_with('[') && trimmed.ends_with(']') {
1010 if let Ok(tensor) = parse_tensor(trimmed) {
1011 return Ok(Value::Tensor(tensor));
1012 }
1013 }
1015
1016 Ok(Value::String(field.to_string()))
1018}
1019
1020fn parse_reference(s: &str) -> Result<Value> {
1022 let without_at = &s[1..];
1023
1024 if let Some(colon_pos) = without_at.find(':') {
1025 let type_name = &without_at[..colon_pos];
1027 let id = &without_at[colon_pos + 1..];
1028
1029 if type_name.is_empty() || id.is_empty() {
1030 return Err(CsvError::ParseError {
1031 line: 0,
1032 message: format!("Invalid reference format: {}", s),
1033 });
1034 }
1035
1036 Ok(Value::Reference(hedl_core::Reference::qualified(
1037 type_name, id,
1038 )))
1039 } else {
1040 if without_at.is_empty() {
1042 return Err(CsvError::ParseError {
1043 line: 0,
1044 message: "Empty reference ID".to_string(),
1045 });
1046 }
1047
1048 Ok(Value::Reference(hedl_core::Reference::local(without_at)))
1049 }
1050}
1051
#[cfg(test)]
mod tests {
    use super::*;
    use hedl_core::lex::Tensor;
    use hedl_test::expr_value;

    // ==================== FromCsvConfig ====================

    #[test]
    fn test_from_csv_config_default() {
        let config = FromCsvConfig::default();
        assert_eq!(config.delimiter, b',');
        assert!(config.has_headers);
        assert!(config.trim);
        assert_eq!(config.max_rows, DEFAULT_MAX_ROWS);
    }

    #[test]
    fn test_from_csv_config_debug() {
        let config = FromCsvConfig::default();
        let debug = format!("{:?}", config);
        assert!(debug.contains("FromCsvConfig"));
        assert!(debug.contains("delimiter"));
        assert!(debug.contains("has_headers"));
        assert!(debug.contains("trim"));
    }

    #[test]
    fn test_from_csv_config_clone() {
        let config = FromCsvConfig {
            delimiter: b'\t',
            has_headers: false,
            trim: false,
            max_rows: 500_000,
            infer_schema: false,
            sample_rows: 100,
            list_key: None,
        };
        let cloned = config.clone();
        assert_eq!(cloned.delimiter, b'\t');
        assert!(!cloned.has_headers);
        assert!(!cloned.trim);
        assert_eq!(cloned.max_rows, 500_000);
        assert!(!cloned.infer_schema);
        assert_eq!(cloned.sample_rows, 100);
        assert_eq!(cloned.list_key, None);
    }

    #[test]
    fn test_from_csv_config_all_options() {
        let config = FromCsvConfig {
            delimiter: b';',
            has_headers: true,
            trim: true,
            max_rows: 2_000_000,
            infer_schema: true,
            sample_rows: 200,
            list_key: Some("custom".to_string()),
        };
        assert_eq!(config.delimiter, b';');
        assert!(config.has_headers);
        assert!(config.trim);
        assert_eq!(config.max_rows, 2_000_000);
        assert!(config.infer_schema);
        assert_eq!(config.sample_rows, 200);
        assert_eq!(config.list_key, Some("custom".to_string()));
    }

    // ==================== Row limit ====================

    #[test]
    fn test_max_rows_limit_enforcement() {
        let mut csv_data = String::from("id,value\n");
        let max_rows = 100;
        // `0..=max_rows` writes one record more than the limit allows.
        for i in 0..=max_rows {
            csv_data.push_str(&format!("{},test{}\n", i, i));
        }

        let config = FromCsvConfig {
            max_rows,
            infer_schema: false,
            sample_rows: 100,
            ..Default::default()
        };

        let result = from_csv_with_config(&csv_data, "Item", &["value"], config);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, CsvError::SecurityLimit { .. }));
        assert!(err.to_string().contains("Security limit"));
        assert!(err.to_string().contains(&max_rows.to_string()));
    }

    #[test]
    fn test_max_rows_limit_not_exceeded() {
        let mut csv_data = String::from("id,value\n");
        let max_rows = 100;
        for i in 0..(max_rows - 1) {
            csv_data.push_str(&format!("{},test{}\n", i, i));
        }

        let config = FromCsvConfig {
            max_rows,
            infer_schema: false,
            sample_rows: 100,
            ..Default::default()
        };

        let result = from_csv_with_config(&csv_data, "Item", &["value"], config);
        assert!(result.is_ok());
        let doc = result.unwrap();
        let list = doc.get("items").unwrap().as_list().unwrap();
        assert_eq!(list.rows.len(), max_rows - 1);
    }

    // ==================== from_csv basics ====================

    #[test]
    fn test_from_csv_basic() {
        let csv_data = "id,name,age,active\n1,Alice,30,true\n2,Bob,25,false\n";
        let doc = from_csv(csv_data, "Person", &["name", "age", "active"]).unwrap();

        assert_eq!(doc.version, (1, 0));

        // The schema always starts with the implicit `id` column.
        let schema = doc.get_schema("Person").unwrap();
        assert_eq!(schema, &["id", "name", "age", "active"]);

        // Default list key: lower-cased type name plus `s`.
        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(list.type_name, "Person");
        assert_eq!(list.rows.len(), 2);

        let row1 = &list.rows[0];
        assert_eq!(row1.id, "1");
        assert_eq!(row1.fields.len(), schema.len());
        assert_eq!(row1.fields[0], Value::Int(1));
        assert_eq!(row1.fields[1], Value::String("Alice".to_string()));
        assert_eq!(row1.fields[2], Value::Int(30));
        assert_eq!(row1.fields[3], Value::Bool(true));

        let row2 = &list.rows[1];
        assert_eq!(row2.id, "2");
        assert_eq!(row2.fields.len(), schema.len());
        assert_eq!(row2.fields[0], Value::Int(2));
        assert_eq!(row2.fields[1], Value::String("Bob".to_string()));
        assert_eq!(row2.fields[2], Value::Int(25));
        assert_eq!(row2.fields[3], Value::Bool(false));
    }

    #[test]
    fn test_from_csv_without_headers() {
        let csv_data = "1,Alice,30\n2,Bob,25\n";
        let config = FromCsvConfig {
            has_headers: false,
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Person", &["name", "age"], config).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(list.rows.len(), 2);
    }

    #[test]
    fn test_from_csv_custom_delimiter() {
        let csv_data = "id\tname\tage\n1\tAlice\t30\n2\tBob\t25\n";
        let config = FromCsvConfig {
            delimiter: b'\t',
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Person", &["name", "age"], config).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(list.rows.len(), 2);
    }

    #[test]
    fn test_from_csv_semicolon_delimiter() {
        let csv_data = "id;name;age\n1;Alice;30\n";
        let config = FromCsvConfig {
            delimiter: b';',
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Person", &["name", "age"], config).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(list.rows.len(), 1);
        assert_eq!(list.rows[0].fields[1], Value::String("Alice".to_string()));
    }

    #[test]
    fn test_from_csv_empty_file() {
        // Header only: the list exists but has no rows.
        let csv_data = "id,name\n";
        let doc = from_csv(csv_data, "Person", &["name"]).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert!(list.rows.is_empty());
    }

    #[test]
    fn test_from_csv_single_row() {
        let csv_data = "id,name\n1,Alice\n";
        let doc = from_csv(csv_data, "Person", &["name"]).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(list.rows.len(), 1);
    }

    // ==================== parse_csv_value: null / bool / numbers ====================

    #[test]
    fn test_parse_csv_value_null_empty() {
        assert_eq!(parse_csv_value("").unwrap(), Value::Null);
    }

    #[test]
    fn test_parse_csv_value_null_tilde() {
        assert_eq!(parse_csv_value("~").unwrap(), Value::Null);
    }

    #[test]
    fn test_parse_csv_value_null_whitespace() {
        assert_eq!(parse_csv_value(" ").unwrap(), Value::Null);
    }

    #[test]
    fn test_parse_csv_value_bool_true() {
        assert_eq!(parse_csv_value("true").unwrap(), Value::Bool(true));
    }

    #[test]
    fn test_parse_csv_value_bool_false() {
        assert_eq!(parse_csv_value("false").unwrap(), Value::Bool(false));
    }

    #[test]
    fn test_parse_csv_value_int_positive() {
        assert_eq!(parse_csv_value("42").unwrap(), Value::Int(42));
    }

    #[test]
    fn test_parse_csv_value_int_negative() {
        assert_eq!(parse_csv_value("-123").unwrap(), Value::Int(-123));
    }

    #[test]
    fn test_parse_csv_value_int_zero() {
        assert_eq!(parse_csv_value("0").unwrap(), Value::Int(0));
    }

    #[test]
    fn test_parse_csv_value_int_large() {
        assert_eq!(
            parse_csv_value("9223372036854775807").unwrap(),
            Value::Int(i64::MAX)
        );
    }

    #[test]
    fn test_parse_csv_value_float_positive() {
        assert_eq!(parse_csv_value("3.25").unwrap(), Value::Float(3.25));
    }

    #[test]
    fn test_parse_csv_value_float_negative() {
        assert_eq!(parse_csv_value("-2.5").unwrap(), Value::Float(-2.5));
    }

    #[test]
    fn test_parse_csv_value_float_zero() {
        assert_eq!(parse_csv_value("0.0").unwrap(), Value::Float(0.0));
    }

    #[test]
    fn test_parse_csv_value_float_scientific() {
        let val = parse_csv_value("1.5e10").unwrap();
        if let Value::Float(f) = val {
            assert!((f - 1.5e10).abs() < 1e5);
        } else {
            panic!("Expected float");
        }
    }

    // ==================== parse_csv_value: strings ====================

    #[test]
    fn test_parse_csv_value_string() {
        assert_eq!(
            parse_csv_value("hello").unwrap(),
            Value::String("hello".to_string())
        );
    }

    #[test]
    fn test_parse_csv_value_string_with_spaces() {
        // String values keep the cell's original surrounding whitespace.
        assert_eq!(
            parse_csv_value(" hello world ").unwrap(),
            Value::String(" hello world ".to_string())
        );
    }

    #[test]
    fn test_parse_csv_value_string_numeric_looking() {
        // Leading zeros are not preserved: the cell parses as an integer.
        assert_eq!(parse_csv_value("007").unwrap(), Value::Int(7));
    }

    // ==================== parse_csv_value: special floats ====================

    #[test]
    fn test_parse_csv_value_nan() {
        let nan = parse_csv_value("NaN").unwrap();
        assert!(matches!(nan, Value::Float(f) if f.is_nan()));
    }

    #[test]
    fn test_parse_csv_value_infinity() {
        let inf = parse_csv_value("Infinity").unwrap();
        assert_eq!(inf, Value::Float(f64::INFINITY));
    }

    #[test]
    fn test_parse_csv_value_neg_infinity() {
        let neg_inf = parse_csv_value("-Infinity").unwrap();
        assert_eq!(neg_inf, Value::Float(f64::NEG_INFINITY));
    }

    // ==================== parse_csv_value: references ====================

    #[test]
    fn test_parse_csv_value_reference_local() {
        let ref_val = parse_csv_value("@user1").unwrap();
        if let Value::Reference(r) = ref_val {
            assert_eq!(r.id, "user1");
            assert_eq!(r.type_name, None);
        } else {
            panic!("Expected reference");
        }
    }

    #[test]
    fn test_parse_csv_value_reference_qualified() {
        let ref_val = parse_csv_value("@User:user1").unwrap();
        if let Value::Reference(r) = ref_val {
            assert_eq!(r.id, "user1");
            assert_eq!(r.type_name, Some("User".to_string()));
        } else {
            panic!("Expected reference");
        }
    }

    #[test]
    fn test_parse_csv_value_reference_with_dashes() {
        let ref_val = parse_csv_value("@my-item-123").unwrap();
        if let Value::Reference(r) = ref_val {
            assert_eq!(r.id, "my-item-123");
        } else {
            panic!("Expected reference");
        }
    }

    #[test]
    fn test_parse_reference_empty_error() {
        let result = parse_reference("@");
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Empty reference ID"));
    }

    #[test]
    fn test_parse_reference_empty_type_error() {
        let result = parse_reference("@:id");
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid reference format"));
    }

    #[test]
    fn test_parse_reference_empty_id_error() {
        let result = parse_reference("@Type:");
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Invalid reference format"));
    }

    // ==================== parse_csv_value: expressions ====================

    #[test]
    fn test_parse_csv_value_expression_identifier() {
        let expr = parse_csv_value("$(foo)").unwrap();
        assert_eq!(expr, expr_value("foo"));
    }

    #[test]
    fn test_parse_csv_value_expression_call() {
        let expr = parse_csv_value("$(add(x, y))").unwrap();
        assert_eq!(expr, expr_value("add(x, y)"));
    }

    #[test]
    fn test_parse_csv_value_expression_nested() {
        let expr = parse_csv_value("$(outer(inner(x)))").unwrap();
        if let Value::Expression(e) = expr {
            assert_eq!(e.to_string(), "outer(inner(x))");
        } else {
            panic!("Expected expression");
        }
    }

    // ==================== parse_csv_value: tensors ====================

    #[test]
    fn test_parse_csv_value_tensor_1d() {
        let val = parse_csv_value("[1, 2, 3]").unwrap();
        if let Value::Tensor(Tensor::Array(arr)) = val {
            assert_eq!(arr.len(), 3);
        } else {
            panic!("Expected tensor array");
        }
    }

    #[test]
    fn test_parse_csv_value_tensor_2d() {
        let val = parse_csv_value("[[1, 2], [3, 4]]").unwrap();
        if let Value::Tensor(Tensor::Array(outer)) = val {
            assert_eq!(outer.len(), 2);
            if let Tensor::Array(inner) = &outer[0] {
                assert_eq!(inner.len(), 2);
            } else {
                panic!("Expected nested array");
            }
        } else {
            panic!("Expected tensor array");
        }
    }

    #[test]
    fn test_parse_csv_value_tensor_empty_is_string() {
        // `[]` is bracketed but is expected to come back as plain text.
        let val = parse_csv_value("[]").unwrap();
        assert_eq!(val, Value::String("[]".to_string()));
    }

    // ==================== Error cases ====================

    #[test]
    fn test_empty_id_error() {
        let csv_data = "id,name\n,Alice\n";
        let result = from_csv(csv_data, "Person", &["name"]);
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), CsvError::EmptyId { .. }));
    }

    #[test]
    fn test_mismatched_field_count() {
        // A ragged row surfaces as `ParseError` (presumably raised by the CSV
        // reader itself) rather than as `WidthMismatch`.
        let csv_data = "id,name,age\n1,Alice\n";
        let result = from_csv(csv_data, "Person", &["name", "age"]);
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), CsvError::ParseError { .. }));
    }

    // ==================== Whitespace handling ====================

    #[test]
    fn test_whitespace_trimming_enabled() {
        let csv_data = "id,name,age\n1, Alice , 30 \n";
        let doc = from_csv(csv_data, "Person", &["name", "age"]).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        let row = &list.rows[0];

        assert_eq!(row.fields[0], Value::Int(1));
        assert_eq!(row.fields[1], Value::String("Alice".to_string()));
        assert_eq!(row.fields[2], Value::Int(30));
    }

    #[test]
    fn test_whitespace_trimming_disabled() {
        let csv_data = "id,name\n1, Alice \n";
        let config = FromCsvConfig {
            trim: false,
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Person", &["name"], config).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(
            list.rows[0].fields[1],
            Value::String(" Alice ".to_string())
        );
    }

    // ==================== Reader entry points ====================

    #[test]
    fn test_from_csv_reader_basic() {
        let csv_data = "id,name\n1,Alice\n".as_bytes();
        let doc = from_csv_reader(csv_data, "Person", &["name"]).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(list.rows.len(), 1);
    }

    #[test]
    fn test_from_csv_reader_with_config() {
        let csv_data = "1\tAlice\n".as_bytes();
        let config = FromCsvConfig {
            delimiter: b'\t',
            has_headers: false,
            trim: true,
            ..Default::default()
        };
        let doc = from_csv_reader_with_config(csv_data, "Person", &["name"], config).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(list.rows.len(), 1);
    }

    // ==================== Default list key ====================

    #[test]
    fn test_type_naming_singularization() {
        let csv_data = "id,name\n1,Alice\n";
        let doc = from_csv(csv_data, "User", &["name"]).unwrap();

        // Type `User` is stored under the key `users`.
        let item = doc.get("users").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(list.type_name, "User");
    }

    // ==================== Quoting ====================

    #[test]
    fn test_quoted_fields() {
        let csv_data = "id,name,bio\n1,Alice,\"Hello, World\"\n";
        let doc = from_csv(csv_data, "Person", &["name", "bio"]).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(
            list.rows[0].fields[2],
            Value::String("Hello, World".to_string())
        );
    }

    #[test]
    fn test_quoted_fields_with_newline() {
        let csv_data = "id,name,bio\n1,Alice,\"Line 1\nLine 2\"\n";
        let doc = from_csv(csv_data, "Person", &["name", "bio"]).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(
            list.rows[0].fields[2],
            Value::String("Line 1\nLine 2".to_string())
        );
    }

    #[test]
    fn test_quoted_fields_with_quotes() {
        let csv_data = "id,name\n1,\"Alice \"\"Bob\"\" Smith\"\n";
        let doc = from_csv(csv_data, "Person", &["name"]).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(
            list.rows[0].fields[1],
            Value::String("Alice \"Bob\" Smith".to_string())
        );
    }

    // ==================== Miscellaneous values ====================

    #[test]
    fn test_unicode_values() {
        let csv_data = "id,name\n1,héllo 世界\n";
        let doc = from_csv(csv_data, "Person", &["name"]).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(
            list.rows[0].fields[1],
            Value::String("héllo 世界".to_string())
        );
    }

    #[test]
    fn test_string_id() {
        let csv_data = "id,name\nabc,Alice\n";
        let doc = from_csv(csv_data, "Person", &["name"]).unwrap();

        let item = doc.get("persons").unwrap();
        let list = item.as_list().unwrap();
        assert_eq!(list.rows[0].id, "abc");
        assert_eq!(list.rows[0].fields[0], Value::String("abc".to_string()));
    }

    #[test]
    fn test_many_columns() {
        let csv_data = "id,a,b,c,d,e\n1,2,3,4,5,6\n";
        let doc = from_csv(csv_data, "Item", &["a", "b", "c", "d", "e"]).unwrap();

        let item = doc.get("items").unwrap();
        let list = item.as_list().unwrap();
        // Five declared columns plus the implicit `id`.
        assert_eq!(list.schema.len(), 6);
        assert_eq!(list.rows[0].fields.len(), 6);
    }

    // ==================== Custom list key ====================

    #[test]
    fn test_custom_list_key_basic() {
        let csv_data = "id,name\n1,Alice\n";
        let config = FromCsvConfig {
            list_key: Some("people".to_string()),
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Person", &["name"], config).unwrap();

        // The custom key replaces the derived one.
        assert!(doc.get("people").is_some());
        assert!(doc.get("persons").is_none());

        let list = doc.get("people").unwrap().as_list().unwrap();
        assert_eq!(list.type_name, "Person");
        assert_eq!(list.rows.len(), 1);
    }

    #[test]
    fn test_custom_list_key_irregular_plurals() {
        let test_cases = vec![
            ("Person", "people"),
            ("Child", "children"),
            ("Tooth", "teeth"),
            ("Foot", "feet"),
            ("Mouse", "mice"),
            ("Goose", "geese"),
            ("Man", "men"),
            ("Woman", "women"),
            ("Ox", "oxen"),
            ("Datum", "data"),
        ];

        let csv_data = "id,value\n1,test\n";

        for (type_name, plural) in test_cases {
            let config = FromCsvConfig {
                list_key: Some(plural.to_string()),
                ..Default::default()
            };
            let doc = from_csv_with_config(csv_data, type_name, &["value"], config).unwrap();

            assert!(
                doc.get(plural).is_some(),
                "Failed to find {} for type {}",
                plural,
                type_name
            );
        }
    }

    #[test]
    fn test_custom_list_key_collective_nouns() {
        let csv_data = "id,value\n1,42\n";

        let test_cases = vec![
            ("Data", "dataset"),
            ("Information", "info_collection"),
            ("Equipment", "gear"),
            ("Furniture", "furnishings"),
        ];

        for (type_name, collective) in test_cases {
            let config = FromCsvConfig {
                list_key: Some(collective.to_string()),
                ..Default::default()
            };
            let doc = from_csv_with_config(csv_data, type_name, &["value"], config).unwrap();

            assert!(
                doc.get(collective).is_some(),
                "Failed to find {} for type {}",
                collective,
                type_name
            );
        }
    }

    #[test]
    fn test_custom_list_key_case_sensitive() {
        let csv_data = "id,value\n1,test\n";
        let config = FromCsvConfig {
            list_key: Some("MyCustomList".to_string()),
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Item", &["value"], config).unwrap();

        assert!(doc.get("MyCustomList").is_some());
        assert!(doc.get("mycustomlist").is_none());
        assert!(doc.get("items").is_none());
    }

    #[test]
    fn test_custom_list_key_empty_string() {
        let csv_data = "id,value\n1,test\n";
        // An empty key is accepted as-is; no validation is applied.
        let config = FromCsvConfig {
            list_key: Some("".to_string()),
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Item", &["value"], config).unwrap();

        assert!(doc.get("").is_some());
    }

    #[test]
    fn test_custom_list_key_with_special_chars() {
        let csv_data = "id,value\n1,test\n";
        let config = FromCsvConfig {
            list_key: Some("my-custom_list.v2".to_string()),
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Item", &["value"], config).unwrap();

        assert!(doc.get("my-custom_list.v2").is_some());
    }

    #[test]
    fn test_custom_list_key_unicode() {
        let csv_data = "id,value\n1,test\n";
        let config = FromCsvConfig {
            list_key: Some("人々".to_string()),
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Person", &["value"], config).unwrap();

        assert!(doc.get("人々").is_some());
    }

    #[test]
    fn test_custom_list_key_with_schema_inference() {
        let csv_data = "id,value\n1,42\n2,43\n3,44\n";
        let config = FromCsvConfig {
            list_key: Some("people".to_string()),
            infer_schema: true,
            sample_rows: 10,
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Person", &["value"], config).unwrap();

        assert!(doc.get("people").is_some());
        let list = doc.get("people").unwrap().as_list().unwrap();
        assert_eq!(list.rows.len(), 3);
        assert_eq!(list.rows[0].fields[1], Value::Int(42));
    }

    #[test]
    fn test_custom_list_key_none_uses_default() {
        let csv_data = "id,name\n1,Alice\n";
        let config = FromCsvConfig {
            list_key: None,
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Person", &["name"], config).unwrap();

        assert!(doc.get("persons").is_some());
        assert!(doc.get("people").is_none());
    }

    #[test]
    fn test_custom_list_key_default_config() {
        let csv_data = "id,name\n1,Alice\n";
        let doc = from_csv(csv_data, "User", &["name"]).unwrap();

        assert!(doc.get("users").is_some());
    }

    #[test]
    fn test_custom_list_key_preserves_type_name() {
        let csv_data = "id,name\n1,Alice\n";
        let config = FromCsvConfig {
            list_key: Some("people".to_string()),
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Person", &["name"], config).unwrap();

        // The key only changes where the list lives, not its type name.
        let list = doc.get("people").unwrap().as_list().unwrap();
        assert_eq!(list.type_name, "Person");
    }

    #[test]
    fn test_custom_list_key_with_multiple_types() {
        let csv1 = "id,name\n1,Alice\n";
        let config1 = FromCsvConfig {
            list_key: Some("people".to_string()),
            ..Default::default()
        };
        let doc1 = from_csv_with_config(csv1, "Person", &["name"], config1).unwrap();

        let csv2 = "id,name\n1,Fluffy\n";
        let config2 = FromCsvConfig {
            list_key: Some("mice".to_string()),
            ..Default::default()
        };
        let doc2 = from_csv_with_config(csv2, "Mouse", &["name"], config2).unwrap();

        assert!(doc1.get("people").is_some());
        assert!(doc1.get("persons").is_none());

        assert!(doc2.get("mice").is_some());
        assert!(doc2.get("mouses").is_none());
    }

    #[test]
    fn test_custom_list_key_numbers_in_name() {
        let csv_data = "id,value\n1,test\n";
        let config = FromCsvConfig {
            list_key: Some("items_v2".to_string()),
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Item", &["value"], config).unwrap();

        assert!(doc.get("items_v2").is_some());
    }

    #[test]
    fn test_custom_list_key_round_trip_compatibility() {
        use crate::to_csv_list;

        let csv_data = "id,name\n1,Alice\n2,Bob\n";
        let config = FromCsvConfig {
            list_key: Some("people".to_string()),
            ..Default::default()
        };
        let doc = from_csv_with_config(csv_data, "Person", &["name"], config).unwrap();

        // Exporting must use the same custom key that the import stored.
        let exported_csv = to_csv_list(&doc, "people").unwrap();
        assert!(exported_csv.contains("Alice"));
        assert!(exported_csv.contains("Bob"));

        assert!(to_csv_list(&doc, "persons").is_err());
    }

    #[test]
    fn test_from_csv_config_clone_with_list_key() {
        let config = FromCsvConfig {
            delimiter: b',',
            has_headers: true,
            trim: true,
            max_rows: 1000,
            infer_schema: false,
            sample_rows: 50,
            list_key: Some("people".to_string()),
        };
        let cloned = config.clone();
        assert_eq!(cloned.list_key, Some("people".to_string()));
    }

    #[test]
    fn test_from_csv_config_debug_with_list_key() {
        let config = FromCsvConfig {
            list_key: Some("people".to_string()),
            ..Default::default()
        };
        let debug = format!("{:?}", config);
        assert!(debug.contains("list_key"));
        assert!(debug.contains("people"));
    }
}