1use crate::error::MetadataError;
7use dtt::datetime::DateTime;
8use regex::Regex;
9use serde_json::Value as JsonValue;
10use std::collections::HashMap;
11use toml::Value as TomlValue;
12
/// String-keyed, string-valued metadata for a single document.
///
/// The type is a thin wrapper around a `HashMap<String, String>`; every
/// method forwards to the underlying map.
#[derive(Debug, Default, Clone)]
pub struct Metadata {
    /// Backing storage: field name mapped to its textual value.
    inner: HashMap<String, String>,
}

impl Metadata {
    /// Builds a `Metadata` that takes ownership of the given map.
    pub fn new(data: HashMap<String, String>) -> Self {
        Self { inner: data }
    }

    /// Returns a reference to the value stored under `key`, or `None`
    /// when the key is absent.
    pub fn get(&self, key: &str) -> Option<&String> {
        let Self { inner } = self;
        inner.get(key)
    }

    /// Stores `value` under `key`.
    ///
    /// Returns the value that was previously stored under the same key,
    /// if there was one.
    pub fn insert(&mut self, key: String, value: String) -> Option<String> {
        let Self { inner } = self;
        inner.insert(key, value)
    }

    /// Reports whether a value is stored under `key`.
    pub fn contains_key(&self, key: &str) -> bool {
        let Self { inner } = self;
        inner.contains_key(key)
    }

    /// Consumes the wrapper and hands back the underlying map.
    pub fn into_inner(self) -> HashMap<String, String> {
        let Self { inner } = self;
        inner
    }
}
99
100pub fn extract_metadata(
116 content: &str,
117) -> Result<Metadata, MetadataError> {
118 extract_yaml_metadata(content)
119 .or_else(|| extract_toml_metadata(content))
120 .or_else(|| extract_json_metadata(content))
121 .ok_or_else(|| MetadataError::ExtractionError {
122 message: "No valid front matter found.".to_string(),
123 })
124}
125
126fn extract_yaml_metadata(content: &str) -> Option<Metadata> {
136 let re = Regex::new(r"(?s)^\s*---\s*\n(.*?)\n\s*---\s*").ok()?;
137 let captures = re.captures(content)?;
138
139 let yaml_str = captures.get(1)?.as_str().trim();
140
141 let yaml_value: serde_yml::Value =
142 serde_yml::from_str(yaml_str).ok()?;
143
144 let metadata: HashMap<String, String> = flatten_yaml(&yaml_value);
145
146 Some(Metadata::new(metadata))
147}
148
149fn flatten_yaml(value: &serde_yml::Value) -> HashMap<String, String> {
154 let mut map = HashMap::new();
155 flatten_yaml_recursive(value, String::new(), &mut map);
156 map
157}
158
159fn flatten_yaml_recursive(
162 value: &serde_yml::Value,
163 prefix: String,
164 map: &mut HashMap<String, String>,
165) {
166 match value {
167 serde_yml::Value::Mapping(m) => {
168 for (k, v) in m {
169 let new_prefix = if prefix.is_empty() {
170 k.as_str().unwrap_or_default().to_string()
171 } else {
172 format!(
173 "{}.{}",
174 prefix,
175 k.as_str().unwrap_or_default()
176 )
177 };
178 flatten_yaml_recursive(v, new_prefix, map);
179 }
180 }
181 serde_yml::Value::Sequence(seq) => {
182 let inline_list = seq
183 .iter()
184 .filter_map(|item| item.as_str().map(|s| s.to_string()))
185 .collect::<Vec<String>>()
186 .join(", ");
187 map.insert(prefix, format!("[{}]", inline_list));
188 }
189 _ => {
190 map.insert(
191 prefix,
192 value.as_str().unwrap_or_default().to_string(),
193 );
194 }
195 }
196}
197
198fn extract_toml_metadata(content: &str) -> Option<Metadata> {
208 let re = Regex::new(r"(?s)^\s*\+\+\+\s*(.*?)\s*\+\+\+").ok()?;
209 let captures = re.captures(content)?;
210 let toml_str = captures.get(1)?.as_str().trim();
211
212 let toml_value: TomlValue = toml::from_str(toml_str).ok()?;
213
214 let mut metadata = HashMap::new();
215 flatten_toml(&toml_value, &mut metadata, String::new());
216
217 Some(Metadata::new(metadata))
218}
219
220fn flatten_toml(
225 value: &TomlValue,
226 map: &mut HashMap<String, String>,
227 prefix: String,
228) {
229 match value {
230 TomlValue::Table(table) => {
231 for (k, v) in table {
232 let new_prefix = if prefix.is_empty() {
233 k.to_string()
234 } else {
235 format!("{}.{}", prefix, k)
236 };
237 flatten_toml(v, map, new_prefix);
238 }
239 }
240 TomlValue::Array(arr) => {
241 let inline_list = arr
242 .iter()
243 .map(|v| {
244 match v {
246 TomlValue::String(s) => s.clone(),
247 _ => v.to_string(),
248 }
249 })
250 .collect::<Vec<String>>()
251 .join(", ");
252 map.insert(prefix, format!("[{}]", inline_list));
253 }
254 TomlValue::String(s) => {
255 map.insert(prefix, s.clone());
256 }
257 TomlValue::Datetime(dt) => {
258 map.insert(prefix, dt.to_string());
259 }
260 _ => {
261 map.insert(prefix, value.to_string());
262 }
263 }
264}
265
266fn extract_json_metadata(content: &str) -> Option<Metadata> {
276 let re = Regex::new(r"(?s)^\s*\{\s*(.*?)\s*\}").ok()?;
277 let captures = re.captures(content)?;
278 let json_str = format!("{{{}}}", captures.get(1)?.as_str().trim());
279
280 let json_value: JsonValue = serde_json::from_str(&json_str).ok()?;
281 let json_object = json_value.as_object()?;
282
283 let metadata: HashMap<String, String> = json_object
284 .iter()
285 .filter_map(|(k, v)| {
286 v.as_str().map(|s| (k.clone(), s.to_string()))
287 })
288 .collect();
289
290 Some(Metadata::new(metadata))
291}
292
293pub fn process_metadata(
309 metadata: &Metadata,
310) -> Result<Metadata, MetadataError> {
311 let mut processed = metadata.clone();
312
313 if let Some(date) = processed.get("date").cloned() {
315 let standardized_date = standardize_date(&date)?;
316 processed.insert("date".to_string(), standardized_date);
317 }
318
319 ensure_required_fields(&processed)?;
321
322 generate_derived_fields(&mut processed);
324
325 Ok(processed)
326}
327
328fn standardize_date(date: &str) -> Result<String, MetadataError> {
344 if date.trim().is_empty() {
346 return Err(MetadataError::DateParseError(
347 "Date string is empty.".to_string(),
348 ));
349 }
350
351 if date.len() < 8 {
352 return Err(MetadataError::DateParseError(
353 "Date string is too short.".to_string(),
354 ));
355 }
356
357 let date = if date.contains('/') && date.len() == 10 {
359 let parts: Vec<&str> = date.split('/').collect();
360 if parts.len() == 3
361 && parts[0].len() == 2
362 && parts[1].len() == 2
363 && parts[2].len() == 4
364 {
365 format!("{}-{}-{}", parts[2], parts[1], parts[0]) } else {
367 return Err(MetadataError::DateParseError(
368 "Invalid DD/MM/YYYY date format.".to_string(),
369 ));
370 }
371 } else {
372 date.to_string()
373 };
374
375 let parsed_date = DateTime::parse(&date)
377 .or_else(|_| {
378 DateTime::parse_custom_format(&date, "[year]-[month]-[day]")
379 })
380 .or_else(|_| {
381 DateTime::parse_custom_format(&date, "[month]/[day]/[year]")
382 })
383 .map_err(|e| {
384 MetadataError::DateParseError(format!(
385 "Failed to parse date: {}",
386 e
387 ))
388 })?;
389
390 Ok(format!(
392 "{:04}-{:02}-{:02}",
393 parsed_date.year(),
394 parsed_date.month() as u8,
395 parsed_date.day()
396 ))
397}
398
399fn ensure_required_fields(
413 metadata: &Metadata,
414) -> Result<(), MetadataError> {
415 let required_fields = ["title", "date"];
416
417 for &field in &required_fields {
418 if !metadata.contains_key(field) {
419 return Err(MetadataError::MissingFieldError(
420 field.to_string(),
421 ));
422 }
423 }
424
425 Ok(())
426}
427
428fn generate_derived_fields(metadata: &mut Metadata) {
436 if !metadata.contains_key("slug") {
437 if let Some(title) = metadata.get("title") {
438 let slug = generate_slug(title);
439 metadata.insert("slug".to_string(), slug);
440 }
441 }
442}
443
/// Builds a slug from `title` by lowercasing it and turning every space
/// character into a hyphen.
///
/// All other characters, including punctuation and non-ASCII letters,
/// are kept; consecutive spaces yield consecutive hyphens.
fn generate_slug(title: &str) -> String {
    title
        .to_lowercase()
        .chars()
        .map(|ch| if ch == ' ' { '-' } else { ch })
        .collect()
}
456
#[cfg(test)]
mod tests {
    use super::*;
    use dtt::dtt_parse;

    /// Supported date inputs are normalised to `YYYY-MM-DD`.
    #[test]
    fn test_standardize_date() {
        let test_cases = vec![
            ("2023-05-20T15:30:00Z", "2023-05-20"),
            ("2023-05-20", "2023-05-20"),
            ("20/05/2023", "2023-05-20"), // DD/MM/YYYY
        ];

        for (input, expected) in test_cases {
            let result = standardize_date(input);
            assert!(result.is_ok(), "Failed for input: {}", input);
            assert_eq!(result.unwrap(), expected);
        }
    }

    /// Empty, unparseable and two-digit-year inputs are rejected.
    #[test]
    fn test_standardize_date_errors() {
        assert!(standardize_date("").is_err());
        assert!(standardize_date("invalid").is_err());
        assert!(standardize_date("20/05/23").is_err());
    }

    /// The zero-padded format string used for output dates.
    #[test]
    fn test_date_format() {
        let dt = dtt_parse!("2023-01-01T12:00:00+00:00").unwrap();
        let formatted = format!(
            "{:04}-{:02}-{:02}",
            dt.year(),
            dt.month() as u8,
            dt.day()
        );
        assert_eq!(formatted, "2023-01-01");
    }

    /// Spaces map one-to-one onto hyphens, so the doubled leading and
    /// trailing spaces below are what produce the doubled hyphens.
    #[test]
    fn test_generate_slug() {
        assert_eq!(generate_slug("Hello World"), "hello-world");
        assert_eq!(generate_slug("Test 123"), "test-123");
        assert_eq!(generate_slug("  Spaces  "), "--spaces--");
    }

    /// Processing standardises the date and derives a slug.
    #[test]
    fn test_process_metadata() {
        let mut metadata = Metadata::new(HashMap::new());
        metadata.insert("title".to_string(), "Test Title".to_string());
        metadata.insert(
            "date".to_string(),
            "2023-05-20T15:30:00Z".to_string(),
        );

        let processed = process_metadata(&metadata).unwrap();
        assert_eq!(processed.get("title").unwrap(), "Test Title");
        assert_eq!(processed.get("date").unwrap(), "2023-05-20");
        assert_eq!(processed.get("slug").unwrap(), "test-title");
    }

    /// Each of the three front-matter formats is recognised.
    #[test]
    fn test_extract_metadata() {
        let yaml_content = r#"---
title: YAML Test
date: 2023-05-20
---
Content here"#;

        let toml_content = r#"+++
title = "TOML Test"
date = "2023-05-20"
+++
Content here"#;

        let json_content = r#"{
"title": "JSON Test",
"date": "2023-05-20"
}
Content here"#;

        let yaml_metadata = extract_metadata(yaml_content).unwrap();
        assert_eq!(yaml_metadata.get("title").unwrap(), "YAML Test");

        let toml_metadata = extract_metadata(toml_content).unwrap();
        assert_eq!(toml_metadata.get("title").unwrap(), "TOML Test");

        let json_metadata = extract_metadata(json_content).unwrap();
        assert_eq!(json_metadata.get("title").unwrap(), "JSON Test");
    }

    /// Content without any front matter is an error.
    #[test]
    fn test_extract_metadata_failure() {
        let invalid_content = "This content has no metadata";
        assert!(extract_metadata(invalid_content).is_err());
    }

    /// Both `title` and `date` must be present.
    #[test]
    fn test_ensure_required_fields() {
        let mut metadata = Metadata::new(HashMap::new());
        metadata.insert("title".to_string(), "Test".to_string());
        metadata.insert("date".to_string(), "2023-05-20".to_string());

        assert!(ensure_required_fields(&metadata).is_ok());

        let mut incomplete_metadata = Metadata::new(HashMap::new());
        incomplete_metadata
            .insert("title".to_string(), "Test".to_string());

        assert!(ensure_required_fields(&incomplete_metadata).is_err());
    }

    /// A slug is derived from the title when none is set.
    #[test]
    fn test_generate_derived_fields() {
        let mut metadata = Metadata::new(HashMap::new());
        metadata.insert("title".to_string(), "Test Title".to_string());

        generate_derived_fields(&mut metadata);

        assert_eq!(metadata.get("slug").unwrap(), "test-title");
    }

    /// Basic accessors and mutators of `Metadata`.
    #[test]
    fn test_metadata_methods() {
        let mut metadata = Metadata::new(HashMap::new());
        metadata.insert("key".to_string(), "value".to_string());

        assert_eq!(metadata.get("key"), Some(&"value".to_string()));
        assert!(metadata.contains_key("key"));
        assert!(!metadata.contains_key("nonexistent"));

        let old_value =
            metadata.insert("key".to_string(), "new_value".to_string());
        assert_eq!(old_value, Some("value".to_string()));
        assert_eq!(metadata.get("key"), Some(&"new_value".to_string()));

        let inner = metadata.into_inner();
        assert_eq!(inner.get("key"), Some(&"new_value".to_string()));
    }

    /// An unparseable date makes processing fail.
    #[test]
    fn test_process_metadata_with_invalid_date() {
        let mut metadata = Metadata::new(HashMap::new());
        metadata.insert("title".to_string(), "Test Title".to_string());
        metadata.insert("date".to_string(), "invalid_date".to_string());

        assert!(process_metadata(&metadata).is_err());
    }

    /// Nested YAML mappings become dotted keys; sequences become an
    /// inline list.
    #[test]
    fn test_extract_yaml_metadata_with_complex_structure() {
        let yaml_content = r#"---
title: Complex YAML Test
date: 2023-05-20
author:
 name: John Doe
 email: john@example.com
tags:
 - rust
 - metadata
 - testing
---
Content here"#;

        let metadata = extract_metadata(yaml_content).unwrap();
        assert_eq!(metadata.get("title").unwrap(), "Complex YAML Test");
        assert_eq!(metadata.get("date").unwrap(), "2023-05-20");
        assert_eq!(metadata.get("author.name").unwrap(), "John Doe");
        assert_eq!(
            metadata.get("author.email").unwrap(),
            "john@example.com"
        );
        assert_eq!(
            metadata.get("tags").unwrap(),
            "[rust, metadata, testing]"
        );
    }

    /// Nested TOML tables become dotted keys. `tags` is declared after
    /// the `[author]` header, so it belongs to that table and is stored
    /// as `author.tags`.
    #[test]
    fn test_extract_toml_metadata_with_complex_structure() {
        let toml_content = r#"+++
title = "Complex TOML Test"
date = 2023-05-20

[author]
name = "John Doe"
email = "john@example.com"

tags = ["rust", "metadata", "testing"]
+++
Content here"#;

        let metadata = extract_metadata(toml_content).unwrap();
        assert_eq!(
            metadata.get("title").expect("Missing 'title' key"),
            "Complex TOML Test"
        );
        assert_eq!(
            metadata.get("date").expect("Missing 'date' key"),
            "2023-05-20"
        );
        assert_eq!(
            metadata
                .get("author.name")
                .expect("Missing 'author.name' key"),
            "John Doe"
        );
        assert_eq!(
            metadata
                .get("author.email")
                .expect("Missing 'author.email' key"),
            "john@example.com"
        );
        assert_eq!(
            metadata
                .get("author.tags")
                .expect("Missing 'author.tags' key"),
            "[rust, metadata, testing]"
        );
    }

    /// Punctuation and non-ASCII letters survive slug generation, and
    /// every space becomes its own hyphen (two leading, three inner and
    /// two trailing spaces in the last case).
    #[test]
    fn test_generate_slug_with_special_characters() {
        assert_eq!(
            generate_slug("Hello, World! 123"),
            "hello,-world!-123"
        );
        assert_eq!(generate_slug("Test: Ästhetik"), "test:-ästhetik");
        assert_eq!(
            generate_slug("  Multiple   Spaces  "),
            "--multiple---spaces--"
        );
    }
}