1use crate::error::MetadataError;
7use dtt::datetime::DateTime;
8use regex::Regex;
9use serde_json::Value as JsonValue;
10use std::collections::HashMap;
11use toml::Value as TomlValue;
12
/// Flat key/value store for front-matter fields.
///
/// Both keys and values are plain strings held in a `HashMap`.
#[derive(Clone, Debug, Default)]
pub struct Metadata {
    inner: HashMap<String, String>,
}

impl Metadata {
    /// Wraps an existing map of fields.
    pub fn new(data: HashMap<String, String>) -> Self {
        Self { inner: data }
    }

    /// Looks up the value stored under `key`, if any.
    pub fn get(&self, key: &str) -> Option<&String> {
        let fields = &self.inner;
        fields.get(key)
    }

    /// Stores `value` under `key`.
    ///
    /// Returns the value that was previously stored under `key`, or `None`
    /// when the key was not present.
    pub fn insert(
        &mut self,
        key: String,
        value: String,
    ) -> Option<String> {
        let previous = self.inner.insert(key, value);
        previous
    }

    /// Reports whether a value is stored under `key`.
    pub fn contains_key(&self, key: &str) -> bool {
        self.get(key).is_some()
    }

    /// Consumes the wrapper and hands back the underlying map.
    pub fn into_inner(self) -> HashMap<String, String> {
        let Self { inner } = self;
        inner
    }
}
86
87pub fn extract_metadata(
103 content: &str,
104) -> Result<Metadata, MetadataError> {
105 extract_yaml_metadata(content)
106 .or_else(|| extract_toml_metadata(content))
107 .or_else(|| extract_json_metadata(content))
108 .ok_or_else(|| MetadataError::ExtractionError {
109 message: "No valid front matter found.".to_string(),
110 })
111}
112
113fn extract_yaml_metadata(content: &str) -> Option<Metadata> {
123 let re = Regex::new(r"(?s)^\s*---\s*\n(.*?)\n\s*---\s*").ok()?;
124 let captures = re.captures(content)?;
125
126 let yaml_str = captures.get(1)?.as_str().trim();
127
128 let yaml_value: serde_yml::Value =
129 serde_yml::from_str(yaml_str).ok()?;
130
131 let metadata: HashMap<String, String> = flatten_yaml(&yaml_value);
132
133 Some(Metadata::new(metadata))
134}
135
136fn flatten_yaml(value: &serde_yml::Value) -> HashMap<String, String> {
137 let mut map = HashMap::new();
138 flatten_yaml_recursive(value, String::new(), &mut map);
139 map
140}
141
142fn flatten_yaml_recursive(
143 value: &serde_yml::Value,
144 prefix: String,
145 map: &mut HashMap<String, String>,
146) {
147 match value {
148 serde_yml::Value::Mapping(m) => {
149 for (k, v) in m {
150 let new_prefix = if prefix.is_empty() {
151 k.as_str().unwrap_or_default().to_string()
152 } else {
153 format!(
154 "{}.{}",
155 prefix,
156 k.as_str().unwrap_or_default()
157 )
158 };
159 flatten_yaml_recursive(v, new_prefix, map);
160 }
161 }
162 serde_yml::Value::Sequence(seq) => {
163 let inline_list = seq
164 .iter()
165 .filter_map(|item| item.as_str().map(|s| s.to_string()))
166 .collect::<Vec<String>>()
167 .join(", ");
168 map.insert(prefix, format!("[{}]", inline_list));
169 }
170 _ => {
171 map.insert(
172 prefix,
173 value.as_str().unwrap_or_default().to_string(),
174 );
175 }
176 }
177}
178
179fn extract_toml_metadata(content: &str) -> Option<Metadata> {
189 let re = Regex::new(r"(?s)^\s*\+\+\+\s*(.*?)\s*\+\+\+").ok()?;
190 let captures = re.captures(content)?;
191 let toml_str = captures.get(1)?.as_str().trim();
192
193 let toml_value: TomlValue = toml::from_str(toml_str).ok()?;
194
195 let mut metadata = HashMap::new();
196 flatten_toml(&toml_value, &mut metadata, String::new());
197
198 Some(Metadata::new(metadata))
199}
200
201fn flatten_toml(
202 value: &TomlValue,
203 map: &mut HashMap<String, String>,
204 prefix: String,
205) {
206 match value {
207 TomlValue::Table(table) => {
208 for (k, v) in table {
209 let new_prefix = if prefix.is_empty() {
210 k.to_string()
211 } else {
212 format!("{}.{}", prefix, k)
213 };
214 flatten_toml(v, map, new_prefix);
215 }
216 }
217 TomlValue::Array(arr) => {
218 let inline_list = arr
219 .iter()
220 .map(|v| {
221 match v {
223 TomlValue::String(s) => s.clone(),
224 _ => v.to_string(),
225 }
226 })
227 .collect::<Vec<String>>()
228 .join(", ");
229 map.insert(prefix, format!("[{}]", inline_list));
230 }
231 TomlValue::String(s) => {
232 map.insert(prefix, s.clone());
233 }
234 TomlValue::Datetime(dt) => {
235 map.insert(prefix, dt.to_string());
236 }
237 _ => {
238 map.insert(prefix, value.to_string());
239 }
240 }
241}
242
243fn extract_json_metadata(content: &str) -> Option<Metadata> {
253 let re = Regex::new(r"(?s)^\s*\{\s*(.*?)\s*\}").ok()?;
254 let captures = re.captures(content)?;
255 let json_str = format!("{{{}}}", captures.get(1)?.as_str().trim());
256
257 let json_value: JsonValue = serde_json::from_str(&json_str).ok()?;
258 let json_object = json_value.as_object()?;
259
260 let metadata: HashMap<String, String> = json_object
261 .iter()
262 .filter_map(|(k, v)| {
263 v.as_str().map(|s| (k.clone(), s.to_string()))
264 })
265 .collect();
266
267 Some(Metadata::new(metadata))
268}
269
270pub fn process_metadata(
286 metadata: &Metadata,
287) -> Result<Metadata, MetadataError> {
288 let mut processed = metadata.clone();
289
290 if let Some(date) = processed.get("date").cloned() {
292 let standardized_date = standardize_date(&date)?;
293 processed.insert("date".to_string(), standardized_date);
294 }
295
296 ensure_required_fields(&processed)?;
298
299 generate_derived_fields(&mut processed);
301
302 Ok(processed)
303}
304
305fn standardize_date(date: &str) -> Result<String, MetadataError> {
321 if date.trim().is_empty() {
323 return Err(MetadataError::DateParseError(
324 "Date string is empty.".to_string(),
325 ));
326 }
327
328 if date.len() < 8 {
329 return Err(MetadataError::DateParseError(
330 "Date string is too short.".to_string(),
331 ));
332 }
333
334 let date = if date.contains('/') && date.len() == 10 {
336 let parts: Vec<&str> = date.split('/').collect();
337 if parts.len() == 3
338 && parts[0].len() == 2
339 && parts[1].len() == 2
340 && parts[2].len() == 4
341 {
342 format!("{}-{}-{}", parts[2], parts[1], parts[0]) } else {
344 return Err(MetadataError::DateParseError(
345 "Invalid DD/MM/YYYY date format.".to_string(),
346 ));
347 }
348 } else {
349 date.to_string()
350 };
351
352 let parsed_date = DateTime::parse(&date)
354 .or_else(|_| {
355 DateTime::parse_custom_format(&date, "[year]-[month]-[day]")
356 })
357 .or_else(|_| {
358 DateTime::parse_custom_format(&date, "[month]/[day]/[year]")
359 })
360 .map_err(|e| {
361 MetadataError::DateParseError(format!(
362 "Failed to parse date: {}",
363 e
364 ))
365 })?;
366
367 Ok(format!(
369 "{:04}-{:02}-{:02}",
370 parsed_date.year(),
371 parsed_date.month() as u8,
372 parsed_date.day()
373 ))
374}
375
376fn ensure_required_fields(
390 metadata: &Metadata,
391) -> Result<(), MetadataError> {
392 let required_fields = ["title", "date"];
393
394 for &field in &required_fields {
395 if !metadata.contains_key(field) {
396 return Err(MetadataError::MissingFieldError(
397 field.to_string(),
398 ));
399 }
400 }
401
402 Ok(())
403}
404
405fn generate_derived_fields(metadata: &mut Metadata) {
413 if !metadata.contains_key("slug") {
414 if let Some(title) = metadata.get("title") {
415 let slug = generate_slug(title);
416 metadata.insert("slug".to_string(), slug);
417 }
418 }
419}
420
/// Builds a slug from `title`: lowercases it and turns each space into
/// `-`.
///
/// Every space is replaced individually, so leading, trailing, and
/// repeated spaces each produce their own `-`. Other characters,
/// including punctuation, are kept as they are.
fn generate_slug(title: &str) -> String {
    let lowered = title.to_lowercase();
    lowered
        .chars()
        .map(|ch| if ch == ' ' { '-' } else { ch })
        .collect()
}
433
#[cfg(test)]
mod tests {
    use super::*;
    use dtt::dtt_parse;

    /// Builds a `Metadata` by inserting each `(key, value)` pair in order.
    fn metadata_of(pairs: &[(&str, &str)]) -> Metadata {
        let mut built = Metadata::new(HashMap::new());
        for &(key, value) in pairs {
            built.insert(key.to_string(), value.to_string());
        }
        built
    }

    /// Supported date inputs are normalized to `YYYY-MM-DD`.
    #[test]
    fn test_standardize_date() {
        let cases = [
            ("2023-05-20T15:30:00Z", "2023-05-20"),
            ("2023-05-20", "2023-05-20"),
            ("20/05/2023", "2023-05-20"),
        ];

        for &(input, expected) in &cases {
            match standardize_date(input) {
                Ok(actual) => assert_eq!(actual, expected),
                Err(_) => panic!("Failed for input: {}", input),
            }
        }
    }

    /// Empty, non-date, and two-digit-year inputs are rejected.
    #[test]
    fn test_standardize_date_errors() {
        for &bad_input in &["", "invalid", "20/05/23"] {
            assert!(standardize_date(bad_input).is_err());
        }
    }

    /// The year/month/day accessors format to the expected string.
    #[test]
    fn test_date_format() {
        let parsed = dtt_parse!("2023-01-01T12:00:00+00:00").unwrap();
        let rendered = format!(
            "{:04}-{:02}-{:02}",
            parsed.year(),
            parsed.month() as u8,
            parsed.day()
        );
        assert_eq!(rendered, "2023-01-01");
    }

    /// Slugs are lowercased with every space turned into a dash.
    #[test]
    fn test_generate_slug() {
        let cases = [
            ("Hello World", "hello-world"),
            ("Test 123", "test-123"),
            ("  Spaces  ", "--spaces--"),
        ];

        for &(title, expected) in &cases {
            assert_eq!(generate_slug(title), expected);
        }
    }

    /// Processing standardizes the date and derives the slug.
    #[test]
    fn test_process_metadata() {
        let input = metadata_of(&[
            ("title", "Test Title"),
            ("date", "2023-05-20T15:30:00Z"),
        ]);

        let output = process_metadata(&input).unwrap();
        assert_eq!(output.get("title").unwrap(), "Test Title");
        assert_eq!(output.get("date").unwrap(), "2023-05-20");
        assert_eq!(output.get("slug").unwrap(), "test-title");
    }

    /// Each supported front-matter format yields its title.
    #[test]
    fn test_extract_metadata() {
        let yaml_content = r#"---
title: YAML Test
date: 2023-05-20
---
Content here"#;

        let toml_content = r#"+++
title = "TOML Test"
date = "2023-05-20"
+++
Content here"#;

        let json_content = r#"{
"title": "JSON Test",
"date": "2023-05-20"
}
Content here"#;

        let from_yaml = extract_metadata(yaml_content).unwrap();
        assert_eq!(from_yaml.get("title").unwrap(), "YAML Test");

        let from_toml = extract_metadata(toml_content).unwrap();
        assert_eq!(from_toml.get("title").unwrap(), "TOML Test");

        let from_json = extract_metadata(json_content).unwrap();
        assert_eq!(from_json.get("title").unwrap(), "JSON Test");
    }

    /// Content without front matter is an error.
    #[test]
    fn test_extract_metadata_failure() {
        let plain_text = "This content has no metadata";
        assert!(extract_metadata(plain_text).is_err());
    }

    /// Both `title` and `date` must be present.
    #[test]
    fn test_ensure_required_fields() {
        let complete =
            metadata_of(&[("title", "Test"), ("date", "2023-05-20")]);
        assert!(ensure_required_fields(&complete).is_ok());

        let missing_date = metadata_of(&[("title", "Test")]);
        assert!(ensure_required_fields(&missing_date).is_err());
    }

    /// A slug is derived from the title when none is set.
    #[test]
    fn test_generate_derived_fields() {
        let mut subject = metadata_of(&[("title", "Test Title")]);

        generate_derived_fields(&mut subject);

        assert_eq!(subject.get("slug").unwrap(), "test-title");
    }

    /// Exercises `get`, `contains_key`, `insert`, and `into_inner`.
    #[test]
    fn test_metadata_methods() {
        let mut subject = metadata_of(&[("key", "value")]);

        assert_eq!(subject.get("key"), Some(&"value".to_string()));
        assert!(subject.contains_key("key"));
        assert!(!subject.contains_key("nonexistent"));

        // Overwriting a key returns the value it replaced.
        let replaced =
            subject.insert("key".to_string(), "new_value".to_string());
        assert_eq!(replaced, Some("value".to_string()));
        assert_eq!(subject.get("key"), Some(&"new_value".to_string()));

        let raw_map = subject.into_inner();
        assert_eq!(raw_map.get("key"), Some(&"new_value".to_string()));
    }

    /// An unparseable date makes processing fail.
    #[test]
    fn test_process_metadata_with_invalid_date() {
        let input = metadata_of(&[
            ("title", "Test Title"),
            ("date", "invalid_date"),
        ]);

        assert!(process_metadata(&input).is_err());
    }

    /// Nested YAML mappings get dotted keys; sequences become `[a, b]`.
    #[test]
    fn test_extract_yaml_metadata_with_complex_structure() {
        let yaml_content = r#"---
title: Complex YAML Test
date: 2023-05-20
author:
 name: John Doe
 email: john@example.com
tags:
 - rust
 - metadata
 - testing
---
Content here"#;

        let extracted = extract_metadata(yaml_content).unwrap();
        let expectations = [
            ("title", "Complex YAML Test"),
            ("date", "2023-05-20"),
            ("author.name", "John Doe"),
            ("author.email", "john@example.com"),
            ("tags", "[rust, metadata, testing]"),
        ];

        for &(key, expected) in &expectations {
            assert_eq!(extracted.get(key).unwrap(), expected);
        }
    }

    /// Nested TOML tables get dotted keys; arrays become `[a, b]`.
    #[test]
    fn test_extract_toml_metadata_with_complex_structure() {
        // `tags` follows the `[author]` header, so it belongs to that
        // table and is flattened as `author.tags`.
        let toml_content = r#"+++
title = "Complex TOML Test"
date = 2023-05-20

[author]
name = "John Doe"
email = "john@example.com"

tags = ["rust", "metadata", "testing"]
+++
Content here"#;

        let extracted = extract_metadata(toml_content).unwrap();

        let title = extracted.get("title").expect("Missing 'title' key");
        assert_eq!(title, "Complex TOML Test");

        let date = extracted.get("date").expect("Missing 'date' key");
        assert_eq!(date, "2023-05-20");

        let author_name = extracted
            .get("author.name")
            .expect("Missing 'author.name' key");
        assert_eq!(author_name, "John Doe");

        let author_email = extracted
            .get("author.email")
            .expect("Missing 'author.email' key");
        assert_eq!(author_email, "john@example.com");

        let author_tags = extracted
            .get("author.tags")
            .expect("Missing 'author.tags' key");
        assert_eq!(author_tags, "[rust, metadata, testing]");
    }

    /// Punctuation and non-ASCII letters survive; only spaces change.
    #[test]
    fn test_generate_slug_with_special_characters() {
        let cases = [
            ("Hello, World! 123", "hello,-world!-123"),
            ("Test: Ästhetik", "test:-ästhetik"),
            ("  Multiple   Spaces  ", "--multiple---spaces--"),
        ];

        for &(title, expected) in &cases {
            assert_eq!(generate_slug(title), expected);
        }
    }
}