helix/dna/map/
concat.rs

1#![allow(clippy::pedantic)]
2#![warn(clippy::all)]
3use std::collections::{HashSet, HashMap};
4use std::path::Path;
5use anyhow::{Context, Result};
6use log;
7use tokio::fs;
8use serde::{Deserialize, Serialize};
9use std::fmt;
10use std::sync::atomic::{AtomicUsize, Ordering};
11use std::sync::Arc;
12use md5;
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
14pub enum FileExtensionPreset {
15    CaptionWdTags,
16    FlorenceWdTags,
17}
18impl fmt::Display for FileExtensionPreset {
19    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
20        match self {
21            Self::CaptionWdTags => write!(f, "caption+wd+tags"),
22            Self::FlorenceWdTags => write!(f, "florence+wd+tags"),
23        }
24    }
25}
26/// Configuration for file concatenation
27///
28/// This configuration controls how files are concatenated, with the following behavior:
29/// - Base extensions define which files to look for (e.g., jpg, png)
30/// - Extensions to concatenate define which related files to process (e.g., caption, wd, tags)
31/// - Caption files (with extension "caption" or "florence") are treated specially:
32///   - Their content is appended after the concatenated tags
33///   - They aren't included in tag deduplication
34#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
35pub struct ConcatConfig {
36    pub base_extensions: Vec<String>,
37    pub extensions_to_concat: Vec<String>,
38    pub output_extension: String,
39    pub remove_duplicates: bool,
40    pub tag_separator: String,
41    pub deduplicate_files: bool,
42}
43impl ConcatConfig {
44    #[must_use]
45    pub fn new(
46        base_extensions: Vec<String>,
47        extensions_to_concat: Vec<String>,
48        output_extension: String,
49        remove_duplicates: bool,
50        tag_separator: String,
51    ) -> Self {
52        Self {
53            base_extensions,
54            extensions_to_concat,
55            output_extension,
56            remove_duplicates,
57            tag_separator,
58            deduplicate_files: false,
59        }
60    }
61    #[must_use]
62    pub fn with_deduplication(mut self, deduplicate: bool) -> Self {
63        self.deduplicate_files = deduplicate;
64        self
65    }
66    #[must_use]
67    pub fn from_preset(preset: FileExtensionPreset) -> Self {
68        match preset {
69            FileExtensionPreset::CaptionWdTags => {
70                Self {
71                    base_extensions: vec![
72                        "png".into(), "jpg".into(), "jpeg".into(), "webp".into(), "gif"
73                        .into(), "tiff".into(), "bmp".into(), "jxl".into(), "avif".into()
74                    ],
75                    extensions_to_concat: vec![
76                        "caption".into(), "wd".into(), "tags".into()
77                    ],
78                    output_extension: "txt".into(),
79                    remove_duplicates: true,
80                    tag_separator: ", ".into(),
81                    deduplicate_files: false,
82                }
83            }
84            FileExtensionPreset::FlorenceWdTags => {
85                Self {
86                    base_extensions: vec![
87                        "png".into(), "jpg".into(), "jpeg".into(), "webp".into(), "gif"
88                        .into(), "tiff".into(), "bmp".into(), "jxl".into(), "avif".into()
89                    ],
90                    extensions_to_concat: vec![
91                        "florence".into(), "wd".into(), "tags".into()
92                    ],
93                    output_extension: "txt".into(),
94                    remove_duplicates: true,
95                    tag_separator: ", ".into(),
96                    deduplicate_files: false,
97                }
98            }
99        }
100    }
101}
102async fn read_file_content(path: &Path) -> Result<String> {
103    let content = fs::read_to_string(path)
104        .await
105        .with_context(|| format!("Failed to read file: {}", path.display()))?;
106    Ok(content.trim().to_string())
107}
108fn concat_tags(
109    contents: &[String],
110    config: &ConcatConfig,
111    file_paths: &[std::path::PathBuf],
112) -> String {
113    if contents.is_empty() {
114        return String::new();
115    }
116    let caption_ext = if config.extensions_to_concat.contains(&"caption".to_string()) {
117        "caption"
118    } else if config.extensions_to_concat.contains(&"florence".to_string()) {
119        "florence"
120    } else {
121        config.extensions_to_concat.last().unwrap()
122    };
123    let mut caption_index = None;
124    for (i, path) in file_paths.iter().enumerate() {
125        if let Some(ext) = path.extension() {
126            if ext == caption_ext {
127                caption_index = Some(i);
128                break;
129            }
130        }
131    }
132    let caption_index = caption_index.unwrap_or(contents.len() - 1);
133    let caption_content = &contents[caption_index];
134    let mut unique_tags = HashSet::new();
135    let mut all_tags = Vec::new();
136    for (i, content) in contents.iter().enumerate() {
137        if i == caption_index {
138            continue;
139        }
140        let tags = content.split(',').map(str::trim).filter(|&tag| !tag.is_empty());
141        for tag in tags {
142            if config.remove_duplicates {
143                unique_tags.insert(tag.to_string());
144            } else {
145                all_tags.push(tag.to_string());
146            }
147        }
148    }
149    let tags_portion = if config.remove_duplicates {
150        let mut sorted_tags: Vec<_> = unique_tags.into_iter().collect();
151        sorted_tags.sort();
152        sorted_tags.join(&config.tag_separator)
153    } else {
154        all_tags.join(&config.tag_separator)
155    };
156    if tags_portion.is_empty() {
157        caption_content.clone()
158    } else if caption_content.is_empty() {
159        tags_portion
160    } else {
161        format!("{}{}{}", tags_portion, config.tag_separator, caption_content)
162    }
163}
164pub async fn process_image_file(
165    image_path: &Path,
166    config: &ConcatConfig,
167    dry_run: bool,
168) -> Result<bool> {
169    let stem = image_path
170        .file_stem()
171        .with_context(|| {
172            format!("Failed to get file stem from: {}", image_path.display())
173        })?
174        .to_string_lossy();
175    let parent = image_path
176        .parent()
177        .with_context(|| {
178            format!("Failed to get parent directory of: {}", image_path.display())
179        })?;
180    let mut missing_files = Vec::new();
181    let mut file_paths = Vec::new();
182    for ext in &config.extensions_to_concat {
183        let ext_file = parent.join(format!("{stem}.{ext}"));
184        if ext_file.exists() {
185            file_paths.push(ext_file);
186        } else {
187            missing_files.push(ext_file.to_string_lossy().to_string());
188        }
189    }
190    if !missing_files.is_empty() {
191        log::warn!(
192            "Skipping {}: Missing files: {}", image_path.display(), missing_files
193            .join(", ")
194        );
195        return Ok(false);
196    }
197    let mut contents = Vec::new();
198    for path in &file_paths {
199        let content = read_file_content(path).await?;
200        contents.push(content);
201    }
202    let concatenated = concat_tags(&contents, config, &file_paths);
203    let output_path = parent.join(format!("{}.{}", stem, config.output_extension));
204    if dry_run {
205        log::info!("Would write to {}: {}", output_path.display(), concatenated);
206    } else {
207        fs::write(&output_path, &concatenated)
208            .await
209            .with_context(|| format!("Failed to write to: {}", output_path.display()))?;
210        log::debug!("Wrote {}", output_path.display());
211    }
212    Ok(true)
213}
214async fn walk_directory<F, Fut>(directory: &Path, mut callback: F) -> Result<()>
215where
216    F: FnMut(&Path) -> Fut + Send,
217    Fut: std::future::Future<Output = Result<()>> + Send,
218{
219    let mut dirs_to_visit = vec![directory.to_path_buf()];
220    while let Some(current_dir) = dirs_to_visit.pop() {
221        let mut entries = fs::read_dir(&current_dir).await?;
222        while let Some(entry) = entries.next_entry().await? {
223            let path = entry.path();
224            if path.is_dir() {
225                dirs_to_visit.push(path);
226            } else {
227                callback(&path).await?;
228            }
229        }
230    }
231    Ok(())
232}
233pub async fn concat_files(
234    directory: &Path,
235    config: &ConcatConfig,
236    dry_run: bool,
237) -> Result<usize> {
238    let directory = directory.to_path_buf();
239    let config_clone = config.clone();
240    log::info!("Searching for files in: {}", directory.display());
241    log::info!("Using extensions: {}", config.extensions_to_concat.join(", "));
242    log::info!("Output extension: {}", config.output_extension);
243    if config.deduplicate_files {
244        log::info!(
245            "File deduplication enabled - will check for identical file contents"
246        );
247    }
248    let processed_count = Arc::new(AtomicUsize::new(0));
249    let skipped_duplicates = Arc::new(AtomicUsize::new(0));
250    let mut base_extensions = HashSet::new();
251    for ext in &config.base_extensions {
252        base_extensions.insert(ext.clone());
253        log::debug!("Added base extension: {}", ext);
254    }
255    let content_hashes: Arc<tokio::sync::Mutex<HashMap<String, String>>> = Arc::new(
256        tokio::sync::Mutex::new(HashMap::new()),
257    );
258    let processed_count_clone = processed_count.clone();
259    let skipped_duplicates_clone = skipped_duplicates.clone();
260    let content_hashes_clone = content_hashes.clone();
261    walk_directory(
262            &directory,
263            move |path| {
264                let path = path.to_path_buf();
265                let base_exts = base_extensions.clone();
266                let config = config_clone.clone();
267                let dry_run = dry_run;
268                let count = processed_count_clone.clone();
269                let skipped = skipped_duplicates_clone.clone();
270                let hashes = content_hashes_clone.clone();
271                async move {
272                    if let Some(ext) = path.extension() {
273                        let ext_str = ext.to_string_lossy().to_lowercase();
274                        log::debug!(
275                            "Checking file: {} with extension: {}", path.display(),
276                            ext_str
277                        );
278                        log::debug!("Base extensions: {:?}", base_exts);
279                        if base_exts.contains(&ext_str) {
280                            log::debug!(
281                                "Found base extension match: {}", path.display()
282                            );
283                            if config.deduplicate_files {
284                                log::debug!(
285                                    "Checking for duplicate content: {}", path.display()
286                                );
287                                let is_duplicate = check_duplicate_content(
288                                        &path,
289                                        &config,
290                                        hashes.clone(),
291                                    )
292                                    .await;
293                                if is_duplicate {
294                                    log::debug!("Skipping duplicate file: {}", path.display());
295                                    skipped.fetch_add(1, Ordering::Relaxed);
296                                    return Ok(());
297                                }
298                                log::debug!(
299                                    "File is not a duplicate, proceeding: {}", path.display()
300                                );
301                            }
302                            log::debug!("Processing file: {}", path.display());
303                            match process_image_file(&path, &config, dry_run).await {
304                                Ok(true) => {
305                                    log::debug!("Successfully processed: {}", path.display());
306                                    count.fetch_add(1, Ordering::Relaxed);
307                                }
308                                Ok(false) => {
309                                    log::debug!(
310                                        "Skipped due to missing files: {}", path.display()
311                                    );
312                                }
313                                Err(err) => {
314                                    log::warn!("Error processing {}: {}", path.display(), err)
315                                }
316                            }
317                        } else {
318                            log::debug!(
319                                "Skipping non-base extension: {}", path.display()
320                            );
321                        }
322                    }
323                    Ok(())
324                }
325            },
326        )
327        .await?;
328    let final_count = processed_count.load(Ordering::Relaxed);
329    let final_skipped = skipped_duplicates.load(Ordering::Relaxed);
330    if dry_run {
331        log::info!("Dry run completed. Would have processed {} files.", final_count);
332    } else {
333        log::info!("Concatenation completed. Processed {} files.", final_count);
334    }
335    if config.deduplicate_files {
336        log::info!("Skipped {} duplicate files.", final_skipped);
337    }
338    Ok(final_count)
339}
340async fn check_duplicate_content(
341    path: &Path,
342    config: &ConcatConfig,
343    hashes: Arc<tokio::sync::Mutex<HashMap<String, String>>>,
344) -> bool {
345    let Some(stem) = path.file_stem() else {
346        log::debug!("Could not get file stem for: {}", path.display());
347        return false;
348    };
349    let stem = stem.to_string_lossy();
350    let Some(parent) = path.parent() else {
351        log::debug!("Could not get parent directory for: {}", path.display());
352        return false;
353    };
354    log::debug!(
355        "Checking duplicate content for file: {} with stem: {}", path.display(), stem
356    );
357    let mut file_paths = Vec::new();
358    for ext in &config.extensions_to_concat {
359        let ext_file = parent.join(format!("{stem}.{ext}"));
360        if !ext_file.exists() {
361            log::debug!("Missing required file: {}", ext_file.display());
362            return false;
363        }
364        log::debug!("Found required file: {}", ext_file.display());
365        file_paths.push(ext_file);
366    }
367    let mut combined_content = String::new();
368    for path in &file_paths {
369        match fs::read_to_string(path).await {
370            Ok(content) => {
371                log::debug!("Read content from: {}", path.display());
372                combined_content.push_str(&content);
373            }
374            Err(err) => {
375                log::debug!("Failed to read content from {}: {}", path.display(), err);
376                return false;
377            }
378        }
379    }
380    let content_hash = format!("{:x}", md5::compute(combined_content.as_bytes()));
381    log::debug!("Generated hash for {}: {}", path.display(), content_hash);
382    let mut hashes_map = hashes.lock().await;
383    if let Some(existing_file) = hashes_map.get(&content_hash) {
384        log::debug!(
385            "Found duplicate content: {} matches {}", path.display(), existing_file
386        );
387        true
388    } else {
389        log::debug!("No duplicate found for {}, storing hash", path.display());
390        hashes_map.insert(content_hash, path.to_string_lossy().to_string());
391        false
392    }
393}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    use tokio::fs::File;
    use tokio::io::AsyncWriteExt;

    /// Builds a `jpg`-based configuration writing `txt` output with a `", "` separator.
    fn test_config(
        extensions: [&str; 3],
        remove_duplicates: bool,
        deduplicate_files: bool,
    ) -> ConcatConfig {
        ConcatConfig {
            base_extensions: vec!["jpg".into()],
            extensions_to_concat: extensions.iter().map(|ext| (*ext).to_string()).collect(),
            output_extension: "txt".into(),
            remove_duplicates,
            tag_separator: ", ".into(),
            deduplicate_files,
        }
    }

    /// Turns three string literals into the owned contents passed to `concat_tags`.
    fn owned_contents(items: [&str; 3]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    /// Turns three file names into the paths passed to `concat_tags`.
    fn sidecar_paths(names: [&str; 3]) -> Vec<std::path::PathBuf> {
        names.iter().map(std::path::PathBuf::from).collect()
    }

    /// Creates `path`, writes `bytes` to it and syncs it to disk.
    async fn write_synced(path: &Path, bytes: &[u8]) -> Result<()> {
        let mut file = File::create(path).await?;
        file.write_all(bytes).await?;
        file.sync_all().await?;
        Ok(())
    }

    /// Repeated tags are merged and sorted; the caption comes last.
    #[tokio::test]
    async fn test_concat_tags_with_duplicates() -> Result<()> {
        let config = test_config(["wd", "tags", "caption"], true, false);
        let contents = owned_contents([
            "tag1, tag2, tag3",
            "tag2, tag4, tag5",
            "a photo of a person",
        ]);
        let file_paths = sidecar_paths(["test.wd", "test.tags", "test.caption"]);
        let result = concat_tags(&contents, &config, &file_paths);
        assert_eq!(result, "tag1, tag2, tag3, tag4, tag5, a photo of a person");
        Ok(())
    }

    /// Without deduplication every tag is kept in its original order.
    #[tokio::test]
    async fn test_concat_tags_without_duplicates() -> Result<()> {
        let config = test_config(["wd", "tags", "caption"], false, false);
        let contents = owned_contents([
            "tag1, tag2, tag3",
            "tag2, tag4, tag5",
            "a photo of a person",
        ]);
        let file_paths = sidecar_paths(["test.wd", "test.tags", "test.caption"]);
        let result = concat_tags(&contents, &config, &file_paths);
        assert_eq!(result, "tag1, tag2, tag3, tag2, tag4, tag5, a photo of a person");
        Ok(())
    }

    /// A dry run writes nothing; a real run writes the merged file next to the image.
    #[tokio::test]
    async fn test_process_image_file() -> Result<()> {
        let temp_dir = TempDir::new()?;
        let root = temp_dir.path();
        let image_path = root.join("test.jpg");
        File::create(&image_path).await?.sync_all().await?;
        write_synced(&root.join("test.caption"), b"caption1, caption2").await?;
        write_synced(&root.join("test.wd"), b"wd1, wd2").await?;
        write_synced(&root.join("test.tags"), b"tag1, tag2").await?;
        let config = test_config(["caption", "wd", "tags"], true, false);
        let output_path = root.join("test.txt");

        let processed_dry = process_image_file(&image_path, &config, true).await?;
        assert!(processed_dry);
        assert!(!output_path.exists());

        let processed = process_image_file(&image_path, &config, false).await?;
        assert!(processed);
        let output_content = fs::read_to_string(&output_path).await?;
        assert_eq!(output_content, "tag1, tag2, wd1, wd2, caption1, caption2");
        Ok(())
    }

    /// Images 1 and 2 have identical sidecar contents, image 3 differs.
    #[tokio::test]
    async fn test_file_deduplication() -> Result<()> {
        let _ = env_logger::builder()
            .filter_level(log::LevelFilter::Debug)
            .is_test(true)
            .try_init();
        log::info!("Starting file deduplication test");
        let temp_dir = tempfile::tempdir()?;
        let root = temp_dir.path();
        let fixture = |index: usize, ext: &str| root.join(format!("image{index}.{ext}"));

        log::info!("Creating test files in {}", root.display());
        // One row per extension; the columns are the contents for images 1, 2 and 3.
        let fixtures = [
            ("jpg", ["test image 1", "test image 2", "test image 3"]),
            (
                "caption",
                [
                    "a photo of a person",
                    "a photo of a person",
                    "person, portrait, indoor",
                ],
            ),
            (
                "wd",
                [
                    "masterpiece, digital art",
                    "masterpiece, digital art",
                    "highly detailed, 4k",
                ],
            ),
            ("tags", ["tag1, tag2, tag3", "tag1, tag2, tag3", "tag4, tag5, tag6"]),
        ];
        for (ext, per_image) in fixtures.iter() {
            for (offset, body) in per_image.iter().enumerate() {
                write_synced(&fixture(offset + 1, *ext), body.as_bytes()).await?;
            }
        }

        let config = test_config(["caption", "wd", "tags"], true, true);
        let image1_path = fixture(1, "jpg");
        let image2_path = fixture(2, "jpg");
        let image3_path = fixture(3, "jpg");
        log::info!("Test files created at:");
        log::info!("Image 1: {}", image1_path.display());
        log::info!("Caption 1: {}", fixture(1, "caption").display());
        log::info!("WD 1: {}", fixture(1, "wd").display());
        log::info!("Tags 1: {}", fixture(1, "tags").display());

        let content_hashes: Arc<tokio::sync::Mutex<HashMap<String, String>>> = Arc::new(
            tokio::sync::Mutex::new(HashMap::new()),
        );

        log::info!("Processing first image: {}", image1_path.display());
        let is_duplicate1 =
            check_duplicate_content(&image1_path, &config, content_hashes.clone()).await;
        assert!(!is_duplicate1, "First image should not be detected as duplicate");
        let processed1 = process_image_file(&image1_path, &config, false).await?;
        assert!(processed1, "First image should be processed successfully");

        log::info!("Processing second image: {}", image2_path.display());
        let is_duplicate2 =
            check_duplicate_content(&image2_path, &config, content_hashes.clone()).await;
        assert!(is_duplicate2, "Second image should be detected as duplicate");

        log::info!("Processing third image: {}", image3_path.display());
        let is_duplicate3 =
            check_duplicate_content(&image3_path, &config, content_hashes.clone()).await;
        assert!(!is_duplicate3, "Third image should not be detected as duplicate");
        let processed3 = process_image_file(&image3_path, &config, false).await?;
        assert!(processed3, "Third image should be processed successfully");

        let output1_path = root.join("image1.txt");
        let output3_path = root.join("image3.txt");
        assert!(output1_path.exists(), "image1.txt should exist");
        assert!(
            !root.join("image2.txt").exists(),
            "image2.txt should not exist (duplicate)"
        );
        assert!(output3_path.exists(), "image3.txt should exist");

        let output1_content = fs::read_to_string(&output1_path).await?;
        let output3_content = fs::read_to_string(&output3_path).await?;
        log::info!("Output 1 content: '{}'", output1_content);
        log::info!("Output 3 content: '{}'", output3_content);

        let expectations = [
            (
                &output1_content,
                "tag1, tag2, tag3",
                "Output for image1 should contain deduplicated tags",
            ),
            (
                &output1_content,
                "digital art, masterpiece",
                "Output for image1 should contain wd content (in alphabetical order)",
            ),
            (
                &output1_content,
                "a photo of a person",
                "Output for image1 should contain caption content",
            ),
            (
                &output3_content,
                "tag4, tag5, tag6",
                "Output for image3 should contain its unique tags content",
            ),
            (
                &output3_content,
                "4k, highly detailed",
                "Output for image3 should contain its unique wd content (in alphabetical order)",
            ),
            (
                &output3_content,
                "person, portrait, indoor",
                "Output for image3 should contain its unique caption content",
            ),
        ];
        for (output, needle, message) in expectations.iter() {
            assert!(output.contains(*needle), "{}", message);
        }
        Ok(())
    }

    /// The caption ends up last regardless of where its file sits in the list.
    #[tokio::test]
    async fn test_concat_tags_caption_handling() -> Result<()> {
        // Caption file listed last.
        let config = test_config(["wd", "tags", "caption"], true, false);
        let contents = owned_contents([
            "person, photo",
            "person, indoor, white background",
            "a photo of a person",
        ]);
        let file_paths = sidecar_paths(["test.wd", "test.tags", "test.caption"]);
        let result = concat_tags(&contents, &config, &file_paths);
        assert_eq!(
            result, "indoor, person, photo, white background, a photo of a person"
        );

        // Caption file listed first.
        let config = test_config(["caption", "wd", "tags"], true, false);
        let contents = owned_contents([
            "a photo of a person",
            "person, photo",
            "person, indoor, white background",
        ]);
        let file_paths = sidecar_paths(["test.caption", "test.wd", "test.tags"]);
        let result = concat_tags(&contents, &config, &file_paths);
        assert_eq!(
            result, "indoor, person, photo, white background, a photo of a person"
        );
        Ok(())
    }
}