helix_core/json/
concat.rs

1#![allow(clippy::pedantic)]
2#![warn(clippy::all)]
3use std::collections::{HashSet, HashMap};
4use std::path::Path;
5use anyhow::{Context, Result};
6use log;
7use tokio::fs;
8use serde::{Deserialize, Serialize};
9use std::fmt;
10use std::sync::atomic::{AtomicUsize, Ordering};
11use std::sync::Arc;
12use md5;
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
14pub enum FileExtensionPreset {
15    CaptionWdTags,
16    FlorenceWdTags,
17}
18impl fmt::Display for FileExtensionPreset {
19    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
20        match self {
21            Self::CaptionWdTags => write!(f, "caption+wd+tags"),
22            Self::FlorenceWdTags => write!(f, "florence+wd+tags"),
23        }
24    }
25}
26/// Configuration for file concatenation
27///
28/// This configuration controls how files are concatenated, with the following behavior:
29/// - Base extensions define which files to look for (e.g., jpg, png)
30/// - Extensions to concatenate define which related files to process (e.g., caption, wd, tags)
31/// - Caption files (with extension "caption" or "florence") are treated specially:
32///   - Their content is appended after the concatenated tags
33///   - They aren't included in tag deduplication
34#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
35pub struct ConcatConfig {
36    pub base_extensions: Vec<String>,
37    pub extensions_to_concat: Vec<String>,
38    pub output_extension: String,
39    pub remove_duplicates: bool,
40    pub tag_separator: String,
41    pub deduplicate_files: bool,
42}
43impl ConcatConfig {
44    #[must_use]
45    pub fn new(
46        base_extensions: Vec<String>,
47        extensions_to_concat: Vec<String>,
48        output_extension: String,
49        remove_duplicates: bool,
50        tag_separator: String,
51    ) -> Self {
52        Self {
53            base_extensions,
54            extensions_to_concat,
55            output_extension,
56            remove_duplicates,
57            tag_separator,
58            deduplicate_files: false,
59        }
60    }
61    #[must_use]
62    pub fn with_deduplication(mut self, deduplicate: bool) -> Self {
63        self.deduplicate_files = deduplicate;
64        self
65    }
66    #[must_use]
67    pub fn from_preset(preset: FileExtensionPreset) -> Self {
68        match preset {
69            FileExtensionPreset::CaptionWdTags => {
70                Self {
71                    base_extensions: vec![
72                        "png".into(), "jpg".into(), "jpeg".into(), "webp".into(), "gif"
73                        .into(), "tiff".into(), "bmp".into(), "jxl".into(), "avif".into()
74                    ],
75                    extensions_to_concat: vec![
76                        "caption".into(), "wd".into(), "tags".into()
77                    ],
78                    output_extension: "txt".into(),
79                    remove_duplicates: true,
80                    tag_separator: ", ".into(),
81                    deduplicate_files: false,
82                }
83            }
84            FileExtensionPreset::FlorenceWdTags => {
85                Self {
86                    base_extensions: vec![
87                        "png".into(), "jpg".into(), "jpeg".into(), "webp".into(), "gif"
88                        .into(), "tiff".into(), "bmp".into(), "jxl".into(), "avif".into()
89                    ],
90                    extensions_to_concat: vec![
91                        "florence".into(), "wd".into(), "tags".into()
92                    ],
93                    output_extension: "txt".into(),
94                    remove_duplicates: true,
95                    tag_separator: ", ".into(),
96                    deduplicate_files: false,
97                }
98            }
99        }
100    }
101}
102async fn read_file_content(path: &Path) -> Result<String> {
103    let content = fs::read_to_string(path)
104        .await
105        .with_context(|| format!("Failed to read file: {}", path.display()))?;
106    Ok(content.trim().to_string())
107}
108fn concat_tags(
109    contents: &[String],
110    config: &ConcatConfig,
111    file_paths: &[std::path::PathBuf],
112) -> String {
113    if contents.is_empty() {
114        return String::new();
115    }
116    let caption_ext = if config.extensions_to_concat.contains(&"caption".to_string()) {
117        "caption"
118    } else if config.extensions_to_concat.contains(&"florence".to_string()) {
119        "florence"
120    } else {
121        config.extensions_to_concat.last().unwrap()
122    };
123    let mut caption_index = None;
124    for (i, path) in file_paths.iter().enumerate() {
125        if let Some(ext) = path.extension() {
126            if ext == caption_ext {
127                caption_index = Some(i);
128                break;
129            }
130        }
131    }
132    let caption_index = caption_index.unwrap_or(contents.len() - 1);
133    let caption_content = &contents[caption_index];
134    let mut unique_tags = HashSet::new();
135    let mut all_tags = Vec::new();
136    for (i, content) in contents.iter().enumerate() {
137        if i == caption_index {
138            continue;
139        }
140        let tags = content.split(',').map(str::trim).filter(|&tag| !tag.is_empty());
141        for tag in tags {
142            if config.remove_duplicates {
143                unique_tags.insert(tag.to_string());
144            } else {
145                all_tags.push(tag.to_string());
146            }
147        }
148    }
149    let tags_portion = if config.remove_duplicates {
150        let mut sorted_tags: Vec<_> = unique_tags.into_iter().collect();
151        sorted_tags.sort();
152        sorted_tags.join(&config.tag_separator)
153    } else {
154        all_tags.join(&config.tag_separator)
155    };
156    if tags_portion.is_empty() {
157        caption_content.clone()
158    } else if caption_content.is_empty() {
159        tags_portion
160    } else {
161        format!("{}{}{}", tags_portion, config.tag_separator, caption_content)
162    }
163}
164pub async fn process_image_file(
165    image_path: &Path,
166    config: &ConcatConfig,
167    dry_run: bool,
168) -> Result<bool> {
169    let stem = image_path
170        .file_stem()
171        .with_context(|| {
172            format!("Failed to get file stem from: {}", image_path.display())
173        })?
174        .to_string_lossy();
175    let parent = image_path
176        .parent()
177        .with_context(|| {
178            format!("Failed to get parent directory of: {}", image_path.display())
179        })?;
180    let mut missing_files = Vec::new();
181    let mut file_paths = Vec::new();
182    for ext in &config.extensions_to_concat {
183        let ext_file = parent.join(format!("{stem}.{ext}"));
184        if ext_file.exists() {
185            file_paths.push(ext_file);
186        } else {
187            missing_files.push(ext_file.to_string_lossy().to_string());
188        }
189    }
190    if !missing_files.is_empty() {
191        log::warn!(
192            "Skipping {}: Missing files: {}", image_path.display(), missing_files
193            .join(", ")
194        );
195        return Ok(false);
196    }
197    let mut contents = Vec::new();
198    for path in &file_paths {
199        let content = read_file_content(path).await?;
200        contents.push(content);
201    }
202    let concatenated = concat_tags(&contents, config, &file_paths);
203    let output_path = parent.join(format!("{}.{}", stem, config.output_extension));
204    if dry_run {
205        log::info!("Would write to {}: {}", output_path.display(), concatenated);
206    } else {
207        fs::write(&output_path, &concatenated)
208            .await
209            .with_context(|| format!("Failed to write to: {}", output_path.display()))?;
210        log::debug!("Wrote {}", output_path.display());
211    }
212    Ok(true)
213}
214async fn walk_directory<F, Fut>(
215    directory: &Path,
216    mut callback: F,
217) -> Result<()>
218where
219    F: FnMut(&Path) -> Fut + Send,
220    Fut: std::future::Future<Output = Result<()>> + Send,
221{
222    let mut dirs_to_visit = vec![directory.to_path_buf()];
223
224    while let Some(current_dir) = dirs_to_visit.pop() {
225        let mut entries = fs::read_dir(&current_dir).await?;
226
227        while let Some(entry) = entries.next_entry().await? {
228            let path = entry.path();
229
230            if path.is_dir() {
231                dirs_to_visit.push(path);
232            } else {
233                callback(&path).await?;
234            }
235        }
236    }
237
238    Ok(())
239}
240
241pub async fn concat_files(
242    directory: &Path,
243    config: &ConcatConfig,
244    dry_run: bool,
245) -> Result<usize> {
246    let directory = directory.to_path_buf();
247    let config_clone = config.clone();
248    log::info!("Searching for files in: {}", directory.display());
249    log::info!("Using extensions: {}", config.extensions_to_concat.join(", "));
250    log::info!("Output extension: {}", config.output_extension);
251    if config.deduplicate_files {
252        log::info!("File deduplication enabled - will check for identical file contents");
253    }
254    let processed_count = Arc::new(AtomicUsize::new(0));
255    let skipped_duplicates = Arc::new(AtomicUsize::new(0));
256    let mut base_extensions = HashSet::new();
257    for ext in &config.base_extensions {
258        base_extensions.insert(ext.clone());
259        log::debug!("Added base extension: {}", ext);
260    }
261    let content_hashes: Arc<tokio::sync::Mutex<HashMap<String, String>>> = Arc::new(
262        tokio::sync::Mutex::new(HashMap::new()),
263    );
264    let processed_count_clone = processed_count.clone();
265    let skipped_duplicates_clone = skipped_duplicates.clone();
266    let content_hashes_clone = content_hashes.clone();
267    walk_directory(
268            &directory,
269            move |path| {
270                let path = path.to_path_buf();
271                let base_exts = base_extensions.clone();
272                let config = config_clone.clone();
273                let dry_run = dry_run;
274                let count = processed_count_clone.clone();
275                let skipped = skipped_duplicates_clone.clone();
276                let hashes = content_hashes_clone.clone();
277                async move {
278                    if let Some(ext) = path.extension() {
279                        let ext_str = ext.to_string_lossy().to_lowercase();
280                        log::debug!(
281                            "Checking file: {} with extension: {}", path.display(),
282                            ext_str
283                        );
284                        log::debug!("Base extensions: {:?}", base_exts);
285                        if base_exts.contains(&ext_str) {
286                            log::debug!("Found base extension match: {}", path.display());
287                            if config.deduplicate_files {
288                                log::debug!(
289                                    "Checking for duplicate content: {}", path.display()
290                                );
291                                let is_duplicate = check_duplicate_content(
292                                        &path,
293                                        &config,
294                                        hashes.clone(),
295                                    )
296                                    .await;
297                                if is_duplicate {
298                                    log::debug!("Skipping duplicate file: {}", path.display());
299                                    skipped.fetch_add(1, Ordering::Relaxed);
300                                    return Ok(());
301                                }
302                                log::debug!(
303                                    "File is not a duplicate, proceeding: {}", path.display()
304                                );
305                            }
306                            log::debug!("Processing file: {}", path.display());
307                            match process_image_file(&path, &config, dry_run).await {
308                                Ok(true) => {
309                                    log::debug!("Successfully processed: {}", path.display());
310                                    count.fetch_add(1, Ordering::Relaxed);
311                                }
312                                Ok(false) => {
313                                    log::debug!("Skipped due to missing files: {}", path.display());
314                                }
315                                Err(err) => {
316                                    log::warn!("Error processing {}: {}", path.display(), err)
317                                }
318                            }
319                        } else {
320                            log::debug!("Skipping non-base extension: {}", path.display());
321                        }
322                    }
323                    Ok(())
324                }
325            },
326        )
327        .await?;
328    let final_count = processed_count.load(Ordering::Relaxed);
329    let final_skipped = skipped_duplicates.load(Ordering::Relaxed);
330    if dry_run {
331        log::info!("Dry run completed. Would have processed {} files.", final_count);
332    } else {
333        log::info!("Concatenation completed. Processed {} files.", final_count);
334    }
335    if config.deduplicate_files {
336        log::info!("Skipped {} duplicate files.", final_skipped);
337    }
338    Ok(final_count)
339}
340async fn check_duplicate_content(
341    path: &Path,
342    config: &ConcatConfig,
343    hashes: Arc<tokio::sync::Mutex<HashMap<String, String>>>,
344) -> bool {
345    let Some(stem) = path.file_stem() else {
346        log::debug!("Could not get file stem for: {}", path.display());
347        return false;
348    };
349    let stem = stem.to_string_lossy();
350    let Some(parent) = path.parent() else {
351        log::debug!("Could not get parent directory for: {}", path.display());
352        return false;
353    };
354    log::debug!(
355        "Checking duplicate content for file: {} with stem: {}", path.display(), stem
356    );
357    let mut file_paths = Vec::new();
358    for ext in &config.extensions_to_concat {
359        let ext_file = parent.join(format!("{stem}.{ext}"));
360        if !ext_file.exists() {
361            log::debug!("Missing required file: {}", ext_file.display());
362            return false;
363        }
364        log::debug!("Found required file: {}", ext_file.display());
365        file_paths.push(ext_file);
366    }
367    let mut combined_content = String::new();
368    for path in &file_paths {
369        match fs::read_to_string(path).await {
370            Ok(content) => {
371                log::debug!("Read content from: {}", path.display());
372                combined_content.push_str(&content);
373            }
374            Err(err) => {
375                log::debug!("Failed to read content from {}: {}", path.display(), err);
376                return false;
377            }
378        }
379    }
380    let content_hash = format!("{:x}", md5::compute(combined_content.as_bytes()));
381    log::debug!("Generated hash for {}: {}", path.display(), content_hash);
382    let mut hashes_map = hashes.lock().await;
383    if let Some(existing_file) = hashes_map.get(&content_hash) {
384        log::debug!("Found duplicate content: {} matches {}", path.display(), existing_file);
385        true
386    } else {
387        log::debug!("No duplicate found for {}, storing hash", path.display());
388        hashes_map.insert(content_hash, path.to_string_lossy().to_string());
389        false
390    }
391}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    use tokio::fs::File;
    use tokio::io::AsyncWriteExt;

    /// Builds a `jpg`-based config with `txt` output and a `", "` separator.
    fn jpg_config(
        sidecars: &[&str],
        remove_duplicates: bool,
        deduplicate_files: bool,
    ) -> ConcatConfig {
        ConcatConfig {
            base_extensions: vec!["jpg".into()],
            extensions_to_concat: owned(sidecars),
            output_extension: "txt".into(),
            remove_duplicates,
            tag_separator: ", ".into(),
            deduplicate_files,
        }
    }

    /// Converts string literals into owned strings.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    /// Converts file names into paths.
    fn paths(names: &[&str]) -> Vec<std::path::PathBuf> {
        names
            .iter()
            .map(|name| std::path::PathBuf::from(*name))
            .collect()
    }

    /// Creates `path`, writes `bytes` to it and syncs it to disk.
    async fn write_synced(path: &Path, bytes: &[u8]) -> Result<()> {
        let mut file = File::create(path).await?;
        file.write_all(bytes).await?;
        file.sync_all().await?;
        Ok(())
    }

    /// Duplicate tags across sidecars collapse when `remove_duplicates` is set.
    #[tokio::test]
    async fn test_concat_tags_with_duplicates() -> Result<()> {
        let config = jpg_config(&["wd", "tags", "caption"], true, false);
        let contents = owned(&["tag1, tag2, tag3", "tag2, tag4, tag5", "a photo of a person"]);
        let file_paths = paths(&["test.wd", "test.tags", "test.caption"]);

        let result = concat_tags(&contents, &config, &file_paths);

        assert_eq!(result, "tag1, tag2, tag3, tag4, tag5, a photo of a person");
        Ok(())
    }

    /// Duplicate tags are kept, in order, when `remove_duplicates` is off.
    #[tokio::test]
    async fn test_concat_tags_without_duplicates() -> Result<()> {
        let config = jpg_config(&["wd", "tags", "caption"], false, false);
        let contents = owned(&["tag1, tag2, tag3", "tag2, tag4, tag5", "a photo of a person"]);
        let file_paths = paths(&["test.wd", "test.tags", "test.caption"]);

        let result = concat_tags(&contents, &config, &file_paths);

        assert_eq!(result, "tag1, tag2, tag3, tag2, tag4, tag5, a photo of a person");
        Ok(())
    }

    /// A dry run writes nothing; a real run writes the merged text.
    #[tokio::test]
    async fn test_process_image_file() -> Result<()> {
        let temp_dir = TempDir::new()?;
        let temp_path = temp_dir.path();
        let image_path = temp_path.join("test.jpg");
        write_synced(&image_path, b"").await?;
        let sidecars = [
            ("test.caption", "caption1, caption2"),
            ("test.wd", "wd1, wd2"),
            ("test.tags", "tag1, tag2"),
        ];
        for &(name, body) in sidecars.iter() {
            write_synced(&temp_path.join(name), body.as_bytes()).await?;
        }
        let config = jpg_config(&["caption", "wd", "tags"], true, false);

        let processed_dry = process_image_file(&image_path, &config, true).await?;
        assert!(processed_dry);
        assert!(!temp_path.join("test.txt").exists());

        let processed = process_image_file(&image_path, &config, false).await?;
        assert!(processed);
        let output_content = fs::read_to_string(temp_path.join("test.txt")).await?;
        assert_eq!(output_content, "tag1, tag2, wd1, wd2, caption1, caption2");
        Ok(())
    }

    /// Images 1 and 2 share sidecar contents, image 3 does not.
    #[tokio::test]
    async fn test_file_deduplication() -> Result<()> {
        let _ = env_logger::builder()
            .filter_level(log::LevelFilter::Debug)
            .is_test(true)
            .try_init();
        log::info!("Starting file deduplication test");
        let temp_dir = tempfile::tempdir()?;
        let temp_path = temp_dir.path();
        let file_for = |index: usize, ext: &str| temp_path.join(format!("image{index}.{ext}"));

        log::info!("Creating test files in {}", temp_path.display());
        // One row per extension; column `n` holds the content of `image{n + 1}.<ext>`.
        let fixtures = [
            ("jpg", ["test image 1", "test image 2", "test image 3"]),
            (
                "caption",
                ["a photo of a person", "a photo of a person", "person, portrait, indoor"],
            ),
            (
                "wd",
                ["masterpiece, digital art", "masterpiece, digital art", "highly detailed, 4k"],
            ),
            ("tags", ["tag1, tag2, tag3", "tag1, tag2, tag3", "tag4, tag5, tag6"]),
        ];
        for &(ext, bodies) in fixtures.iter() {
            for (offset, body) in bodies.iter().enumerate() {
                write_synced(&file_for(offset + 1, ext), body.as_bytes()).await?;
            }
        }

        let config = jpg_config(&["caption", "wd", "tags"], true, true);
        let image1_path = file_for(1, "jpg");
        let image2_path = file_for(2, "jpg");
        let image3_path = file_for(3, "jpg");
        log::info!("Test files created at:");
        log::info!("Image 1: {}", image1_path.display());
        log::info!("Caption 1: {}", file_for(1, "caption").display());
        log::info!("WD 1: {}", file_for(1, "wd").display());
        log::info!("Tags 1: {}", file_for(1, "tags").display());

        let content_hashes: Arc<tokio::sync::Mutex<HashMap<String, String>>> =
            Arc::new(tokio::sync::Mutex::new(HashMap::new()));

        log::info!("Processing first image: {}", image1_path.display());
        let is_duplicate1 =
            check_duplicate_content(&image1_path, &config, Arc::clone(&content_hashes)).await;
        assert!(!is_duplicate1, "First image should not be detected as duplicate");
        let processed1 = process_image_file(&image1_path, &config, false).await?;
        assert!(processed1, "First image should be processed successfully");

        log::info!("Processing second image: {}", image2_path.display());
        let is_duplicate2 =
            check_duplicate_content(&image2_path, &config, Arc::clone(&content_hashes)).await;
        assert!(is_duplicate2, "Second image should be detected as duplicate");

        log::info!("Processing third image: {}", image3_path.display());
        let is_duplicate3 =
            check_duplicate_content(&image3_path, &config, Arc::clone(&content_hashes)).await;
        assert!(!is_duplicate3, "Third image should not be detected as duplicate");
        let processed3 = process_image_file(&image3_path, &config, false).await?;
        assert!(processed3, "Third image should be processed successfully");

        assert!(temp_path.join("image1.txt").exists(), "image1.txt should exist");
        assert!(
            !temp_path.join("image2.txt").exists(),
            "image2.txt should not exist (duplicate)"
        );
        assert!(temp_path.join("image3.txt").exists(), "image3.txt should exist");

        let output1_content = fs::read_to_string(temp_path.join("image1.txt")).await?;
        let output3_content = fs::read_to_string(temp_path.join("image3.txt")).await?;
        log::info!("Output 1 content: '{}'", output1_content);
        log::info!("Output 3 content: '{}'", output3_content);

        assert!(
            output1_content.contains("tag1, tag2, tag3"),
            "Output for image1 should contain deduplicated tags"
        );
        assert!(
            output1_content.contains("digital art, masterpiece"),
            "Output for image1 should contain wd content (in alphabetical order)"
        );
        assert!(
            output1_content.contains("a photo of a person"),
            "Output for image1 should contain caption content"
        );
        assert!(
            output3_content.contains("tag4, tag5, tag6"),
            "Output for image3 should contain its unique tags content"
        );
        assert!(
            output3_content.contains("4k, highly detailed"),
            "Output for image3 should contain its unique wd content (in alphabetical order)"
        );
        assert!(
            output3_content.contains("person, portrait, indoor"),
            "Output for image3 should contain its unique caption content"
        );
        Ok(())
    }

    /// The caption ends up last regardless of where it sits in the extension list.
    #[tokio::test]
    async fn test_concat_tags_caption_handling() -> Result<()> {
        // Caption configured last.
        let config = jpg_config(&["wd", "tags", "caption"], true, false);
        let contents = owned(&[
            "person, photo",
            "person, indoor, white background",
            "a photo of a person",
        ]);
        let file_paths = paths(&["test.wd", "test.tags", "test.caption"]);
        let result = concat_tags(&contents, &config, &file_paths);
        assert_eq!(
            result,
            "indoor, person, photo, white background, a photo of a person"
        );

        // Caption configured first.
        let config = jpg_config(&["caption", "wd", "tags"], true, false);
        let contents = owned(&[
            "a photo of a person",
            "person, photo",
            "person, indoor, white background",
        ]);
        let file_paths = paths(&["test.caption", "test.wd", "test.tags"]);
        let result = concat_tags(&contents, &config, &file_paths);
        assert_eq!(
            result,
            "indoor, person, photo, white background, a photo of a person"
        );
        Ok(())
    }
}