Skip to main content

serval/
utils.rs

1use chrono::NaiveDateTime;
2use core::fmt;
3use indicatif::{ProgressBar, ProgressStyle};
4use pest_derive::Parser;
5use polars::prelude::*;
6use rayon::prelude::*;
7use std::collections::HashSet;
8use std::ffi::OsString;
9use std::fs::{File, FileTimes};
10use std::io;
11use std::str::FromStr;
12use std::{
13    env, fs,
14    path::{Path, PathBuf},
15};
16use walkdir::{DirEntry, WalkDir};
17use xmp_toolkit::{OpenFileOptions, XmpFile, XmpMeta};
18
/// Parser type for advanced filter strings.
///
/// The implementation is generated by `pest_derive` from the grammar file
/// named in the `grammar` attribute; `parse_advanced_filter` drives it through
/// `FilterParser::parse(Rule::filter, ..)`.
#[derive(Parser)]
#[grammar = "filter.pest"]
struct FilterParser;
22
23#[derive(clap::ValueEnum, Clone, Copy, Debug)]
24pub enum ResourceType {
25    Xmp,
26    Image,
27    Video,
28    Media, // Image or Video
29    All,   // All resources (for serval align)
30}
31
32impl fmt::Display for ResourceType {
33    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34        write!(f, "{self:?}")
35    }
36}
37
38impl ResourceType {
39    fn extension(self) -> Vec<&'static str> {
40        match self {
41            ResourceType::Image => vec!["jpg", "jpeg", "png"],
42            ResourceType::Video => vec!["avi", "mp4", "mov"],
43            ResourceType::Xmp => vec!["xmp"],
44            ResourceType::Media => vec!["jpg", "jpeg", "png", "avi", "mp4", "mov"],
45            ResourceType::All => vec!["jpg", "jpeg", "png", "avi", "mp4", "mov", "xmp"],
46        }
47    }
48
49    fn is_resource(self, path: &Path) -> bool {
50        let ext = match path.extension() {
51            None => return false,
52            Some(ext) => ext,
53        };
54
55        match ext.to_str() {
56            None => false,
57            Some(ext_str) => {
58                let ext_lower = ext_str.to_ascii_lowercase();
59                self.extension().contains(&ext_lower.as_str())
60            }
61        }
62    }
63}
64
65#[derive(clap::ValueEnum, PartialEq, Clone, Copy, Debug)]
66pub enum TagType {
67    Species,
68    Individual,
69    Count,
70    Sex,
71    Bodypart,
72}
73
74impl fmt::Display for TagType {
75    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
76        write!(f, "{self:?}")
77    }
78}
79
80impl TagType {
81    pub fn col_name(self) -> &'static str {
82        match self {
83            TagType::Individual => "individual",
84            TagType::Species => "species",
85            TagType::Count => "count",
86            TagType::Sex => "sex",
87            TagType::Bodypart => "bodypart",
88        }
89    }
90    pub fn digikam_tag_prefix(self) -> &'static str {
91        match self {
92            TagType::Individual => "Individual/",
93            TagType::Species => "Species/",
94            TagType::Count => "Count/",
95            TagType::Sex => "Sex/",
96            TagType::Bodypart => "Bodypart/",
97        }
98    }
99    pub fn adobe_tag_prefix(self) -> &'static str {
100        match self {
101            TagType::Individual => "Individual|",
102            TagType::Species => "Species|",
103            TagType::Count => "Count|",
104            TagType::Sex => "Sex|",
105            TagType::Bodypart => "Bodypart|",
106        }
107    }
108}
109
110#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)]
111pub enum ExtractFilterType {
112    Species,
113    Path,
114    Individual,
115    Rating,
116    Event,
117    Custom,
118    Advanced,
119}
120
121#[derive(clap::ValueEnum, Clone, Copy, Debug)]
122pub enum SubdirType {
123    Species,
124    Individual,
125    Rating,
126    Custom,
127}
128
129/// Represents a parsed filter condition
130#[derive(Debug, Clone)]
131pub struct FilterCondition {
132    pub filter_type: ExtractFilterType,
133    pub operator: FilterOperator,
134    pub value: String,
135}
136
/// Comparison operators a filter condition can use.
///
/// Not implemented yet (TODO): `Contains` (substring match) and `Not`
/// (negation wrapper).
#[derive(Clone, Debug)]
pub enum FilterOperator {
    /// Exact match.
    Equal,
    /// `>=`
    GreaterEqual,
    /// `<=`
    LessEqual,
    /// `>`
    Greater,
    /// `<`
    Less,
    /// `min-max` range; both bounds are inclusive when converted to a Polars
    /// expression.
    Range(f64, f64),
}
149
/// Boolean connectives that join two filter expressions.
#[derive(Clone, Debug)]
pub enum LogicalOperator {
    /// Both sides must hold.
    And,
    /// At least one side must hold.
    Or,
}
156
157/// Complete filter expression tree
158#[derive(Debug, Clone)]
159pub enum FilterExpr {
160    Condition(FilterCondition),
161    Logical {
162        left: Box<FilterExpr>,
163        operator: LogicalOperator,
164        right: Box<FilterExpr>,
165    },
166    // Not(Box<FilterExpr>), // TODO, need to consider the multiple-tag case
167}
168
169impl ExtractFilterType {
170    /// Parse field aliases to filter types
171    pub fn from_alias(alias: &str) -> Option<Self> {
172        match alias.to_lowercase().as_str() {
173            "species" | "sp" | "s" => Some(Self::Species),
174            "individual" | "ind" | "i" => Some(Self::Individual),
175            "rating" | "rate" | "r" => Some(Self::Rating),
176            "path" | "p" => Some(Self::Path),
177            "event" | "e" => Some(Self::Event),
178            "custom" | "c" => Some(Self::Custom),
179            _ => None,
180        }
181    }
182}
183
184/// Parse advanced filter string into FilterExpr using pest
185pub fn parse_advanced_filter(input: &str) -> anyhow::Result<FilterExpr> {
186    use pest::Parser;
187
188    let pairs = FilterParser::parse(Rule::filter, input)
189        .map_err(|e| anyhow::anyhow!("Parse error: {e}"))?;
190
191    // Get the or_expr inside the filter rule
192    let or_expr = pairs
193        .into_iter()
194        .next()
195        .ok_or_else(|| anyhow::anyhow!("Empty parse result"))?
196        .into_inner()
197        .next()
198        .ok_or_else(|| anyhow::anyhow!("No expression found"))?;
199
200    build_expr(or_expr)
201}
202
203/// Build FilterExpr from pest Pair
204fn build_expr(pair: pest::iterators::Pair<Rule>) -> anyhow::Result<FilterExpr> {
205    match pair.as_rule() {
206        Rule::or_expr => {
207            let mut inner = pair.into_inner();
208            let mut expr = build_expr(inner.next().unwrap())?;
209
210            while let Some(next) = inner.next() {
211                if next.as_rule() == Rule::or_op {
212                    let right = build_expr(inner.next().unwrap())?;
213                    expr = FilterExpr::Logical {
214                        left: Box::new(expr),
215                        operator: LogicalOperator::Or,
216                        right: Box::new(right),
217                    };
218                }
219            }
220
221            Ok(expr)
222        }
223
224        Rule::and_expr => {
225            let mut inner = pair.into_inner();
226            let mut expr = build_expr(inner.next().unwrap())?;
227
228            while let Some(next) = inner.next() {
229                if next.as_rule() == Rule::and_op {
230                    let right = build_expr(inner.next().unwrap())?;
231                    expr = FilterExpr::Logical {
232                        left: Box::new(expr),
233                        operator: LogicalOperator::And,
234                        right: Box::new(right),
235                    };
236                }
237            }
238
239            Ok(expr)
240        }
241
242        Rule::primary => {
243            let inner = pair.into_inner().next().unwrap();
244            build_expr(inner)
245        }
246
247        Rule::paren_expr => {
248            let inner = pair.into_inner().next().unwrap();
249            build_expr(inner)
250        }
251
252        Rule::condition => {
253            let mut inner = pair.into_inner();
254            let field = inner.next().unwrap().as_str();
255            let value = inner.next().unwrap().as_str().trim(); // Trim whitespace from value
256
257            let filter_type = ExtractFilterType::from_alias(field)
258                .ok_or_else(|| anyhow::anyhow!("Unknown filter field: {field}"))?;
259
260            let (operator, cleaned_value) = parse_value_and_operator(value)?;
261
262            Ok(FilterExpr::Condition(FilterCondition {
263                filter_type,
264                operator,
265                value: cleaned_value,
266            }))
267        }
268
269        _ => Err(anyhow::anyhow!("Unexpected rule: {:?}", pair.as_rule())),
270    }
271}
272
273/// Parse value and detect operator (>=, <=, range, etc.)
274fn parse_value_and_operator(value: &str) -> anyhow::Result<(FilterOperator, String)> {
275    // Handle range syntax first (e.g., "1-5", "0.5-4.5")
276    if let Some((min_str, max_str)) = value.split_once('-')
277        && let (Ok(min), Ok(max)) = (min_str.trim().parse::<f64>(), max_str.trim().parse::<f64>())
278    {
279        return Ok((FilterOperator::Range(min, max), value.to_string()));
280    }
281
282    // Handle comparison operators
283    if let Some(stripped) = value.strip_prefix(">=") {
284        return Ok((FilterOperator::GreaterEqual, stripped.trim().to_string()));
285    }
286    if let Some(stripped) = value.strip_prefix("<=") {
287        return Ok((FilterOperator::LessEqual, stripped.trim().to_string()));
288    }
289    if let Some(stripped) = value.strip_prefix('>') {
290        return Ok((FilterOperator::Greater, stripped.trim().to_string()));
291    }
292    if let Some(stripped) = value.strip_prefix('<') {
293        return Ok((FilterOperator::Less, stripped.trim().to_string()));
294    }
295
296    // Remove quotes if present
297    let cleaned_value = if (value.starts_with('"') && value.ends_with('"'))
298        || (value.starts_with('\'') && value.ends_with('\''))
299    {
300        value[1..value.len() - 1].to_string()
301    } else {
302        value.to_string()
303    };
304
305    // Default to exact match for most fields, contains for path
306    Ok((FilterOperator::Equal, cleaned_value))
307}
308
309pub fn has_same_field_and_conditions(expr: &FilterExpr) -> bool {
310    fn collect_and_fields(expr: &FilterExpr, fields: &mut Vec<ExtractFilterType>) {
311        match expr {
312            FilterExpr::Condition(cond) => {
313                fields.push(cond.filter_type);
314            }
315            FilterExpr::Logical {
316                left,
317                operator,
318                right,
319            } => {
320                match operator {
321                    LogicalOperator::And => {
322                        collect_and_fields(left, fields);
323                        collect_and_fields(right, fields);
324                    }
325                    LogicalOperator::Or => {
326                        // OR branches are separate, don't mix them
327                    }
328                }
329            }
330        }
331    }
332
333    let mut fields = Vec::new();
334    collect_and_fields(expr, &mut fields);
335
336    // Check if any field appears more than once in AND conditions
337    for i in 0..fields.len() {
338        for j in (i + 1)..fields.len() {
339            if fields[i] == fields[j] {
340                return true;
341            }
342        }
343    }
344    false
345}
346
347/// Convert FilterExpr to Polars Expr
348///
349/// # Parameters
350/// * `expr` - The filter expression to convert
351/// * `use_aggregated` - If true, treats species/individual as list columns (for path-level filtering)
352pub fn filter_expr_to_polars(expr: &FilterExpr, use_aggregated: bool) -> anyhow::Result<Expr> {
353    use crate::utils::TagType;
354
355    match expr {
356        FilterExpr::Condition(condition) => {
357            let col_name = match condition.filter_type {
358                ExtractFilterType::Species => TagType::Species.col_name(),
359                ExtractFilterType::Individual => TagType::Individual.col_name(),
360                ExtractFilterType::Rating => "rating",
361                ExtractFilterType::Path => "path",
362                ExtractFilterType::Event => "event_id",
363                ExtractFilterType::Custom => "custom",
364                ExtractFilterType::Advanced => {
365                    return Err(anyhow::anyhow!(
366                        "Advanced filter should not appear in conditions"
367                    ));
368                }
369            };
370
371            let base_col = col(col_name);
372
373            match &condition.operator {
374                FilterOperator::Equal => {
375                    if condition.filter_type == ExtractFilterType::Path {
376                        // Path uses contains for substring matching
377                        Ok(base_col
378                            .str()
379                            .contains_literal(lit(condition.value.clone())))
380                    } else if use_aggregated
381                        && (condition.filter_type == ExtractFilterType::Species
382                            || condition.filter_type == ExtractFilterType::Individual)
383                    {
384                        // For aggregated species/individual, check if list contains the value
385                        Ok(base_col
386                            .list()
387                            .contains(lit(condition.value.clone()), false))
388                    } else {
389                        Ok(base_col.eq(lit(condition.value.clone())))
390                    }
391                }
392                FilterOperator::Range(min, max) => {
393                    // Rating stays as scalar in both modes
394                    let numeric_col = base_col.cast(DataType::Float64);
395                    Ok(numeric_col
396                        .clone()
397                        .is_not_null()
398                        .and(numeric_col.clone().gt_eq(lit(*min)))
399                        .and(numeric_col.lt_eq(lit(*max))))
400                }
401                FilterOperator::GreaterEqual => {
402                    if let Ok(value) = condition.value.parse::<f64>() {
403                        let numeric_col = base_col.cast(DataType::Float64);
404                        Ok(numeric_col
405                            .clone()
406                            .is_not_null()
407                            .and(numeric_col.gt_eq(lit(value))))
408                    } else {
409                        Err(anyhow::anyhow!(
410                            "GreaterEqual operator requires numeric value"
411                        ))
412                    }
413                }
414                FilterOperator::LessEqual => {
415                    if let Ok(value) = condition.value.parse::<f64>() {
416                        let numeric_col = base_col.cast(DataType::Float64);
417                        Ok(numeric_col
418                            .clone()
419                            .is_not_null()
420                            .and(numeric_col.lt_eq(lit(value))))
421                    } else {
422                        Err(anyhow::anyhow!("LessEqual operator requires numeric value"))
423                    }
424                }
425                FilterOperator::Greater => {
426                    if let Ok(value) = condition.value.parse::<f64>() {
427                        let numeric_col = base_col.cast(DataType::Float64);
428                        Ok(numeric_col
429                            .clone()
430                            .is_not_null()
431                            .and(numeric_col.gt(lit(value))))
432                    } else {
433                        Err(anyhow::anyhow!("Greater operator requires numeric value"))
434                    }
435                }
436                FilterOperator::Less => {
437                    if let Ok(value) = condition.value.parse::<f64>() {
438                        let numeric_col = base_col.cast(DataType::Float64);
439                        Ok(numeric_col
440                            .clone()
441                            .is_not_null()
442                            .and(numeric_col.lt(lit(value))))
443                    } else {
444                        Err(anyhow::anyhow!("Less operator requires numeric value"))
445                    }
446                }
447            }
448        }
449        FilterExpr::Logical {
450            left,
451            operator,
452            right,
453        } => {
454            let left_expr = filter_expr_to_polars(left, use_aggregated)?;
455            let right_expr = filter_expr_to_polars(right, use_aggregated)?;
456
457            match operator {
458                LogicalOperator::And => Ok(left_expr.and(right_expr)),
459                LogicalOperator::Or => Ok(left_expr.or(right_expr)),
460            }
461        }
462    }
463}
464
465// Serval ignores
466fn is_ignored(entry: &DirEntry) -> bool {
467    entry
468        .file_name()
469        .to_str()
470        .map(|s| s.starts_with('.') || s.contains("精选")) // ignore 精选 and .dtrash
471        .unwrap_or(false)
472}
473
474// Serval bar style
475pub fn serval_pb_style() -> ProgressStyle {
476    ProgressStyle::default_bar()
477        .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta})")
478        .unwrap()
479        .progress_chars("=> ")
480}
481
482pub fn configure_progress_bar(pb: &ProgressBar) {
483    pb.set_style(serval_pb_style());
484    pb.enable_steady_tick(std::time::Duration::from_secs(1));
485}
486
/// Makes `path` absolute by joining it onto the current working directory.
///
/// Absolute paths are returned unchanged. A leading `.` component is dropped
/// before joining. The file system is not consulted, so the path need not exist.
///
/// workaround for https://github.com/rust-lang/rust/issues/42869
/// ref. https://github.com/sharkdp/fd/pull/72/files
///
/// # Errors
/// Fails when the current working directory cannot be determined.
fn path_to_absolute(path: PathBuf) -> io::Result<PathBuf> {
    if !path.is_absolute() {
        let relative = match path.strip_prefix(".") {
            Ok(without_dot) => without_dot,
            Err(_) => path.as_path(),
        };
        let current_dir = env::current_dir()?;
        return Ok(current_dir.join(relative));
    }

    Ok(path)
}
496
497pub fn absolute_path(path: PathBuf) -> io::Result<PathBuf> {
498    let path_buf = path_to_absolute(path)?;
499    #[cfg(windows)]
500    let path_buf = Path::new(
501        path_buf
502            .as_path()
503            .to_string_lossy()
504            .trim_start_matches(r"\\?\"),
505    )
506    .to_path_buf();
507    Ok(path_buf)
508}
509
510pub fn path_enumerate(root_dir: PathBuf, resource_type: ResourceType) -> Vec<PathBuf> {
511    WalkDir::new(root_dir)
512        .into_iter()
513        .filter_entry(|e| !is_ignored(e))
514        .par_bridge()
515        .filter_map(Result::ok)
516        .filter(|e| resource_type.is_resource(e.path()))
517        .map(|e| e.into_path())
518        .collect()
519}
520
521pub fn resources_flatten(
522    deploy_dir: PathBuf,
523    working_dir: PathBuf,
524    resource_type: ResourceType,
525    dry_run: bool,
526    move_mode: bool,
527    prefix_deploy_id_in_name: bool,
528    keep_first_subdir: bool,
529) -> anyhow::Result<()> {
530    let deploy_id = deploy_dir
531        .file_name()
532        .ok_or_else(|| anyhow::anyhow!("Invalid deploy directory path: no filename"))?;
533
534    let base_output_dir = working_dir.join(deploy_id);
535    fs::create_dir_all(base_output_dir.clone())?;
536
537    let resource_paths = path_enumerate(deploy_dir.clone(), resource_type);
538    let num_resource = resource_paths.len();
539    println!(
540        "{} {}(s) found in {}",
541        num_resource,
542        resource_type,
543        deploy_dir.to_string_lossy()
544    );
545
546    let mut visited_path: HashSet<String> = HashSet::new();
547    let pb = if !dry_run {
548        Some(indicatif::ProgressBar::new(num_resource as u64))
549    } else {
550        None
551    };
552    if let Some(pb_ref) = &pb {
553        configure_progress_bar(pb_ref);
554    }
555    for resource in resource_paths {
556        let mut output_path = PathBuf::new();
557        let resource_parent = resource.parent().unwrap();
558        let relative_path = resource.strip_prefix(&deploy_dir).unwrap_or(&resource);
559        let mut relative_parts: Vec<OsString> = relative_path
560            .iter()
561            .map(|part| part.to_os_string())
562            .collect();
563        if relative_parts.is_empty() {
564            relative_parts.push("unnamed_file".into());
565        }
566
567        let mut output_dir = base_output_dir.clone();
568        if keep_first_subdir && relative_parts.len() > 1 {
569            output_dir = output_dir.join(&relative_parts[0]);
570            if !dry_run {
571                fs::create_dir_all(output_dir.clone())?;
572            }
573        }
574
575        let mut name_parts: Vec<OsString> = Vec::new();
576        if prefix_deploy_id_in_name {
577            name_parts.push(deploy_id.to_os_string());
578        }
579        name_parts.extend(relative_parts.into_iter());
580        let resource_name = name_parts.join(std::ffi::OsStr::new("-"));
581
582        output_path.push(output_dir.join(resource_name));
583
584        if !dry_run {
585            if move_mode {
586                fs::rename(resource, output_path)?;
587            } else {
588                fs::copy(resource, output_path)?;
589            }
590            if let Some(pb_ref) = &pb {
591                pb_ref.inc(1);
592            }
593        } else if !visited_path.contains(resource_parent.to_string_lossy().as_ref()) {
594            visited_path.insert(resource_parent.to_string_lossy().to_string());
595            println!(
596                "DRYRUN sample: From {} to {}",
597                resource.display(),
598                output_path.display()
599            );
600        }
601    }
602    if let Some(pb_ref) = pb {
603        pb_ref.finish();
604    }
605    Ok(())
606}
607
608pub fn deployments_align(
609    project_dir: PathBuf,
610    output_dir: PathBuf,
611    deploy_table: PathBuf,
612    resource_type: ResourceType,
613    dry_run: bool,
614    move_mode: bool,
615    keep_first_subdir: bool,
616) -> anyhow::Result<()> {
617    let deploy_df = CsvReadOptions::default()
618        .try_into_reader_with_file_path(Some(deploy_table))?
619        .finish()?;
620    let deploy_array = deploy_df["deploymentID"].str()?;
621
622    let deploy_iter = deploy_array.into_iter();
623    let num_iter = deploy_iter.len();
624    let pb = indicatif::ProgressBar::new(num_iter as u64);
625    configure_progress_bar(&pb);
626    for deploy_id in deploy_iter {
627        let (_, collection_name) = deploy_id.unwrap().rsplit_once('_').unwrap();
628        let deploy_dir = project_dir.join(collection_name).join(deploy_id.unwrap());
629        let collection_output_dir = output_dir.join(collection_name);
630        resources_flatten(
631            deploy_dir,
632            collection_output_dir.clone(),
633            resource_type,
634            dry_run,
635            move_mode,
636            true,
637            keep_first_subdir,
638        )?;
639        pb.inc(1);
640    }
641    pb.finish();
642    Ok(())
643}
644
645pub fn deployments_rename(project_dir: PathBuf, dry_run: bool) -> anyhow::Result<()> {
646    // rename deployment path name to <deployment_name>_<collection_name>
647    let mut count = 0;
648    for entry in project_dir.read_dir()? {
649        let entry = entry?;
650        let path = entry.path();
651        if path.is_dir() {
652            let mut collection_dir = path;
653            let original_collection_name = collection_dir
654                .file_name()
655                .and_then(|name| name.to_str())
656                .ok_or_else(|| anyhow::anyhow!("Invalid collection directory name"))?;
657            let collection_name_lower = original_collection_name.to_lowercase();
658            if original_collection_name != collection_name_lower {
659                let mut new_collection_dir = collection_dir.clone();
660                new_collection_dir.set_file_name(&collection_name_lower);
661                if dry_run {
662                    println!(
663                        "Will rename collection {original_collection_name} to {collection_name_lower}"
664                    );
665                } else {
666                    println!(
667                        "Renaming collection {} to {}",
668                        collection_dir.display(),
669                        new_collection_dir.display()
670                    );
671                    fs::rename(&collection_dir, &new_collection_dir)?;
672                    collection_dir = new_collection_dir;
673                }
674            }
675            let collection_name = collection_dir
676                .file_name()
677                .and_then(|name| name.to_str())
678                .ok_or_else(|| anyhow::anyhow!("Invalid collection directory name"))?;
679            for deploy in collection_dir.read_dir()? {
680                let deploy_dir = deploy?.path();
681                if deploy_dir.is_file() {
682                    continue;
683                }
684                count += 1;
685                let deploy_name = deploy_dir
686                    .file_name()
687                    .and_then(|name| name.to_str())
688                    .ok_or_else(|| anyhow::anyhow!("Invalid deploy directory name"))?;
689                if !deploy_name.contains(collection_name) {
690                    if dry_run {
691                        println!(
692                            "Will rename {} to {}_{}",
693                            deploy_name,
694                            deploy_name.to_lowercase(),
695                            collection_name.to_lowercase()
696                        );
697                    } else {
698                        let mut deploy_id_dir = deploy_dir.clone();
699                        deploy_id_dir.set_file_name(format!(
700                            "{}_{}",
701                            deploy_name.to_lowercase(),
702                            collection_name.to_lowercase()
703                        ));
704                        println!(
705                            "Renaming {} to {}",
706                            deploy_dir.display(),
707                            deploy_id_dir.display()
708                        );
709                        fs::rename(deploy_dir, deploy_id_dir)?;
710                    }
711                }
712            }
713        }
714    }
715    println!("Total directories: {count}");
716    Ok(())
717}
718
719// copy xmp files to output_dir and keep the directory structure
720pub fn copy_xmp(source_dir: PathBuf, output_dir: PathBuf) -> anyhow::Result<()> {
721    let xmp_paths = path_enumerate(source_dir.clone(), ResourceType::Xmp);
722    let num_xmp = xmp_paths.len();
723    println!("{num_xmp} xmp files found");
724    let pb = indicatif::ProgressBar::new(num_xmp as u64);
725    configure_progress_bar(&pb);
726
727    for xmp in xmp_paths {
728        let mut output_path = output_dir.clone();
729        let relative_path = xmp.strip_prefix(&source_dir).unwrap();
730        output_path.push(relative_path);
731        fs::create_dir_all(output_path.parent().unwrap())?;
732        fs::copy(xmp, output_path)?;
733        pb.inc(1);
734    }
735    pb.finish();
736    Ok(())
737}
738
739// Sync XMP metadata to corresponding media files
740pub fn sync_xmp_to_media(xmp_path: &Path) -> anyhow::Result<()> {
741    let media_path_str = match xmp_path.to_str() {
742        Some(path_str) => path_str.trim_end_matches(".xmp"),
743        None => {
744            eprintln!(
745                "Warning: Skipping XMP file with non-UTF-8 path: {}",
746                xmp_path.display()
747            );
748            return Ok(());
749        }
750    };
751    let media_path = Path::new(media_path_str);
752
753    if !media_path.exists() {
754        eprintln!(
755            "Warning: Skipping,'{}' does not exist.",
756            media_path.display()
757        );
758        return Ok(());
759    }
760
761    let xmp_content = fs::read_to_string(xmp_path)?;
762    let xmp_meta = XmpMeta::from_str(&xmp_content)?;
763
764    let mut xmp_file = XmpFile::new()?;
765    let open_options = OpenFileOptions::default().for_update();
766    xmp_file.open_file(media_path, open_options)?;
767    xmp_file.put_xmp(&xmp_meta)?;
768    xmp_file.try_close()?;
769
770    Ok(())
771}
772
773pub fn sync_xmp_directory(source_dir: PathBuf) -> anyhow::Result<()> {
774    let xmp_paths = path_enumerate(source_dir.clone(), ResourceType::Xmp);
775    let num_xmp = xmp_paths.len();
776
777    if num_xmp == 0 {
778        println!("No XMP files found in {}", source_dir.display());
779        return Ok(());
780    }
781
782    println!(
783        "Found {} XMP files to sync in {}",
784        num_xmp,
785        source_dir.display()
786    );
787
788    let pb = indicatif::ProgressBar::new(num_xmp as u64);
789    configure_progress_bar(&pb);
790    pb.set_message("Syncing XMP metadata to media files...");
791
792    let results: Vec<anyhow::Result<()>> = xmp_paths
793        .par_iter()
794        .map(|xmp_path| {
795            let result = sync_xmp_to_media(xmp_path);
796            pb.inc(1);
797            result
798        })
799        .collect();
800
801    pb.finish();
802
803    let (successes, failures): (Vec<_>, Vec<_>) = results.into_iter().partition(Result::is_ok);
804
805    let num_synced = successes.len();
806    let num_skipped = failures.len();
807
808    for result in failures {
809        if let Err(e) = result {
810            eprintln!("Failed to sync: {e}");
811        }
812    }
813
814    println!("Successfully synced {num_synced} XMP files, skipped {num_skipped} files");
815
816    Ok(())
817}
818
819pub fn sync_xmp_from_csv(csv_path: PathBuf) -> anyhow::Result<()> {
820    let df = CsvReadOptions::default()
821        .with_has_header(true)
822        .with_ignore_errors(false)
823        .try_into_reader_with_file_path(Some(csv_path))?
824        .finish()?;
825
826    let df_filtered = df
827        .lazy()
828        .filter(col("path").is_not_null())
829        .filter(col("path").str().ends_with(lit(".xmp")))
830        .unique(
831            Some(cols(vec!["path".to_string()])),
832            UniqueKeepStrategy::First,
833        )
834        .collect()?;
835
836    let num_files = df_filtered.height();
837    if num_files == 0 {
838        println!("No XMP files found in CSV");
839        return Ok(());
840    }
841
842    println!("Found {num_files} XMP files in CSV to sync");
843
844    let pb = indicatif::ProgressBar::new(num_files as u64);
845    configure_progress_bar(&pb);
846    pb.set_message("Syncing XMP files in CSV...");
847
848    let path_col = df_filtered.column("path")?.str()?;
849
850    let results: Vec<anyhow::Result<()>> = path_col
851        .par_iter()
852        .filter_map(|path| path.map(PathBuf::from))
853        .map(|xmp_path| {
854            let result = sync_xmp_to_media(&xmp_path);
855            pb.inc(1);
856            result
857        })
858        .collect();
859
860    pb.finish();
861
862    let (successes, failures): (Vec<_>, Vec<_>) = results.into_iter().partition(Result::is_ok);
863
864    let num_synced = successes.len();
865    let num_skipped = failures.len();
866
867    for result in failures {
868        if let Err(e) = result {
869            eprintln!("Failed to sync: {e}");
870        }
871    }
872
873    println!("Successfully synced {num_synced} XMP files, skipped {num_skipped} files");
874
875    Ok(())
876}
877
878// Remove all XMP files recursively from a directory
879pub fn remove_xmp_files(source_dir: PathBuf) -> anyhow::Result<()> {
880    let xmp_paths = path_enumerate(source_dir.clone(), ResourceType::Xmp);
881    let num_xmp = xmp_paths.len();
882
883    if num_xmp == 0 {
884        println!("No XMP files found in {}", source_dir.display());
885        return Ok(());
886    }
887
888    println!("Found {} XMP files in {}", num_xmp, source_dir.display());
889
890    let pb = indicatif::ProgressBar::new(num_xmp as u64);
891    configure_progress_bar(&pb);
892    pb.set_message("Removing XMP files...");
893
894    let results: Vec<anyhow::Result<()>> = xmp_paths
895        .par_iter()
896        .map(|xmp_path| {
897            let result = fs::remove_file(xmp_path);
898            pb.inc(1);
899            result.map_err(|e| anyhow::anyhow!("Failed to remove {}: {}", xmp_path.display(), e))
900        })
901        .collect();
902
903    pb.finish();
904
905    let (successes, failures): (Vec<_>, Vec<_>) = results.into_iter().partition(Result::is_ok);
906
907    let num_removed = successes.len();
908    let num_failed = failures.len();
909
910    for result in failures {
911        if let Err(e) = result {
912            eprintln!("{e}");
913        }
914    }
915
916    println!("Successfully removed {num_removed} XMP files, failed to remove {num_failed} files");
917    Ok(())
918}
919
920pub fn is_temporal_independent(
921    time_ref: String,
922    time: String,
923    min_delta_time: i32,
924) -> anyhow::Result<bool> {
925    // TODO Timezone
926    let dt_ref = NaiveDateTime::parse_from_str(time_ref.as_str(), "%Y-%m-%d %H:%M:%S")
927        .map_err(|e| anyhow::anyhow!("Failed to parse reference datetime '{time_ref}': {e}"))?;
928    let dt = NaiveDateTime::parse_from_str(time.as_str(), "%Y-%m-%d %H:%M:%S")
929        .map_err(|e| anyhow::anyhow!("Failed to parse datetime '{time}': {e}"))?;
930    let diff = dt - dt_ref;
931
932    Ok(diff
933        >= chrono::Duration::try_minutes(min_delta_time.into())
934            .ok_or_else(|| anyhow::anyhow!("Invalid minute value: {min_delta_time}"))?)
935}
936
/// Returns the intermediate components of `path`: everything between the first and the
/// last component.
///
/// Both `/` and `\` are treated as separators. The first component (for example the empty
/// string before a leading `/`, or a drive prefix) and the last component (the file name)
/// are dropped. A path with fewer than three components yields an empty vector instead of
/// panicking.
pub fn get_path_levels(path: String) -> Vec<String> {
    // Plain string splitting is used instead of `Path::components` for performance.
    let components: Vec<&str> = path.split(['/', '\\']).collect();

    // `get` returns `None` when the range is inverted (a single component), so short
    // paths fall back to an empty slice rather than an out-of-bounds panic.
    let end = components.len().saturating_sub(1);
    components
        .get(1..end)
        .unwrap_or(&[])
        .iter()
        .copied()
        .map(String::from)
        .collect()
}
956
957pub fn ignore_timezone(time: String) -> anyhow::Result<String> {
958    let time_remove_designator = time.replace('Z', "");
959    let time_ignore_zone = time_remove_designator.split('+').collect::<Vec<&str>>()[0];
960    Ok(time_ignore_zone.to_string())
961}
962
963pub fn sync_modified_time(source: PathBuf, target: PathBuf) -> anyhow::Result<()> {
964    let src = fs::metadata(source)?;
965    let dest = File::options().write(true).open(target)?;
966    let times = FileTimes::new()
967        .set_accessed(src.accessed()?)
968        .set_modified(src.modified()?);
969    dest.set_times(times)?;
970    Ok(())
971}
972
973pub fn tags_csv_translate(
974    source_csv: PathBuf,
975    taglist_csv: PathBuf,
976    output_dir: PathBuf,
977    from: &str,
978    to: &str,
979) -> anyhow::Result<()> {
980    let source_df = CsvReadOptions::default()
981        .try_into_reader_with_file_path(Some(source_csv.clone()))?
982        .finish()?;
983    let taglist_df = CsvReadOptions::default()
984        .try_into_reader_with_file_path(Some(taglist_csv))?
985        .finish()?;
986
987    let joined = source_df.clone().lazy().join(
988        taglist_df.clone().lazy(),
989        [col(TagType::Species.col_name())],
990        [col(from)],
991        JoinArgs::new(JoinType::Left),
992    );
993
994    let unknown = joined
995        .clone()
996        .filter(
997            col(to)
998                .is_null()
999                .and(col(TagType::Species.col_name()).is_not_null())
1000                .and(col(TagType::Species.col_name()).neq(lit(""))),
1001        )
1002        .select([col(TagType::Species.col_name())])
1003        .unique(None, UniqueKeepStrategy::Any)
1004        .collect()?;
1005    if unknown.height() > 0 {
1006        let mut sample = Vec::new();
1007        if let Ok(col) = unknown.column(TagType::Species.col_name())
1008            && let Ok(ca) = col.str()
1009        {
1010            for v in ca.into_iter().flatten().take(20) {
1011                sample.push(v.to_string());
1012            }
1013        }
1014        return Err(anyhow::anyhow!(
1015            "Unknown tag(s) not found in taglist: {}",
1016            sample.join(", ")
1017        ));
1018    }
1019
1020    let mut result = joined
1021        .drop(cols([TagType::Species.col_name()]))
1022        .rename(vec![to], vec![TagType::Species.col_name()], true)
1023        // .with_column(col(to).alias("species"))
1024        .collect()?;
1025
1026    let output_csv = output_dir.join(format!(
1027        "{}_translated.csv",
1028        source_csv
1029            .file_stem()
1030            .and_then(|stem| stem.to_str())
1031            .unwrap_or("tags")
1032    ));
1033    fs::create_dir_all(output_dir.clone())?;
1034    let mut file = std::fs::File::create(&output_csv)?;
1035    CsvWriter::new(&mut file)
1036        .include_bom(true)
1037        .finish(&mut result)?;
1038
1039    println!("Saved to {}", output_csv.display());
1040    Ok(())
1041}