serval/
utils.rs

1use chrono::NaiveDateTime;
2use core::fmt;
3use polars::prelude::*;
4use std::collections::HashSet;
5use std::ffi::{OsStr, OsString};
6use std::fs::{File, FileTimes};
7use std::io;
8use std::{
9    env, fs,
10    path::{Path, PathBuf},
11};
12use walkdir::{DirEntry, WalkDir};
13
14#[derive(clap::ValueEnum, Clone, Copy, Debug)]
15pub enum ResourceType {
16    Xmp,
17    Image,
18    Video,
19    Media, // Image or Video
20    All,   // All resources (for serval align)
21}
22
23impl fmt::Display for ResourceType {
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        write!(f, "{:?}", self)
26    }
27}
28
29impl ResourceType {
30    fn extension(self) -> Vec<&'static str> {
31        match self {
32            ResourceType::Image => vec!["jpg", "jpeg", "png"],
33            ResourceType::Video => vec!["avi", "mp4", "mov"],
34            ResourceType::Xmp => vec!["xmp"],
35            ResourceType::Media => vec!["jpg", "jpeg", "png", "avi", "mp4", "mov"],
36            ResourceType::All => vec!["jpg", "jpeg", "png", "avi", "mp4", "mov", "xmp"],
37        }
38    }
39
40    fn is_resource(self, path: &Path) -> bool {
41        match path.extension() {
42            None => false,
43            Some(x) => self
44                .extension()
45                .contains(&x.to_str().unwrap().to_lowercase().as_str()),
46        }
47    }
48}
49
50#[derive(clap::ValueEnum, PartialEq, Clone, Copy, Debug)]
51pub enum TagType {
52    Species,
53    Individual,
54    Count,
55    Sex,
56    Bodypart,
57}
58
59impl fmt::Display for TagType {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        write!(f, "{:?}", self)
62    }
63}
64
65impl TagType {
66    pub fn col_name(self) -> &'static str {
67        match self {
68            TagType::Individual => "individual",
69            TagType::Species => "species",
70            TagType::Count => "count",
71            TagType::Sex => "sex",
72            TagType::Bodypart => "bodypart",
73        }
74    }
75    pub fn digikam_tag_prefix(self) -> &'static str {
76        match self {
77            TagType::Individual => "Individual/",
78            TagType::Species => "Species/",
79            TagType::Count => "Count/",
80            TagType::Sex => "Sex/",
81            TagType::Bodypart => "Bodypart/",
82        }
83    }
84    pub fn adobe_tag_prefix(self) -> &'static str {
85        match self {
86            TagType::Individual => "Individual|",
87            TagType::Species => "Species|",
88            TagType::Count => "Count|",
89            TagType::Sex => "Sex|",
90            TagType::Bodypart => "Bodypart|",
91        }
92    }
93}
94
95#[derive(clap::ValueEnum, Clone, Copy, Debug)]
96pub enum ExtractFilterType {
97    Species,
98    Path,
99    Individual,
100    Rating,
101    Custom,
102}
103
104fn is_ignored(entry: &DirEntry) -> bool {
105    entry
106        .file_name()
107        .to_str()
108        .map(|s| s.starts_with('.') || s.contains("精选")) // ignore 精选 and .dtrash
109        .unwrap_or(false)
110}
111
// workaround for https://github.com/rust-lang/rust/issues/42869
// ref. https://github.com/sharkdp/fd/pull/72/files
/// Makes `path` absolute without touching the filesystem.
///
/// Absolute paths are returned unchanged. Relative paths have a leading `.`
/// component removed (if present) and are then joined onto the current
/// working directory; failing to query that directory is the only error.
fn path_to_absolute(path: PathBuf) -> io::Result<PathBuf> {
    if !path.is_absolute() {
        let cwd = env::current_dir()?;
        let relative = match path.strip_prefix(".") {
            Ok(stripped) => stripped,
            Err(_) => path.as_path(),
        };
        return Ok(cwd.join(relative));
    }
    Ok(path)
}
121
122pub fn absolute_path(path: PathBuf) -> io::Result<PathBuf> {
123    let path_buf = path_to_absolute(path)?;
124    #[cfg(windows)]
125    let path_buf = Path::new(
126        path_buf
127            .as_path()
128            .to_string_lossy()
129            .trim_start_matches(r"\\?\"),
130    )
131    .to_path_buf();
132    Ok(path_buf)
133}
134
135pub fn path_enumerate(root_dir: PathBuf, resource_type: ResourceType) -> Vec<PathBuf> {
136    let mut paths: Vec<PathBuf> = vec![];
137    for entry in WalkDir::new(root_dir)
138        .into_iter()
139        .filter_entry(|e| !is_ignored(e))
140        .filter_map(Result::ok)
141        .filter(|e| resource_type.is_resource(e.path()))
142    {
143        paths.push(entry.into_path());
144    }
145    paths
146}
147
148pub fn resources_align(
149    deploy_dir: PathBuf,
150    working_dir: PathBuf,
151    resource_type: ResourceType,
152    dry_run: bool,
153    move_mode: bool,
154) -> anyhow::Result<()> {
155    let deploy_id = deploy_dir.file_name().unwrap();
156    let deploy_path = deploy_dir.to_str();
157
158    let output_dir = working_dir.join(deploy_id);
159    fs::create_dir_all(output_dir.clone())?;
160
161    let resource_paths = path_enumerate(deploy_dir.clone(), resource_type);
162    let num_resource = resource_paths.len();
163    println!(
164        "{} {}(s) found in {}",
165        num_resource,
166        resource_type,
167        deploy_dir.to_str().unwrap()
168    );
169
170    let mut visited_path: HashSet<String> = HashSet::new();
171    for resource in resource_paths {
172        let mut output_path = PathBuf::new();
173        let resource_parent = resource.parent().unwrap();
174        let mut parent_names: Vec<OsString> = Vec::new();
175
176        let mut resource_name = deploy_id.to_os_string();
177        let mut current_parent = resource.parent();
178        while let Some(parent) = current_parent {
179            if parent.to_str() == deploy_path {
180                break;
181            }
182            parent_names.push(parent.file_name().unwrap().to_os_string());
183            current_parent = parent.parent();
184        }
185
186        parent_names.reverse();
187        for parent_name in parent_names {
188            resource_name.push("-");
189            resource_name.push(&parent_name);
190        }
191        resource_name.push("-");
192        resource_name.push(resource.file_name().unwrap());
193
194        output_path.push(output_dir.join(resource_name));
195
196        if !dry_run {
197            let pb = indicatif::ProgressBar::new(num_resource as u64);
198
199            if move_mode {
200                fs::rename(resource, output_path)?;
201                pb.inc(1);
202            } else {
203                fs::copy(resource, output_path)?;
204                pb.inc(1);
205            }
206        } else if !visited_path.contains(resource_parent.to_str().unwrap()) {
207            visited_path.insert(resource_parent.to_str().unwrap().to_string());
208            println!(
209                "DRYRUN sample: From {} to {}",
210                resource.display(),
211                output_path.display()
212            );
213        }
214    }
215    Ok(())
216}
217
218pub fn deployments_align(
219    project_dir: PathBuf,
220    output_dir: PathBuf,
221    deploy_table: PathBuf,
222    resource_type: ResourceType,
223    dry_run: bool,
224    move_mode: bool,
225) -> anyhow::Result<()> {
226    let deploy_df = CsvReadOptions::default()
227        .try_into_reader_with_file_path(Some(deploy_table))?
228        .finish()?;
229    let deploy_array = deploy_df["deploymentID"].str()?;
230
231    let deploy_iter = deploy_array.into_iter();
232    let num_iter = deploy_iter.len();
233    let pb = indicatif::ProgressBar::new(num_iter as u64);
234    for deploy_id in deploy_iter {
235        let (_, collection_name) = deploy_id.unwrap().rsplit_once('_').unwrap();
236        let deploy_dir = project_dir.join(collection_name).join(deploy_id.unwrap());
237        let collection_output_dir = output_dir.join(collection_name);
238        resources_align(
239            deploy_dir,
240            collection_output_dir.clone(),
241            resource_type,
242            dry_run,
243            move_mode,
244        )?;
245        pb.inc(1);
246    }
247    Ok(())
248}
249
250pub fn deployments_rename(project_dir: PathBuf, dry_run: bool) -> anyhow::Result<()> {
251    // rename deployment path name to <deployment_name>_<collection_name>
252    let mut count = 0;
253    for entry in project_dir.read_dir()? {
254        let entry = entry?;
255        let path = entry.path();
256        if path.is_dir() {
257            let collection = path;
258            for deploy in collection.read_dir()? {
259                let deploy_dir = deploy.unwrap().path();
260                if deploy_dir.is_file() {
261                    continue;
262                }
263                count += 1;
264                let collection_name = deploy_dir
265                    .parent()
266                    .unwrap()
267                    .file_name()
268                    .unwrap()
269                    .to_str()
270                    .unwrap();
271                let deploy_name = deploy_dir.file_name().unwrap().to_str().unwrap();
272                if !deploy_name.contains(collection_name) {
273                    if dry_run {
274                        println!(
275                            "Will rename {} to {}_{}",
276                            deploy_name, deploy_name, collection_name
277                        );
278                    } else {
279                        let mut deploy_id_dir = deploy_dir.clone();
280                        deploy_id_dir.set_file_name(format!("{}_{}", deploy_name, collection_name));
281                        fs::rename(deploy_dir, deploy_id_dir)?;
282                    }
283                }
284            }
285        }
286    }
287    println!("Total directories: {}", count);
288    Ok(())
289}
290
291// copy xmp files to output_dir and keep the directory structure
292pub fn copy_xmp(source_dir: PathBuf, output_dir: PathBuf) -> anyhow::Result<()> {
293    let xmp_paths = path_enumerate(source_dir.clone(), ResourceType::Xmp);
294    let num_xmp = xmp_paths.len();
295    println!("{} xmp files found", num_xmp);
296    let pb = indicatif::ProgressBar::new(num_xmp as u64);
297
298    for xmp in xmp_paths {
299        let mut output_path = output_dir.clone();
300        let relative_path = xmp.strip_prefix(&source_dir).unwrap();
301        output_path.push(relative_path);
302        fs::create_dir_all(output_path.parent().unwrap())?;
303        fs::copy(xmp, output_path)?;
304        pb.inc(1);
305    }
306    pb.finish();
307    Ok(())
308}
309
310pub fn is_temporal_independent(
311    time_ref: String,
312    time: String,
313    min_delta_time: i32,
314) -> anyhow::Result<bool> {
315    // TODO Timezone
316    let dt_ref = NaiveDateTime::parse_from_str(time_ref.as_str(), "%Y-%m-%d %H:%M:%S").unwrap();
317    let dt = NaiveDateTime::parse_from_str(time.as_str(), "%Y-%m-%d %H:%M:%S").unwrap();
318    let diff = dt - dt_ref;
319
320    Ok(diff >= chrono::Duration::try_minutes(min_delta_time.into()).unwrap())
321}
322
/// Splits `path` into its intermediate levels.
///
/// Backslashes are treated as separators too. The first segment (the part
/// before the first separator, e.g. an empty root or a drive prefix) and the
/// last segment (the file name) are dropped; everything in between is
/// returned in order.
///
/// A path with no separator at all — including the empty string — has no
/// intermediate levels and yields an empty vector (this used to panic).
///
/// Plain string splitting is used on purpose: an earlier `Path::components`
/// based implementation was abandoned for performance reasons.
pub fn get_path_levels(path: String) -> Vec<String> {
    let normalized_path = path.replace('\\', "/");
    let levels: Vec<&str> = normalized_path.split('/').collect();
    if levels.len() < 2 {
        return Vec::new();
    }
    levels[1..levels.len() - 1]
        .iter()
        .map(|level| level.to_string())
        .collect()
}
342
343pub fn ignore_timezone(time: String) -> anyhow::Result<String> {
344    let time_remove_designator = time.replace('Z', "");
345    let time_ignore_zone = time_remove_designator.split('+').collect::<Vec<&str>>()[0];
346    Ok(time_ignore_zone.to_string())
347}
348
349pub fn append_ext(ext: impl AsRef<OsStr>, path: PathBuf) -> anyhow::Result<PathBuf> {
350    let mut os_string: OsString = path.into();
351    os_string.push(".");
352    os_string.push(ext.as_ref());
353    Ok(os_string.into())
354}
355
356pub fn sync_modified_time(source: PathBuf, target: PathBuf) -> anyhow::Result<()> {
357    let src = fs::metadata(source)?;
358    let dest = File::options().write(true).open(target)?;
359    let times = FileTimes::new()
360        .set_accessed(src.accessed()?)
361        .set_modified(src.modified()?);
362    dest.set_times(times)?;
363    Ok(())
364}
365
366pub fn tags_csv_translate(
367    source_csv: PathBuf,
368    taglist_csv: PathBuf,
369    output_dir: PathBuf,
370    from: &str,
371    to: &str,
372) -> anyhow::Result<()> {
373    let source_df = CsvReadOptions::default()
374        .try_into_reader_with_file_path(Some(source_csv.clone()))?
375        .finish()?;
376    let taglist_df = CsvReadOptions::default()
377        .try_into_reader_with_file_path(Some(taglist_csv))?
378        .finish()?;
379
380    let mut result = source_df
381        .clone()
382        .lazy()
383        .join(
384            taglist_df.clone().lazy(),
385            [col("species")],
386            [col(from)],
387            JoinArgs::new(JoinType::Left),
388        )
389        .drop(vec!["species"])
390        .rename(vec![to], vec!["species"], true)
391        // .with_column(col(to).alias("species"))
392        .collect()?;
393
394    let output_csv = output_dir.join(format!(
395        "{}_translated.csv",
396        source_csv.file_stem().unwrap().to_str().unwrap()
397    ));
398    fs::create_dir_all(output_dir.clone())?;
399    let mut file = std::fs::File::create(&output_csv)?;
400    CsvWriter::new(&mut file)
401        .include_bom(true)
402        .finish(&mut result)?;
403
404    println!("Saved to {}", output_csv.display());
405    Ok(())
406}