langcodec_cli/
merge.rs

1use crate::formats::parse_custom_format;
2use crate::transformers::custom_format_to_resource;
3
4use langcodec::{Codec, converter};
5use rayon::prelude::*;
6
/// Strategy for handling conflicts when merging localization files.
///
/// This is the CLI-facing strategy type. `run_merge_command` translates each
/// variant into the variant of the same name in
/// `langcodec::types::ConflictStrategy` before merging.
// NOTE: the `clap::ValueEnum` derive surfaces the variant doc comments below as
// help text for the possible values, so treat them as user-facing strings.
#[derive(Debug, Clone, PartialEq, clap::ValueEnum)]
pub enum ConflictStrategy {
    /// Keep the first occurrence of a key
    First,
    /// Keep the last occurrence of a key (default)
    Last,
    /// Skip conflicting entries
    Skip,
}
17
18/// Run the merge command: merge multiple localization files into one output file.
19pub fn run_merge_command(
20    inputs: Vec<String>,
21    output: String,
22    strategy: ConflictStrategy,
23    lang: Option<String>,
24    source_language_override: Option<String>,
25    version_override: Option<String>,
26) {
27    if inputs.is_empty() {
28        eprintln!("Error: At least one input file is required.");
29        std::process::exit(1);
30    }
31
32    // Read all input files concurrently into Codecs, then combine and merge
33    println!("Reading {} input files...", inputs.len());
34    let read_results: Vec<Result<Codec, String>> = inputs
35        .par_iter()
36        .map(|input| read_input_to_codec(input, lang.clone()))
37        .collect();
38
39    let mut input_codecs: Vec<Codec> = Vec::with_capacity(read_results.len());
40    for (idx, res) in read_results.into_iter().enumerate() {
41        match res {
42            Ok(c) => input_codecs.push(c),
43            Err(e) => {
44                println!("❌ Error reading input file {}/{}", idx + 1, inputs.len());
45                eprintln!("{}", e);
46                std::process::exit(1);
47            }
48        }
49    }
50
51    // Combine all input codecs first, then merge by language
52    let mut codec = Codec::from_codecs(input_codecs);
53
54    // Skip validation for merge operations since we expect multiple resources with potentially duplicate languages
55
56    // Merge resources using the new lib crate method
57    println!("Merging resources...");
58    let conflict_strategy = match strategy {
59        ConflictStrategy::First => langcodec::types::ConflictStrategy::First,
60        ConflictStrategy::Last => langcodec::types::ConflictStrategy::Last,
61        ConflictStrategy::Skip => langcodec::types::ConflictStrategy::Skip,
62    };
63
64    let merge_count = codec.merge_resources(&conflict_strategy);
65    println!("Merged {} language groups", merge_count);
66
67    println!("Writing merged output...");
68    match converter::infer_format_from_path(output.clone()) {
69        Some(format) => {
70            println!("Converting resources to format: {:?}", format);
71            // Set source_language field in the resources to make sure xcstrings format would not throw an error
72            // First, try to get the source language from the first resource if it exists; otherwise, the first resource's language
73            // would be used as the source language. If the two checks fail, the default value "en" would be used.
74            let source_language = source_language_override
75                .filter(|s| !s.trim().is_empty())
76                .unwrap_or_else(|| {
77                    codec
78                        .resources
79                        .first()
80                        .and_then(|r| {
81                            r.metadata
82                                .custom
83                                .get("source_language")
84                                .cloned()
85                                .filter(|s| !s.trim().is_empty())
86                        })
87                        .unwrap_or_else(|| {
88                            codec
89                                .resources
90                                .first()
91                                .map(|r| r.metadata.language.clone())
92                                .unwrap_or("en".to_string())
93                        })
94                });
95
96            println!("Setting metadata.source_language to: {}", source_language);
97
98            // Set version field in the resources to make sure xcstrings format would not throw an error
99            let version = version_override.unwrap_or_else(|| {
100                codec
101                    .resources
102                    .first()
103                    .and_then(|r| r.metadata.custom.get("version").cloned())
104                    .unwrap_or_else(|| "1.0".to_string())
105            });
106
107            println!("Setting metadata.version to: {}", version);
108
109            codec.iter_mut().for_each(|r| {
110                r.metadata
111                    .custom
112                    .insert("source_language".to_string(), source_language.clone());
113                r.metadata
114                    .custom
115                    .insert("version".to_string(), version.clone());
116            });
117
118            if let Err(e) = converter::convert_resources_to_format(codec.resources, &output, format)
119            {
120                println!("❌ Error converting resources to format");
121                eprintln!("Error converting to {}: {}", output, e);
122                std::process::exit(1);
123            }
124        }
125        None => {
126            if codec.resources.len() == 1 {
127                println!("Writing single resource to output file");
128                if let Some(resource) = codec.resources.first()
129                    && let Err(e) = Codec::write_resource_to_file(resource, &output)
130                {
131                    println!("❌ Error writing output file");
132                    eprintln!("Error writing to {}: {}", output, e);
133                    std::process::exit(1);
134                }
135            } else {
136                println!("❌ Error writing output file");
137                eprintln!("Error writing to {}: multiple resources", output);
138                std::process::exit(1);
139            }
140        }
141    }
142
143    println!(
144        "✅ Successfully merged {} files into {}",
145        inputs.len(),
146        output
147    );
148}
149
150/// Read a single input file into a vector of Resources, supporting both standard and custom formats
151fn read_input_to_resources(
152    input: &str,
153    lang: Option<String>,
154) -> Result<Vec<langcodec::Resource>, String> {
155    // Try standard format via lib crate (uses extension + language inference)
156    {
157        let mut local_codec = Codec::new();
158        if let Ok(()) = local_codec.read_file_by_extension(input, lang.clone()) {
159            return Ok(local_codec.resources);
160        }
161    }
162
163    // Try custom JSON/YAML formats (for merge, we follow the existing JSON-language-map behavior)
164    if input.ends_with(".json") || input.ends_with(".yaml") || input.ends_with(".yml") {
165        // Validate custom format file
166        crate::validation::validate_custom_format_file(input)
167            .map_err(|e| format!("Failed to validate {}: {}", input, e))?;
168
169        // Auto-detect format based on file content
170        let file_content = std::fs::read_to_string(input)
171            .map_err(|e| format!("Error reading file {}: {}", input, e))?;
172
173        // Validate file content (ignore returned format; keep parity with existing merge behavior)
174        crate::formats::validate_custom_format_content(input, &file_content)
175            .map_err(|e| format!("Invalid custom format {}: {}", input, e))?;
176
177        // Convert custom format to Resource using JSON language map to match current merge behavior
178        let resources = custom_format_to_resource(
179            input.to_string(),
180            parse_custom_format("json-language-map")
181                .map_err(|e| format!("Failed to parse custom format: {}", e))?,
182        )
183        .map_err(|e| format!("Failed to convert custom format {}: {}", input, e))?;
184
185        return Ok(resources);
186    }
187
188    Err(format!("Error reading {}: unsupported format", input))
189}
190
191/// Read a single input into a Codec (wrapper over read_input_to_resources)
192fn read_input_to_codec(input: &str, lang: Option<String>) -> Result<Codec, String> {
193    let resources = read_input_to_resources(input, lang)?;
194    Ok(Codec { resources })
195}