Skip to main content

langcodec_cli/
merge.rs

1use crate::formats::parse_custom_format;
2use crate::transformers::custom_format_to_resource;
3
4use langcodec::{Codec, ReadOptions, converter};
5use rayon::prelude::*;
6
7/// Strategy for handling conflicts when merging localization files.
8#[derive(Debug, Clone, PartialEq, clap::ValueEnum)]
9pub enum ConflictStrategy {
10    /// Keep the first occurrence of a key
11    First,
12    /// Keep the last occurrence of a key (default)
13    Last,
14    /// Skip conflicting entries
15    Skip,
16}
17
18/// Run the merge command: merge multiple localization files into one output file.
19pub fn run_merge_command(
20    inputs: Vec<String>,
21    output: String,
22    strategy: ConflictStrategy,
23    lang: Option<String>,
24    source_language_override: Option<String>,
25    version_override: Option<String>,
26    strict: bool,
27) {
28    if inputs.is_empty() {
29        eprintln!("Error: At least one input file is required.");
30        std::process::exit(1);
31    }
32
33    // Read all input files concurrently into Codecs, then combine and merge
34    println!("Reading {} input files...", inputs.len());
35    let read_results: Vec<Result<Codec, String>> = inputs
36        .par_iter()
37        .map(|input| read_input_to_codec(input, lang.clone(), strict))
38        .collect();
39
40    let mut input_codecs: Vec<Codec> = Vec::with_capacity(read_results.len());
41    for (idx, res) in read_results.into_iter().enumerate() {
42        match res {
43            Ok(c) => input_codecs.push(c),
44            Err(e) => {
45                println!("❌ Error reading input file {}/{}", idx + 1, inputs.len());
46                eprintln!("{}", e);
47                std::process::exit(1);
48            }
49        }
50    }
51
52    // Combine all input codecs first, then merge by language
53    let mut codec = Codec::from_codecs(input_codecs);
54
55    // Skip validation for merge operations since we expect multiple resources with potentially duplicate languages
56
57    // Merge resources using the new lib crate method
58    println!("Merging resources...");
59    let conflict_strategy = match strategy {
60        ConflictStrategy::First => langcodec::types::ConflictStrategy::First,
61        ConflictStrategy::Last => langcodec::types::ConflictStrategy::Last,
62        ConflictStrategy::Skip => langcodec::types::ConflictStrategy::Skip,
63    };
64
65    let merge_count = codec.merge_resources(&conflict_strategy);
66    println!("Merged {} language groups", merge_count);
67
68    println!("Writing merged output...");
69    match converter::infer_format_from_path(output.clone()) {
70        Some(format) => {
71            println!("Converting resources to format: {:?}", format);
72            // Set source_language field in the resources to make sure xcstrings format would not throw an error
73            // First, try to get the source language from the first resource if it exists; otherwise, the first resource's language
74            // would be used as the source language. If the two checks fail, the default value "en" would be used.
75            let source_language = source_language_override
76                .filter(|s| !s.trim().is_empty())
77                .unwrap_or_else(|| {
78                    codec
79                        .resources
80                        .first()
81                        .and_then(|r| {
82                            r.metadata
83                                .custom
84                                .get("source_language")
85                                .cloned()
86                                .filter(|s| !s.trim().is_empty())
87                        })
88                        .unwrap_or_else(|| {
89                            codec
90                                .resources
91                                .first()
92                                .map(|r| r.metadata.language.clone())
93                                .unwrap_or("en".to_string())
94                        })
95                });
96
97            println!("Setting metadata.source_language to: {}", source_language);
98
99            // Set version field in the resources to make sure xcstrings format would not throw an error
100            let version = version_override.unwrap_or_else(|| {
101                codec
102                    .resources
103                    .first()
104                    .and_then(|r| r.metadata.custom.get("version").cloned())
105                    .unwrap_or_else(|| "1.0".to_string())
106            });
107
108            println!("Setting metadata.version to: {}", version);
109
110            codec.iter_mut().for_each(|r| {
111                r.metadata
112                    .custom
113                    .insert("source_language".to_string(), source_language.clone());
114                r.metadata
115                    .custom
116                    .insert("version".to_string(), version.clone());
117            });
118
119            if let Err(e) = converter::convert_resources_to_format(codec.resources, &output, format)
120            {
121                println!("❌ Error converting resources to format");
122                eprintln!("Error converting to {}: {}", output, e);
123                std::process::exit(1);
124            }
125        }
126        None => {
127            if codec.resources.len() == 1 {
128                println!("Writing single resource to output file");
129                if let Some(resource) = codec.resources.first()
130                    && let Err(e) = Codec::write_resource_to_file(resource, &output)
131                {
132                    println!("❌ Error writing output file");
133                    eprintln!("Error writing to {}: {}", output, e);
134                    std::process::exit(1);
135                }
136            } else {
137                println!("❌ Error writing output file");
138                eprintln!("Error writing to {}: multiple resources", output);
139                std::process::exit(1);
140            }
141        }
142    }
143
144    println!(
145        "✅ Successfully merged {} files into {}",
146        inputs.len(),
147        output
148    );
149}
150
151/// Read a single input file into a vector of Resources, supporting both standard and custom formats
152fn read_input_to_resources(
153    input: &str,
154    lang: Option<String>,
155    strict: bool,
156) -> Result<Vec<langcodec::Resource>, String> {
157    if strict {
158        if input.ends_with(".json") || input.ends_with(".yaml") || input.ends_with(".yml") {
159            crate::validation::validate_custom_format_file(input)
160                .map_err(|e| format!("Failed to validate {}: {}", input, e))?;
161
162            let file_content = std::fs::read_to_string(input)
163                .map_err(|e| format!("Error reading file {}: {}", input, e))?;
164
165            crate::formats::validate_custom_format_content(input, &file_content)
166                .map_err(|e| format!("Invalid custom format {}: {}", input, e))?;
167
168            let resources = custom_format_to_resource(
169                input.to_string(),
170                parse_custom_format("json-language-map")
171                    .map_err(|e| format!("Failed to parse custom format: {}", e))?,
172            )
173            .map_err(|e| format!("Failed to convert custom format {}: {}", input, e))?;
174
175            return Ok(resources);
176        }
177
178        let mut local_codec = Codec::new();
179        local_codec
180            .read_file_by_extension_with_options(
181                input,
182                &ReadOptions::new()
183                    .with_language_hint(lang)
184                    .with_strict(true),
185            )
186            .map_err(|e| format!("Error reading {}: {}", input, e))?;
187        return Ok(local_codec.resources);
188    }
189
190    // Try standard format via lib crate (uses extension + language inference)
191    {
192        let mut local_codec = Codec::new();
193        if let Ok(()) = local_codec.read_file_by_extension(input, lang.clone()) {
194            return Ok(local_codec.resources);
195        }
196    }
197
198    // Try custom JSON/YAML formats (for merge, we follow the existing JSON-language-map behavior)
199    if input.ends_with(".json") || input.ends_with(".yaml") || input.ends_with(".yml") {
200        // Validate custom format file
201        crate::validation::validate_custom_format_file(input)
202            .map_err(|e| format!("Failed to validate {}: {}", input, e))?;
203
204        // Auto-detect format based on file content
205        let file_content = std::fs::read_to_string(input)
206            .map_err(|e| format!("Error reading file {}: {}", input, e))?;
207
208        // Validate file content (ignore returned format; keep parity with existing merge behavior)
209        crate::formats::validate_custom_format_content(input, &file_content)
210            .map_err(|e| format!("Invalid custom format {}: {}", input, e))?;
211
212        // Convert custom format to Resource using JSON language map to match current merge behavior
213        let resources = custom_format_to_resource(
214            input.to_string(),
215            parse_custom_format("json-language-map")
216                .map_err(|e| format!("Failed to parse custom format: {}", e))?,
217        )
218        .map_err(|e| format!("Failed to convert custom format {}: {}", input, e))?;
219
220        return Ok(resources);
221    }
222
223    Err(format!("Error reading {}: unsupported format", input))
224}
225
226/// Read a single input into a Codec (wrapper over read_input_to_resources)
227fn read_input_to_codec(input: &str, lang: Option<String>, strict: bool) -> Result<Codec, String> {
228    let resources = read_input_to_resources(input, lang, strict)?;
229    Ok(Codec { resources })
230}