lindera_assets/
lib.rs

1use std::error::Error;
2use std::path::Path;
3
4use lindera_core::dictionary_builder::DictionaryBuilder;
5
/// Parameters describing which dictionary asset [`fetch`] retrieves and where it is placed.
///
/// All directory and file names are joined onto the working directory that [`fetch`]
/// selects (the cache directory or Cargo's `OUT_DIR`).
#[derive(Debug, Clone)]
pub struct FetchParams {
    /// Dictionary file name under which the downloaded archive is stored.
    pub file_name: &'static str,
    /// MeCab directory: the directory holding the source dictionary, which is also the
    /// name of the top-level directory expected inside the downloaded archive.
    pub input_dir: &'static str,
    /// Lindera directory: the directory that receives the built dictionary.
    pub output_dir: &'static str,

    /// Dummy input for docs.rs; written as the dictionary CSV when `DOCS_RS` is set.
    pub dummy_input: &'static str,

    /// URL from which to fetch the asset.
    pub download_url: &'static str,
}
20
/// Removes everything inside `dir` while leaving `dir` itself in place.
///
/// Sub-directories are deleted recursively; every other entry is removed as a file.
/// When `dir` is not an existing directory, nothing is touched and `Ok(())` is returned.
///
/// # Errors
///
/// Returns the first I/O error raised while listing `dir` or deleting one of its entries.
#[cfg(not(target_os = "windows"))]
fn empty_directory(dir: &Path) -> Result<(), Box<dyn Error>> {
    // Nothing to clean up unless `dir` is an existing directory.
    if !dir.is_dir() {
        return Ok(());
    }

    for child in std::fs::read_dir(dir)? {
        let target = child?.path();
        let removal = if target.is_dir() {
            std::fs::remove_dir_all(&target)
        } else {
            std::fs::remove_file(&target)
        };
        removal?;
    }

    Ok(())
}
36
/// Recursively copies the contents of `src` into `dst`.
///
/// `dst` (including any missing parent directories) is created when it does not exist.
/// Sub-directories of `src` are copied recursively; every other entry is copied with
/// [`std::fs::copy`], overwriting a same-named file already present in `dst`.
///
/// # Errors
///
/// Returns the first I/O error raised while creating `dst`, listing `src`, or copying
/// an entry.
#[cfg(target_os = "windows")]
fn copy_dir_all(src: &Path, dst: &Path) -> Result<(), Box<dyn Error>> {
    // `create_dir_all` is a no-op for an existing directory and also creates missing
    // parents, so no separate existence check is needed.
    std::fs::create_dir_all(dst)?;

    for entry in std::fs::read_dir(src)? {
        let entry = entry?;
        let entry_path = entry.path();
        let dst_path = dst.join(entry.file_name());

        if entry_path.is_dir() {
            copy_dir_all(&entry_path, &dst_path)?;
        } else {
            std::fs::copy(&entry_path, &dst_path)?;
        }
    }
    Ok(())
}
56
57/// Fetch the necessary assets and then build the dictionary using `builder`
58pub fn fetch(params: FetchParams, builder: impl DictionaryBuilder) -> Result<(), Box<dyn Error>> {
59    use std::env;
60    use std::fs::{create_dir, rename, File};
61    use std::io::{self, Cursor, Read, Write};
62    use std::path::{Path, PathBuf};
63
64    use encoding::all::UTF_8;
65    use encoding::{EncoderTrap, Encoding};
66    use flate2::read::GzDecoder;
67    use tar::Archive;
68
69    println!("cargo:rerun-if-changed=build.rs");
70    println!("cargo:rerun-if-changed=Cargo.toml");
71
72    // Directory path for build package
73    // if the `LINDERA_CACHE` variable is defined, behaves like a cache, where data is invalidated only:
74    // - on new lindera-assets version
75    // - if the LINDERA_CACHE dir changed
76    // otherwise, keeps behavior of always redownloading and rebuilding
77    let (build_dir, is_cache) = if let Some(lindera_cache_dir) = env::var_os("LINDERA_CACHE") {
78        (
79            PathBuf::from(lindera_cache_dir).join(env::var_os("CARGO_PKG_VERSION").unwrap()),
80            true,
81        )
82    } else {
83        (
84            PathBuf::from(env::var_os("OUT_DIR").unwrap()), /* ex) target/debug/build/<pkg>/out */
85            false,
86        )
87    };
88
89    // environment variable passed to dependents, that will actually be used to include the dictionary in the library
90    println!("cargo::rustc-env=LINDERA_WORKDIR={}", build_dir.display());
91
92    std::fs::create_dir_all(&build_dir)?;
93
94    let input_dir = build_dir.join(params.input_dir);
95
96    let output_dir = build_dir.join(params.output_dir);
97
98    // Fast path where the data is already in cache
99    if is_cache && output_dir.is_dir() {
100        return Ok(());
101    }
102
103    if std::env::var("DOCS_RS").is_ok() {
104        // Create directory for dummy input directory for build docs
105        create_dir(&input_dir)?;
106
107        // Create dummy char.def
108        let mut dummy_char_def = File::create(input_dir.join("char.def"))?;
109        dummy_char_def.write_all(b"DEFAULT 0 1 0\n")?;
110
111        // Create dummy CSV file
112        let mut dummy_dict_csv = File::create(input_dir.join("dummy_dict.csv"))?;
113        dummy_dict_csv.write_all(
114            &UTF_8
115                .encode(params.dummy_input, EncoderTrap::Ignore)
116                .unwrap(),
117        )?;
118
119        // Create dummy unk.def
120        File::create(input_dir.join("unk.def"))?;
121        let mut dummy_matrix_def = File::create(input_dir.join("matrix.def"))?;
122        dummy_matrix_def.write_all(b"0 1 0\n")?;
123    } else {
124        // Source file path for build package
125        let source_path_for_build = &build_dir.join(params.file_name);
126
127        // Download source file to build directory
128        // copy(&source_path, &source_path_for_build)?;
129        let tmp_path = Path::new(&build_dir).join(params.file_name.to_owned() + ".download");
130
131        // Download a tarball
132        let resp = ureq::get(params.download_url).call()?;
133        let mut dest = File::create(&tmp_path)?;
134
135        io::copy(&mut resp.into_reader(), &mut dest)?;
136        dest.flush()?;
137
138        rename(tmp_path, source_path_for_build).expect("Failed to rename temporary file");
139
140        // Decompress a tar.gz file
141        let tmp_extract_path =
142            Path::new(&build_dir).join(format!("tmp-archive-{}", params.input_dir));
143        let tmp_extracted_path = tmp_extract_path.join(params.input_dir);
144        let _ = std::fs::remove_dir_all(&tmp_extract_path);
145        std::fs::create_dir_all(&tmp_extract_path)?;
146
147        let mut tar_gz = File::open(source_path_for_build)?;
148        let mut buffer = Vec::new();
149        tar_gz.read_to_end(&mut buffer)?;
150        let cursor = Cursor::new(buffer);
151        let decoder = GzDecoder::new(cursor);
152        let mut archive = Archive::new(decoder);
153        archive.unpack(&tmp_extract_path)?;
154        rename(tmp_extracted_path, &input_dir).expect("Failed to rename archive directory");
155        let _ = std::fs::remove_dir_all(&tmp_extract_path);
156        drop(dest);
157        let _ = std::fs::remove_file(source_path_for_build);
158    }
159
160    let tmp_path = build_dir.join(format!("tmp-output-{}", params.output_dir));
161    let _ = std::fs::remove_dir_all(&tmp_path);
162
163    builder.build_dictionary(&input_dir, &tmp_path)?;
164
165    #[cfg(target_os = "windows")]
166    {
167        // Check if output_dir exists
168        if output_dir.exists() {
169            // Remove output_dir
170            std::fs::remove_dir_all(&output_dir).expect("Failed to remove output directory");
171
172            // Make output_dir
173            std::fs::create_dir_all(&output_dir).expect("Failed to create output directory");
174        }
175
176        // Copy tmp_path to output_dir
177        copy_dir_all(&tmp_path, &output_dir).expect("Failed to copy output directory");
178
179        // remove tmp_path
180        std::fs::remove_dir_all(&tmp_path).expect("Failed to copy output directory");
181    }
182
183    #[cfg(not(target_os = "windows"))]
184    {
185        // Empty the output directory
186        empty_directory(&output_dir).expect("Failed to empty output directory");
187
188        // Rename tmp_path to output_dir
189        rename(tmp_path, &output_dir).expect("Failed to rename output directory");
190    }
191
192    let _ = std::fs::remove_dir_all(&input_dir);
193
194    Ok(())
195}