Skip to main content

jmdict_fast_ffi/
lib.rs

1//! FFI-agnostic facade over `jmdict-fast`.
2//!
3//! This crate intentionally exposes only the shapes that every popular Rust
4//! FFI generator can describe: owned data, concrete enums, `Arc<Self>` handles,
5//! no lifetimes, no generics, no iterators across the boundary. The generator
6//! crates (`jmdict-fast-uniffi`, `jmdict-fast-frb`, `jmdict-fast-bolt`, …)
7//! describe these types externally (UDL, FRB scan, bolt IDL) and add their
8//! own scaffolding — they should not need to reach into `jmdict-fast` itself.
9//!
10//! ## Differences from `jmdict-fast`
11//!
12//! - `Dict::load` / `load_default` return `Arc<Self>` (every generator requires
13//!   `Arc` for object handles).
14//! - The chainable `QueryBuilder` collapses into a `QueryOptions` record so
15//!   callers in foreign languages don't need fluent chaining.
16//! - `JmdictError::IoError(std::io::Error)` collapses into `Error::Io { message }`
17//!   so the error survives codegen — `io::Error` is not FFI-safe.
18//! - Iterators (`iter_entries`) become an explicit `(start, count)` pagination.
19//!
20//! ## Re-exports
21//!
22//! Data types (`Entry`, `KanjiEntry`, `Xref`, …) are re-exported unchanged from
23//! `jmdict-fast`. They are already plain POD with public fields and no
24//! lifetimes — every FFI generator can describe them as records.
25
26use std::sync::Arc;
27
28use jmdict_fast as core;
29
30pub use core::{
31    DataVersion, DeinflectionInfo, Entry, GlossEntry, KanaEntry, KanjiEntry, LanguageSource,
32    MatchMode, MatchType, SenseEntry, Xref,
33};
34
35// ---------------------------------------------------------------------------
36// Error
37// ---------------------------------------------------------------------------
38
/// Errors surfaced across the FFI boundary.
///
/// Every payload is owned, plain data (`u32` / `String`) so FFI generators can
/// describe the enum directly. In particular `Error::Io` carries the rendered
/// `io::Error` message as a `String` — `std::io::Error` itself is not
/// FFI-safe, so the variant is collapsed here.
///
/// `PartialEq` / `Eq` are derived so callers and tests can compare errors by
/// value (`assert_eq!(err, Error::InvalidQuery)`) instead of matching by hand.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// Data files not found at the expected path.
    DataNotFound,
    /// Binary format version mismatch between data and library.
    DataVersionMismatch { expected: u32, found: u32 },
    /// Data files are corrupted or have an invalid format.
    DataCorrupted,
    /// The query was invalid (e.g., empty string).
    InvalidQuery,
    /// An I/O error occurred while reading data files. `message` is the
    /// stringified underlying error.
    Io { message: String },
    /// Failed to deserialize entry data.
    Deserialization,
    /// `Dict::install*` needs a cache directory on this platform (iOS,
    /// Android, WASM) and the host hasn't registered one via
    /// `init_sdk_cache_dir` / `InstallOptions::cache_dir`.
    ///
    /// `platform` is the target_os string ("ios", "android", "wasm") so
    /// foreign-language callers can branch on it without parsing the
    /// Display message. `&'static str` → `String` for FFI safety.
    #[cfg(feature = "install")]
    CacheDirRequired { platform: String },
    /// `init_sdk_cache_dir` was called twice. The cache root is process-
    /// global and first-set-wins; subsequent calls are rejected rather
    /// than silently leaving older `Dict`s pointing at a stale root.
    #[cfg(feature = "install")]
    CacheDirAlreadySet,
    /// A network request inside `Dict::install*` failed (timeout, DNS,
    /// non-2xx status, oversize body, …).
    #[cfg(feature = "install")]
    Network { message: String },
}
76
77impl Error {
78    /// Stable numeric code for each variant. Matches `JmdictError::code()`.
79    pub fn code(&self) -> u32 {
80        match self {
81            Error::DataNotFound => 1,
82            Error::DataVersionMismatch { .. } => 2,
83            Error::DataCorrupted => 3,
84            Error::InvalidQuery => 4,
85            Error::Io { .. } => 5,
86            Error::Deserialization => 6,
87            #[cfg(feature = "install")]
88            Error::CacheDirRequired { .. } => 7,
89            #[cfg(feature = "install")]
90            Error::Network { .. } => 8,
91            #[cfg(feature = "install")]
92            Error::CacheDirAlreadySet => 9,
93        }
94    }
95}
96
97impl std::fmt::Display for Error {
98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99        match self {
100            Error::DataNotFound => write!(
101                f,
102                "Dictionary data files not found. Provide a path to load(), set JMDICT_DATA, or place files under dist/."
103            ),
104            Error::DataVersionMismatch { expected, found } => write!(
105                f,
106                "Data format version {found}, library expects {expected}. Regenerate with `cargo xtask generate`."
107            ),
108            Error::DataCorrupted => write!(f, "Dictionary data is corrupted or has an invalid format."),
109            Error::InvalidQuery => write!(f, "The search query is invalid."),
110            Error::Io { message } => write!(f, "I/O error: {message}"),
111            Error::Deserialization => write!(f, "Failed to deserialize dictionary entry data."),
112            #[cfg(feature = "install")]
113            Error::CacheDirRequired { platform } => write!(
114                f,
115                "Cache directory required on {platform}: call init_sdk_cache_dir(path) from the host \
116                 (e.g. path_provider on Flutter, FileManager on iOS, Context.getCacheDir on Android), \
117                 or pass InstallOptions::cache_dir(path) per call."
118            ),
119            #[cfg(feature = "install")]
120            Error::CacheDirAlreadySet => write!(
121                f,
122                "init_sdk_cache_dir was already called for this process; the cache root is one-shot."
123            ),
124            #[cfg(feature = "install")]
125            Error::Network { message } => write!(f, "Network error during install: {message}"),
126        }
127    }
128}
129
130impl std::error::Error for Error {}
131
132impl From<core::JmdictError> for Error {
133    fn from(err: core::JmdictError) -> Self {
134        match err {
135            core::JmdictError::DataNotFound => Error::DataNotFound,
136            core::JmdictError::DataVersionMismatch { expected, found } => {
137                Error::DataVersionMismatch { expected, found }
138            }
139            core::JmdictError::DataCorrupted => Error::DataCorrupted,
140            core::JmdictError::InvalidQuery => Error::InvalidQuery,
141            core::JmdictError::IoError(e) => Error::Io { message: e.to_string() },
142            core::JmdictError::DeserializationError => Error::Deserialization,
143            #[cfg(feature = "install")]
144            core::JmdictError::CacheDirRequired { platform } => Error::CacheDirRequired {
145                platform: platform.to_string(),
146            },
147            #[cfg(feature = "install")]
148            core::JmdictError::CacheDirAlreadySet => Error::CacheDirAlreadySet,
149            #[cfg(feature = "install")]
150            core::JmdictError::NetworkError(message) => Error::Network { message },
151        }
152    }
153}
154
155// ---------------------------------------------------------------------------
156// LookupResult, QueryOptions, BatchResult
157// ---------------------------------------------------------------------------
158
159/// A single dictionary lookup hit.
160///
161/// Mirrors `core::LookupResult` but lives in the FFI surface so FFI codegen
162/// can describe it without reaching across crate boundaries.
163#[derive(Debug, Clone)]
164pub struct LookupResult {
165    pub entry: Entry,
166    pub match_type: MatchType,
167    pub match_key: String,
168    pub score: f64,
169    pub deinflection: Option<DeinflectionInfo>,
170}
171
172impl From<core::LookupResult> for LookupResult {
173    fn from(r: core::LookupResult) -> Self {
174        Self {
175            entry: r.entry,
176            match_type: r.match_type,
177            match_key: r.match_key,
178            score: r.score,
179            deinflection: r.deinflection,
180        }
181    }
182}
183
184/// Options for [`Dict::lookup_with_options`] / [`Dict::lookup_batch`].
185///
186/// Foreign-language callers populate this record instead of chaining
187/// `QueryBuilder` setters. All fields have sensible defaults; see
188/// [`QueryOptions::default`].
189#[derive(Debug, Clone)]
190pub struct QueryOptions {
191    pub mode: MatchMode,
192    pub common_only: bool,
193    pub pos: Vec<String>,
194    pub misc: Vec<String>,
195    pub field: Vec<String>,
196    pub dialect: Vec<String>,
197    /// `None` means no cap. `u32` (not `usize`) so the type is portable across
198    /// 32-/64-bit FFI targets.
199    pub limit: Option<u32>,
200    /// Only consulted when `mode == MatchMode::Fuzzy`. Clamped to
201    /// `core::MAX_FUZZY_DISTANCE` by the underlying builder.
202    pub max_distance: u32,
203}
204
205impl Default for QueryOptions {
206    fn default() -> Self {
207        Self {
208            mode: MatchMode::Exact,
209            common_only: false,
210            pos: Vec::new(),
211            misc: Vec::new(),
212            field: Vec::new(),
213            dialect: Vec::new(),
214            limit: None,
215            max_distance: 2,
216        }
217    }
218}
219
220/// Per-term batch result, paired with the original term so callers can
221/// correlate output without relying on input order.
222#[derive(Debug, Clone)]
223pub struct BatchResult {
224    pub term: String,
225    pub results: Vec<LookupResult>,
226}
227
228// ---------------------------------------------------------------------------
229// Dict handle
230// ---------------------------------------------------------------------------
231
232/// FFI-friendly handle around a loaded dictionary. Construct via
233/// [`Dict::load`] or [`Dict::load_default`]; the resulting `Arc<Dict>` can be
234/// shared freely across threads and FFI calls.
235pub struct Dict {
236    inner: core::Dict,
237}
238
239impl Dict {
240    /// Load all FSTs and entries via real `mmap`.
241    pub fn load(path: String) -> Result<Arc<Self>, Error> {
242        let inner = core::Dict::load(&path)?;
243        Ok(Arc::new(Self { inner }))
244    }
245
246    /// Try the same cascade as `core::Dict::load_default` (embedded feature,
247    /// `JMDICT_DATA` env var, `dist/`).
248    pub fn load_default() -> Result<Arc<Self>, Error> {
249        let inner = core::Dict::load_default()?;
250        Ok(Arc::new(Self { inner }))
251    }
252
253    pub fn entry_count(&self) -> u64 {
254        self.inner.entry_count() as u64
255    }
256
257    pub fn version(&self) -> DataVersion {
258        self.inner.version()
259    }
260
261    // ------- Convenience lookups --------------------------------------------
262
263    pub fn lookup_exact(&self, term: String) -> Vec<LookupResult> {
264        self.inner
265            .lookup_exact(&term)
266            .into_iter()
267            .map(Into::into)
268            .collect()
269    }
270
271    pub fn lookup_partial(&self, prefix: String) -> Vec<LookupResult> {
272        self.inner
273            .lookup_partial(&prefix)
274            .into_iter()
275            .map(Into::into)
276            .collect()
277    }
278
279    pub fn lookup_exact_with_deinflection(&self, term: String) -> Vec<LookupResult> {
280        self.inner
281            .lookup_exact_with_deinflection(&term)
282            .into_iter()
283            .map(Into::into)
284            .collect()
285    }
286
287    pub fn lookup_gloss(&self, query: String) -> Vec<LookupResult> {
288        self.inner
289            .lookup_gloss(&query)
290            .into_iter()
291            .map(Into::into)
292            .collect()
293    }
294
295    pub fn lookup_by_id(&self, jmdict_id: String) -> Option<LookupResult> {
296        self.inner.lookup_by_id(&jmdict_id).map(Into::into)
297    }
298
299    pub fn resolve_xref(&self, xref: Xref) -> Vec<LookupResult> {
300        self.inner
301            .resolve_xref(&xref)
302            .into_iter()
303            .map(Into::into)
304            .collect()
305    }
306
307    // ------- Builder-equivalent ---------------------------------------------
308
309    /// Run a single query with full options.
310    pub fn lookup_with_options(
311        &self,
312        term: String,
313        options: QueryOptions,
314    ) -> Result<Vec<LookupResult>, Error> {
315        let pos: Vec<&str> = options.pos.iter().map(String::as_str).collect();
316        let misc: Vec<&str> = options.misc.iter().map(String::as_str).collect();
317        let field: Vec<&str> = options.field.iter().map(String::as_str).collect();
318        let dialect: Vec<&str> = options.dialect.iter().map(String::as_str).collect();
319
320        let mut builder = self
321            .inner
322            .lookup(&term)
323            .mode(options.mode)
324            .common_only(options.common_only)
325            .pos(&pos)
326            .misc(&misc)
327            .field(&field)
328            .dialect(&dialect)
329            .max_distance(options.max_distance);
330        if let Some(limit) = options.limit {
331            builder = builder.limit(limit as usize);
332        }
333
334        Ok(builder.execute()?.into_iter().map(Into::into).collect())
335    }
336
337    /// Run the same query options across many terms. Each `BatchResult`
338    /// carries its term alongside the hits so callers can correlate output.
339    ///
340    /// Routes through `core::BatchQueryBuilder` so filter slices are prepared
341    /// once and the options record is not cloned per term.
342    pub fn lookup_batch(
343        &self,
344        terms: Vec<String>,
345        options: QueryOptions,
346    ) -> Result<Vec<BatchResult>, Error> {
347        let term_refs: Vec<&str> = terms.iter().map(String::as_str).collect();
348        let pos: Vec<&str> = options.pos.iter().map(String::as_str).collect();
349        let misc: Vec<&str> = options.misc.iter().map(String::as_str).collect();
350        let field: Vec<&str> = options.field.iter().map(String::as_str).collect();
351        let dialect: Vec<&str> = options.dialect.iter().map(String::as_str).collect();
352
353        let mut builder = self
354            .inner
355            .lookup_batch(&term_refs)
356            .mode(options.mode)
357            .common_only(options.common_only)
358            .pos(&pos)
359            .misc(&misc)
360            .field(&field)
361            .dialect(&dialect)
362            .max_distance(options.max_distance);
363        if let Some(limit) = options.limit {
364            builder = builder.limit(limit as usize);
365        }
366
367        Ok(builder
368            .execute()?
369            .into_iter()
370            .map(|(term, results)| BatchResult {
371                term,
372                results: results.into_iter().map(Into::into).collect(),
373            })
374            .collect())
375    }
376
377    // ------- Browsing --------------------------------------------------------
378
379    /// Fetch a single entry by sequential index.
380    pub fn get(&self, seq_id: u64) -> Option<Entry> {
381        self.inner.get(seq_id)
382    }
383
384    /// Paginate over the entry list. `start` is the first sequential index to
385    /// return; `count` is the maximum number of entries. Returns fewer than
386    /// `count` items at the end of the dictionary. Iterators don't translate
387    /// across FFI, so callers loop on `(start, count)` themselves.
388    pub fn iter_entries(&self, start: u64, count: u64) -> Vec<Entry> {
389        let total = self.inner.entry_count() as u64;
390        let end = start.saturating_add(count).min(total);
391        (start..end).filter_map(|i| self.inner.get(i)).collect()
392    }
393}
394
395// ---------------------------------------------------------------------------
396// Install surface (feature = "install")
397// ---------------------------------------------------------------------------
398
/// Source of the install bytes. Mirrors `core::install::InstallSource` but
/// lives here so FFI generators can describe it without crossing crate
/// boundaries. Strings (not `PathBuf`) keep the type FFI-safe.
///
/// `Default` is derived: the `#[default]` variant is
/// [`InstallSource::OfficialRelease`], exactly what the previous hand-written
/// `impl Default` returned.
#[cfg(feature = "install")]
#[derive(Debug, Clone, Default)]
pub enum InstallSource {
    /// The GitHub release tarball matching this build's crate / JMdict /
    /// format versions.
    #[default]
    OfficialRelease,
    /// Any `.tar.gz` reachable over HTTPS.
    Url { url: String },
    /// A `.tar.gz` already on the local filesystem (path as string).
    Tarball { path: String },
}
420
/// Settings accepted by [`Dict::install_with`].
///
/// A plain record in which every field has a default, so FFI callers fill it
/// in directly instead of chaining builder setters.
#[cfg(feature = "install")]
#[derive(Clone, Debug, Default)]
pub struct InstallOptions {
    /// Per-call cache directory. Wins over `init_sdk_cache_dir` and the
    /// platform default. `None` falls back to the resolver chain.
    pub cache_dir: Option<String>,
    /// Where the install bytes come from; defaults to
    /// `InstallSource::default()`.
    pub source: InstallSource,
    /// Re-extract even when the cache appears complete (recovers from a
    /// stale or partially-corrupted install).
    pub force: bool,
}
434
#[cfg(feature = "install")]
impl Dict {
    /// Download the official release tarball into the platform cache and
    /// load it. No-op on a warm cache.
    ///
    /// Equivalent to [`Dict::install_with`] with default options.
    pub fn install() -> Result<Arc<Self>, Error> {
        let defaults = InstallOptions::default();
        Self::install_with(defaults)
    }

    /// Download an arbitrary tarball URL and load it.
    ///
    /// All other options stay at their defaults.
    pub fn install_from_url(url: String) -> Result<Arc<Self>, Error> {
        let source = InstallSource::Url { url };
        Self::install_with(InstallOptions {
            source,
            ..InstallOptions::default()
        })
    }

    /// Extract a local `.tar.gz` and load it.
    ///
    /// All other options stay at their defaults.
    pub fn install_from_tarball(path: String) -> Result<Arc<Self>, Error> {
        let source = InstallSource::Tarball { path };
        Self::install_with(InstallOptions {
            source,
            ..InstallOptions::default()
        })
    }

    /// Full install with explicit options. Returns the loaded `Arc<Dict>`.
    ///
    /// The FFI record is translated into `core::install::InstallOptions`:
    /// string paths become `PathBuf`s, and `cache_dir` is only set on the
    /// core options when the caller supplied one.
    ///
    /// # Errors
    ///
    /// Returns the converted core error when `core::Dict::install_with` fails.
    pub fn install_with(options: InstallOptions) -> Result<Arc<Self>, Error> {
        let InstallOptions {
            cache_dir,
            source,
            force,
        } = options;

        let core_source = match source {
            InstallSource::OfficialRelease => core::install::InstallSource::OfficialRelease,
            InstallSource::Url { url } => core::install::InstallSource::Url(url),
            InstallSource::Tarball { path } => {
                core::install::InstallSource::Tarball(std::path::PathBuf::from(path))
            }
        };

        let mut core_opts = core::install::InstallOptions::default()
            .source(core_source)
            .force(force);
        if let Some(dir) = cache_dir {
            core_opts = core_opts.cache_dir(std::path::PathBuf::from(dir));
        }

        let inner = core::Dict::install_with(core_opts)?;
        Ok(Arc::new(Self { inner }))
    }
}
477
/// Register a process-global cache directory for `Dict::install*`. First
/// call wins; subsequent calls return [`Error::CacheDirAlreadySet`].
///
/// On iOS / Android / WASM this is **mandatory** — the host gets the right
/// path from a platform API (Flutter's `path_provider`,
/// `Context.getCacheDir`, `FileManager`) and registers it at startup.
///
/// `path` arrives as a `String` for FFI safety and is converted to a
/// `PathBuf` before being handed to `core::install::init_sdk_cache_dir`.
#[cfg(feature = "install")]
pub fn init_sdk_cache_dir(path: String) -> Result<(), Error> {
    let cache_root = std::path::PathBuf::from(path);
    core::install::init_sdk_cache_dir(cache_root)?;
    Ok(())
}
488
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn error_codes_match_core() {
        // The FFI codes must agree with core so consumers can switch on a
        // shared numeric protocol.
        assert_eq!(Error::DataNotFound.code(), 1);
        assert_eq!(
            Error::DataVersionMismatch {
                expected: 4,
                found: 3
            }
            .code(),
            2
        );
        assert_eq!(Error::DataCorrupted.code(), 3);
        assert_eq!(Error::InvalidQuery.code(), 4);
        assert_eq!(
            Error::Io {
                message: "boom".into()
            }
            .code(),
            5
        );
        assert_eq!(Error::Deserialization.code(), 6);
    }

    // The install-only variants are part of the same numeric protocol, so
    // their codes are pinned too whenever the feature is compiled in.
    #[cfg(feature = "install")]
    #[test]
    fn install_error_codes_are_stable() {
        assert_eq!(
            Error::CacheDirRequired {
                platform: "ios".into()
            }
            .code(),
            7
        );
        assert_eq!(
            Error::Network {
                message: "timeout".into()
            }
            .code(),
            8
        );
        assert_eq!(Error::CacheDirAlreadySet.code(), 9);
    }

    #[test]
    fn error_from_jmdict_io_collapses_to_string() {
        let io = std::io::Error::new(std::io::ErrorKind::Other, "disk on fire");
        let core_err = core::JmdictError::IoError(io);
        match Error::from(core_err) {
            Error::Io { message } => assert!(message.contains("disk on fire")),
            other => panic!("expected Error::Io, got {other:?}"),
        }
    }

    #[test]
    fn io_error_display_includes_message() {
        // Foreign callers only ever see the rendered text, so the collapsed
        // message must survive `Display`.
        let err = Error::Io {
            message: "boom".into(),
        };
        assert_eq!(err.to_string(), "I/O error: boom");
    }

    #[test]
    fn query_options_defaults() {
        let q = QueryOptions::default();
        assert_eq!(q.mode, MatchMode::Exact);
        assert!(!q.common_only);
        assert!(q.pos.is_empty());
        assert!(q.misc.is_empty());
        assert!(q.field.is_empty());
        assert!(q.dialect.is_empty());
        assert_eq!(q.limit, None);
        assert_eq!(q.max_distance, 2);
    }
}