// lindera_sqlite/common.rs

1//! Common types and constants shared across the extension.
2//!
3//! This module defines the fundamental types and constants used for FFI communication
4//! between Rust and SQLite's C API.
5
6use core::convert::TryFrom;
7
8use libc::{c_char, c_int, c_void};
9
10use lindera::tokenizer::Tokenizer;
11
// Status codes mirrored from sqlite3.h so this extension does not need to
// link against the SQLite headers directly.

/// SQLite success status code.
///
/// Returned by functions to indicate successful completion.
/// Value: 0
pub const SQLITE_OK: c_int = 0;

/// SQLite internal error status code.
///
/// Indicates an internal error in SQLite or the extension.
/// Used when unexpected errors occur during tokenization or initialization.
/// Value: 2
pub const SQLITE_INTERNAL: c_int = 2;

/// SQLite misuse error status code.
///
/// Indicates the library is being used incorrectly.
/// Used when version requirements are not met.
/// Value: 21
pub const SQLITE_MISUSE: c_int = 21;
33
/// Wrapper for Lindera tokenizer used in FTS5.
///
/// This structure wraps the Lindera [`Tokenizer`] for use in the FTS5 tokenizer API.
/// Each FTS5 table using the Lindera tokenizer will have its own instance of this struct.
///
/// # Memory Management
///
/// Instances are heap-allocated in [`fts5_create_lindera_tokenizer`](crate::extension::fts5_create_lindera_tokenizer)
/// and deallocated in [`fts5_delete_lindera_tokenizer`](crate::extension::fts5_delete_lindera_tokenizer).
pub struct Fts5Tokenizer {
    /// The underlying Lindera tokenizer instance.
    pub tokenizer: Tokenizer,
}
47
/// Convenience wrapper around SQLite's token callback.
///
/// This helper keeps the unsafe FFI boundary localized and provides
/// clear intent when emitting tokens from Rust back into SQLite.
pub struct TokenCallback {
    /// Opaque context pointer supplied by SQLite; passed back verbatim
    /// as the first argument of every callback invocation.
    context: *mut c_void,
    /// The C callback function SQLite provided for receiving tokens.
    function: TokenFunction,
}
56
57impl TokenCallback {
58    /// Creates a new callback wrapper using the opaque context pointer
59    /// and the C callback function provided by SQLite.
60    pub const fn new(context: *mut c_void, function: TokenFunction) -> Self {
61        Self { context, function }
62    }
63
64    /// Emits a token back to SQLite, returning any propagated SQLite
65    /// status code as an error.
66    pub fn emit(&self, token: &[u8], byte_start: usize, byte_end: usize) -> Result<(), c_int> {
67        let token_len = cast_usize_to_c_int(token.len())?;
68        let start = cast_usize_to_c_int(byte_start)?;
69        let end = cast_usize_to_c_int(byte_end)?;
70
71        let status = (self.function)(
72            self.context,
73            0,
74            token.as_ptr() as *const c_char,
75            token_len,
76            start,
77            end,
78        );
79
80        if status == SQLITE_OK {
81            Ok(())
82        } else {
83            Err(status)
84        }
85    }
86}
87
88fn cast_usize_to_c_int(value: usize) -> Result<c_int, c_int> {
89    c_int::try_from(value).map_err(|_| SQLITE_INTERNAL)
90}
91
92/// Runs an operation behind a panic boundary suitable for the SQLite FFI.
93///
94/// Any panic is translated to [`SQLITE_INTERNAL`], mirroring SQLite's
95/// expectation that FFI callbacks never unwind across the boundary.
96pub fn ffi_panic_boundary<F>(operation: F) -> c_int
97where
98    F: FnOnce() -> Result<(), c_int>,
99{
100    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(operation)) {
101        Ok(Ok(())) => SQLITE_OK,
102        Ok(Err(code)) => code,
103        Err(_) => SQLITE_INTERNAL,
104    }
105}
106
/// Token callback function type.
///
/// This type represents the callback function provided by SQLite FTS5 for each token
/// produced during tokenization. The extension calls this function once per token.
///
/// # Parameters
///
/// - `p_ctx` - Context pointer passed through from the tokenization call
/// - `t_flags` - Token flags (currently always 0 in this implementation)
/// - `p_token` - Pointer to the token text (UTF-8 encoded)
/// - `n_token` - Length of the token in bytes
/// - `i_start` - Byte offset where the token starts in the original text
/// - `i_end` - Byte offset where the token ends in the original text
///
/// # Returns
///
/// - [`SQLITE_OK`] - Token processed successfully, continue tokenization
/// - Other codes - Error occurred, stop tokenization
///
/// # Example Flow
///
/// ```text
/// Input: "日本語" (9 bytes in UTF-8)
///
/// Callback 1: token="日本", n_token=6, i_start=0, i_end=6
/// Callback 2: token="語",   n_token=3, i_start=6, i_end=9
/// ```
pub type TokenFunction = extern "C" fn(
    p_ctx: *mut c_void,
    t_flags: c_int,
    p_token: *const c_char,
    n_token: c_int,
    i_start: c_int,
    i_end: c_int,
) -> c_int;