// lindera_sqlite/common.rs
//! Common types and constants shared across the extension.
//!
//! This module defines the fundamental types and constants used for FFI
//! communication between Rust and SQLite's C API.
5
6use core::convert::TryFrom;
7
8use libc::{c_char, c_int, c_void};
9
10use lindera::tokenizer::Tokenizer;
11
// Status codes mirrored from sqlite3.h so the extension builds without
// bindgen; the numeric values must match the C header exactly.

/// SQLite success status code.
///
/// Returned by functions to indicate successful completion.
/// Value: 0
pub const SQLITE_OK: c_int = 0;

/// SQLite internal error status code.
///
/// Indicates an internal error in SQLite or the extension.
/// Used when unexpected errors occur during tokenization or initialization
/// (e.g. a token length that does not fit in a `c_int`, or a caught panic).
/// Value: 2
pub const SQLITE_INTERNAL: c_int = 2;

/// SQLite misuse error status code.
///
/// Indicates the library is being used incorrectly.
/// Used when version requirements are not met.
/// Value: 21
pub const SQLITE_MISUSE: c_int = 21;
33
/// Wrapper for Lindera tokenizer used in FTS5.
///
/// This structure wraps the Lindera [`Tokenizer`] for use in the FTS5 tokenizer API.
/// Each FTS5 table using the Lindera tokenizer will have its own instance of this struct.
///
/// # Memory Management
///
/// Instances are heap-allocated in [`fts5_create_lindera_tokenizer`](crate::extension::fts5_create_lindera_tokenizer)
/// and deallocated in [`fts5_delete_lindera_tokenizer`](crate::extension::fts5_delete_lindera_tokenizer).
pub struct Fts5Tokenizer {
    /// The underlying Lindera tokenizer instance.
    pub tokenizer: Tokenizer,
}
47
/// Convenience wrapper around SQLite's token callback.
///
/// This helper keeps the unsafe FFI boundary localized and provides
/// clear intent when emitting tokens from Rust back into SQLite.
pub struct TokenCallback {
    /// Opaque context pointer supplied by SQLite; passed back verbatim as the
    /// first argument on every callback invocation.
    context: *mut c_void,
    /// The C callback provided by FTS5, invoked once per produced token.
    function: TokenFunction,
}
56
57impl TokenCallback {
58 /// Creates a new callback wrapper using the opaque context pointer
59 /// and the C callback function provided by SQLite.
60 pub const fn new(context: *mut c_void, function: TokenFunction) -> Self {
61 Self { context, function }
62 }
63
64 /// Emits a token back to SQLite, returning any propagated SQLite
65 /// status code as an error.
66 pub fn emit(&self, token: &[u8], byte_start: usize, byte_end: usize) -> Result<(), c_int> {
67 let token_len = cast_usize_to_c_int(token.len())?;
68 let start = cast_usize_to_c_int(byte_start)?;
69 let end = cast_usize_to_c_int(byte_end)?;
70
71 let status = (self.function)(
72 self.context,
73 0,
74 token.as_ptr() as *const c_char,
75 token_len,
76 start,
77 end,
78 );
79
80 if status == SQLITE_OK {
81 Ok(())
82 } else {
83 Err(status)
84 }
85 }
86}
87
88fn cast_usize_to_c_int(value: usize) -> Result<c_int, c_int> {
89 c_int::try_from(value).map_err(|_| SQLITE_INTERNAL)
90}
91
92/// Runs an operation behind a panic boundary suitable for the SQLite FFI.
93///
94/// Any panic is translated to [`SQLITE_INTERNAL`], mirroring SQLite's
95/// expectation that FFI callbacks never unwind across the boundary.
96pub fn ffi_panic_boundary<F>(operation: F) -> c_int
97where
98 F: FnOnce() -> Result<(), c_int>,
99{
100 match std::panic::catch_unwind(std::panic::AssertUnwindSafe(operation)) {
101 Ok(Ok(())) => SQLITE_OK,
102 Ok(Err(code)) => code,
103 Err(_) => SQLITE_INTERNAL,
104 }
105}
106
/// Token callback function type.
///
/// This type represents the callback function provided by SQLite FTS5 for each token
/// produced during tokenization. The extension calls this function once per token.
///
/// # Parameters
///
/// - `p_ctx` - Context pointer passed through from the tokenization call
/// - `t_flags` - Token flags (currently always 0 in this implementation)
/// - `p_token` - Pointer to the token text (UTF-8 encoded, not NUL-terminated)
/// - `n_token` - Length of the token in bytes
/// - `i_start` - Byte offset where the token starts in the original text
/// - `i_end` - Byte offset where the token ends in the original text
///
/// # Returns
///
/// - [`SQLITE_OK`] - Token processed successfully, continue tokenization
/// - Other codes - Error occurred, stop tokenization
///
/// # Example Flow
///
/// ```text
/// Input: "日本語" (9 bytes in UTF-8)
///
/// Callback 1: token="日本", n_token=6, i_start=0, i_end=6
/// Callback 2: token="語", n_token=3, i_start=6, i_end=9
/// ```
pub type TokenFunction = extern "C" fn(
    p_ctx: *mut c_void,
    t_flags: c_int,
    p_token: *const c_char,
    n_token: c_int,
    i_start: c_int,
    i_end: c_int,
) -> c_int;