// lindera_sqlite/common.rs

1//! Common types and constants shared across the extension.
2//!
3//! This module defines the fundamental types and constants used for FFI communication
4//! between Rust and SQLite's C API.
5
6use libc::{c_char, c_int, c_void};
7
8use lindera::tokenizer::Tokenizer;
9
// Result codes mirrored from sqlite3.h.
/// Successful result code (`SQLITE_OK`).
///
/// Returned to SQLite to signal that an operation completed without error.
/// Numeric value: 0.
pub const SQLITE_OK: c_int = 0;
17
/// Internal error result code (`SQLITE_INTERNAL`).
///
/// Reported when something unexpected goes wrong inside SQLite or the
/// extension, e.g. during tokenization or initialization.
/// Numeric value: 2.
pub const SQLITE_INTERNAL: c_int = 2;
24
/// Misuse result code (`SQLITE_MISUSE`).
///
/// Signals that the library is being driven incorrectly — used here when
/// version requirements are not met.
/// Numeric value: 21.
pub const SQLITE_MISUSE: c_int = 21;
31
32/// Wrapper for Lindera tokenizer used in FTS5.
33///
34/// This structure wraps the Lindera [`Tokenizer`] for use in the FTS5 tokenizer API.
35/// Each FTS5 table using the Lindera tokenizer will have its own instance of this struct.
36///
37/// # Memory Management
38///
39/// Instances are heap-allocated in [`fts5_create_lindera_tokenizer`](crate::extension::fts5_create_lindera_tokenizer)
40/// and deallocated in [`fts5_delete_lindera_tokenizer`](crate::extension::fts5_delete_lindera_tokenizer).
41pub struct Fts5Tokenizer {
42    /// The underlying Lindera tokenizer instance.
43    pub tokenizer: Tokenizer,
44}
45
/// Signature of the per-token callback supplied by SQLite FTS5.
///
/// During tokenization the extension invokes this callback exactly once for
/// every token it emits.
///
/// # Parameters
///
/// - `p_ctx` - Opaque context pointer forwarded from the tokenize call
/// - `t_flags` - Token flags (this implementation always passes 0)
/// - `p_token` - Pointer to the UTF-8 bytes of the token text
/// - `n_token` - Token length in bytes
/// - `i_start` - Byte offset of the token's start within the original text
/// - `i_end` - Byte offset of the token's end within the original text
///
/// # Returns
///
/// - [`SQLITE_OK`] - Token accepted; continue emitting tokens
/// - Any other code - An error occurred; abort tokenization
///
/// # Example Flow
///
/// ```text
/// Input: "日本語" (9 bytes in UTF-8)
///
/// Callback 1: token="日本", n_token=6, i_start=0, i_end=6
/// Callback 2: token="語",   n_token=3, i_start=6, i_end=9
/// ```
pub type TokenFunction = extern "C" fn(
    p_ctx: *mut c_void,
    t_flags: c_int,
    p_token: *const c_char,
    n_token: c_int,
    i_start: c_int,
    i_end: c_int,
) -> c_int;