Skip to main content

hedl_ffi/
lib.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! HEDL FFI Bindings
19//!
20//! Provides C-compatible interface for HEDL operations.
21//! All functions use C-style error handling with return codes.
22//!
23//! # Memory Management
24//!
25//! **IMPORTANT:** Memory ownership follows strict rules:
26//!
27//! - Strings returned by `hedl_*` functions MUST be freed with `hedl_free_string`
28//! - Byte arrays returned by `hedl_to_parquet` MUST be freed with `hedl_free_bytes`
29//! - Documents MUST be freed with `hedl_free_document`
30//! - Diagnostics MUST be freed with `hedl_free_diagnostics`
31//!
32//! **WARNING - Memory Safety Requirements:**
33//!
34//! The `hedl_free_*` functions ONLY accept pointers that were allocated by HEDL
35//! functions. Passing any of the following will cause undefined behavior:
36//!
37//! - Pointers from `malloc`/`calloc`/`realloc` (wrong allocator)
38//! - Stack-allocated variables
39//! - Already-freed pointers (double free)
40//! - Pointers from other libraries
//!
//! Passing NULL to a `hedl_free_*` function is safe: it is ignored.
42//!
43//! # Thread Safety
44//!
45//! ## Error Handling Thread Safety
46//!
47//! Error messages are stored in **thread-local storage**, providing lock-free,
48//! wait-free error handling for multi-threaded applications.
49//!
50//! **Key Guarantees:**
51//! - Each thread maintains its own independent error state
52//! - `hedl_get_last_error()` / `hedl_get_last_error_threadsafe()` returns the
53//!   error for the CALLING thread only
54//! - Errors from one thread will NEVER appear in or overwrite errors in another thread
55//! - No mutexes, locks, or other synchronization primitives are required
56//! - Zero contention between threads accessing error messages
57//! - You MUST call error functions from the same thread that received the error code
58//!
59//! **Thread-Safe Functions:**
60//! - `hedl_get_last_error()` - Get error for current thread
61//! - `hedl_get_last_error_threadsafe()` - Explicit thread-safe alias
62//! - `hedl_clear_error_threadsafe()` - Clear error for current thread
63//!
64//! **Example (Multi-threaded C with pthreads):**
65//! ```c
66//! void* worker(void* arg) {
67//!     const char* input = (const char*)arg;
68//!     HedlDocument* doc = NULL;
69//!
70//!     if (hedl_parse(input, -1, 0, &doc) != HEDL_OK) {
71//!         // Get error for THIS thread - independent of other threads
72//!         const char* err = hedl_get_last_error_threadsafe();
73//!         fprintf(stderr, "Parse error: %s\n", err);
74//!         return NULL;
75//!     }
76//!
77//!     // Process document...
78//!     hedl_free_document(doc);
79//!     return (void*)1;
80//! }
81//!
82//! int main() {
83//!     pthread_t threads[8];
84//!     const char* inputs[8] = { ... };
85//!
86//!     // Launch threads - each with independent error state
87//!     for (int i = 0; i < 8; i++) {
88//!         pthread_create(&threads[i], NULL, worker, (void*)inputs[i]);
89//!     }
90//!
91//!     for (int i = 0; i < 8; i++) {
92//!         pthread_join(threads[i], NULL);
93//!     }
94//! }
95//! ```
96//!
97//! ## Document Handle Thread Safety
98//!
99//! Document handles (`HedlDocument*`) are **NOT thread-safe** by design for
100//! performance reasons. Do not share document handles between threads without
101//! external synchronization (mutexes, etc.).
102//!
103//! **Safe Pattern:**
104//! - Each thread creates its own document handles
105//! - Each thread frees its own document handles
106//! - No sharing of document pointers across threads
107//!
108//! **Unsafe Pattern:**
109//! - Passing a `HedlDocument*` to multiple threads (data race)
110//! - Accessing the same document from multiple threads (undefined behavior)
111//!
112//! # Error Handling
113//!
114//! - All functions return error codes (`HEDL_OK` on success)
115//! - Use `hedl_get_last_error` to get the error message for the current thread
116//!
117//! # Security
118//!
119//! ## Poison Pointers
120//!
121//! To detect double-free and use-after-free bugs, this library uses poison pointers:
122//!
123//! - After freeing a document or diagnostics, the internal pointer is checked against
124//!   a poison value
125//! - All accessor functions validate that pointers are not poisoned before use
126//! - This provides defense-in-depth against memory safety bugs
127//!
128//! **Note**: Since C passes pointers by value, we cannot modify the caller's pointer
129//! after freeing. However, we can detect if a freed pointer is passed back to us
130//! by checking for the poison value in accessor functions.
131//!
132//! # Audit Logging
133//!
134//! This library provides comprehensive audit logging for all FFI function calls
135//! using the `tracing` crate. The logging system captures:
136//!
137//! - Function entry/exit with timing information
138//! - Sanitized parameters (pointer addresses are masked for security)
139//! - Success/failure outcomes with error details
140//! - Performance metrics (call duration)
141//! - Thread context for correlation
142//!
143//! ## Configuring Logging
144//!
145//! To enable logging, initialize a tracing subscriber in your application:
146//!
147//! ```rust,no_run
148//! use tracing_subscriber::{fmt, EnvFilter};
149//!
150//! // Initialize the tracing subscriber
151//! tracing_subscriber::fmt()
152//!     .with_env_filter(
153//!         EnvFilter::try_from_default_env()
154//!             .unwrap_or_else(|_| EnvFilter::new("info"))
155//!     )
156//!     .with_target(true)
157//!     .with_thread_ids(true)
158//!     .with_line_number(true)
159//!     .init();
160//!
161//! // Now all FFI calls will be logged
162//! ```
163//!
164//! ## Log Levels
165//!
166//! - `ERROR`: Function failures with error details
167//! - `WARN`: Recoverable errors or unusual conditions
168//! - `INFO`: Function call entry/exit with basic metrics
169//! - `DEBUG`: Detailed parameter information (sanitized)
170//!
171//! ## Environment Variables
172//!
173//! Control logging via the `RUST_LOG` environment variable:
174//!
175//! ```bash
176//! # Log all INFO and above
177//! export RUST_LOG=info
178//!
179//! # Log only FFI audit events
180//! export RUST_LOG=hedl_ffi::audit=debug
181//!
182//! # Log everything at DEBUG level
183//! export RUST_LOG=debug
184//! ```
185//!
186//! ## Example Output
187//!
188//! ```text
189//! 2025-01-05T10:30:45.123Z INFO hedl_ffi::audit: FFI call started function="hedl_parse" thread_id=ThreadId(1) depth=0
190//! 2025-01-05T10:30:45.125Z DEBUG hedl_ffi::audit: FFI call parameters function="hedl_parse" params=[("input_len", "1024")]
191//! 2025-01-05T10:30:45.130Z INFO hedl_ffi::audit: FFI call completed function="hedl_parse" duration_ms=7.2 status="success"
192//! ```
193//!
194//! See the [`audit`] module for more details on the logging implementation.
195
196// =============================================================================
197// Module Declarations
198// =============================================================================
199
200#![cfg_attr(not(test), warn(missing_docs))]
201/// Async FFI operations.
202pub mod async_ops;
203/// FFI audit logging.
204pub mod audit;
205mod conversions;
206mod diagnostics;
207mod error;
208mod ffi_strings;
209mod memory;
210mod operations;
211mod parsing;
212/// Reentrancy guard.
213pub mod reentrancy;
214mod types;
215
216// =============================================================================
217// Re-exports
218// =============================================================================
219
220// Types and error codes
221pub use types::{
222    HedlDiagnostics, HedlDocument, HEDL_ERR_ALLOC, HEDL_ERR_CANCELLED, HEDL_ERR_CANONICALIZE,
223    HEDL_ERR_CSV, HEDL_ERR_INVALID_HANDLE, HEDL_ERR_INVALID_UTF8, HEDL_ERR_JSON, HEDL_ERR_LINT,
224    HEDL_ERR_NEO4J, HEDL_ERR_NULL_PTR, HEDL_ERR_PARQUET, HEDL_ERR_PARSE, HEDL_ERR_QUEUE_FULL,
225    HEDL_ERR_REENTRANT_CALL, HEDL_ERR_TOON, HEDL_ERR_XML, HEDL_ERR_YAML, HEDL_OK,
226};
227
228// Error handling
229pub use error::{hedl_clear_error_threadsafe, hedl_get_last_error, hedl_get_last_error_threadsafe};
230
231// Memory management
232pub use memory::{hedl_free_bytes, hedl_free_diagnostics, hedl_free_document, hedl_free_string};
233
234// Parsing functions
235pub use parsing::{
236    hedl_alias_count, hedl_get_version, hedl_parse, hedl_root_item_count, hedl_schema_count,
237    hedl_validate,
238};
239
240// Operations
241pub use operations::{hedl_canonicalize, hedl_lint};
242
243// Diagnostics
244pub use diagnostics::{hedl_diagnostics_count, hedl_diagnostics_get, hedl_diagnostics_severity};
245
246// Conversion functions (to_*)
247#[cfg(feature = "json")]
248pub use conversions::to_formats::hedl_to_json;
249
250#[cfg(feature = "yaml")]
251pub use conversions::to_formats::hedl_to_yaml;
252
253#[cfg(feature = "xml")]
254pub use conversions::to_formats::hedl_to_xml;
255
256#[cfg(feature = "csv")]
257pub use conversions::to_formats::hedl_to_csv;
258
259#[cfg(feature = "parquet")]
260pub use conversions::to_formats::hedl_to_parquet;
261
262#[cfg(feature = "neo4j")]
263pub use conversions::to_formats::hedl_to_neo4j_cypher;
264
265#[cfg(feature = "toon")]
266pub use conversions::to_formats::hedl_to_toon;
267
268// Zero-copy callback functions (to_*_callback)
269pub use conversions::to_formats_callback::HedlOutputCallback;
270
271#[cfg(feature = "json")]
272pub use conversions::to_formats_callback::hedl_to_json_callback;
273
274#[cfg(feature = "yaml")]
275pub use conversions::to_formats_callback::hedl_to_yaml_callback;
276
277#[cfg(feature = "xml")]
278pub use conversions::to_formats_callback::hedl_to_xml_callback;
279
280#[cfg(feature = "csv")]
281pub use conversions::to_formats_callback::hedl_to_csv_callback;
282
283#[cfg(feature = "neo4j")]
284pub use conversions::to_formats_callback::hedl_to_neo4j_cypher_callback;
285
286pub use conversions::to_formats_callback::hedl_canonicalize_callback;
287
288// Conversion functions (from_*)
289#[cfg(feature = "json")]
290pub use conversions::from_formats::hedl_from_json;
291
292#[cfg(feature = "yaml")]
293pub use conversions::from_formats::hedl_from_yaml;
294
295#[cfg(feature = "xml")]
296pub use conversions::from_formats::hedl_from_xml;
297
298#[cfg(feature = "parquet")]
299pub use conversions::from_formats::hedl_from_parquet;
300
301#[cfg(feature = "toon")]
302pub use conversions::from_formats::hedl_from_toon;
303
304// Async operations
305pub use async_ops::{
306    hedl_async_cancel, hedl_async_free, hedl_canonicalize_async, hedl_lint_async, hedl_parse_async,
307    HedlAsyncOp, HedlCompletionCallback, HedlCompletionCallbackFn,
308};
309
310#[cfg(feature = "json")]
311pub use async_ops::hedl_to_json_async;
312
313#[cfg(feature = "yaml")]
314pub use async_ops::hedl_to_yaml_async;
315
316#[cfg(feature = "xml")]
317pub use async_ops::hedl_to_xml_async;
318
319#[cfg(feature = "csv")]
320pub use async_ops::hedl_to_csv_async;
321
322#[cfg(feature = "neo4j")]
323pub use async_ops::hedl_to_neo4j_cypher_async;
324
325#[cfg(feature = "toon")]
326pub use async_ops::hedl_to_toon_async;
327
328// =============================================================================
329// Tests
330// =============================================================================
331
#[cfg(test)]
mod tests {
    //! Smoke tests for the re-exported C API.
    //!
    //! Each test drives the FFI entry points the same way a C caller would:
    //! raw pointers in, status code out, and every returned handle or string is
    //! released with the matching `hedl_free_*` function.

    use super::*;
    #[cfg(any(feature = "json", feature = "yaml", feature = "xml"))]
    use std::ffi::CStr;
    use std::os::raw::c_char;
    use std::ptr;

    /// Document accepted by `hedl_parse` / `hedl_validate` (NUL-terminated for the C API).
    const VALID_HEDL: &[u8] = b"%V:2.0\n%NULL:~\n%QUOTE:\"\n---\nkey: value\0";
    /// Input that `hedl_validate` must reject (NUL-terminated for the C API).
    const INVALID_HEDL: &[u8] = b"not valid hedl\0";

    /// Parses `input` with `hedl_parse(input, -1, 0, ..)` and returns the document handle.
    ///
    /// Centralizes the setup step shared by most tests so that a parse failure is
    /// reported where it happens instead of surfacing later as a confusing error
    /// from whichever function receives the null handle.
    ///
    /// The caller owns the returned handle and must release it with
    /// `hedl_free_document`.
    ///
    /// # Panics
    ///
    /// Panics if `input` is not NUL-terminated, if `hedl_parse` does not return
    /// `HEDL_OK`, or if it reports success but leaves the output handle null.
    fn parse_document(input: &[u8]) -> *mut HedlDocument {
        assert_eq!(
            input.last(),
            Some(&0u8),
            "test input must be NUL-terminated"
        );

        let mut doc: *mut HedlDocument = ptr::null_mut();
        // SAFETY: `input` ends with a NUL byte (checked above), so the pointer refers to a
        // readable C string, and `doc` is a live local that receives the handle.
        let status = unsafe { hedl_parse(input.as_ptr().cast::<c_char>(), -1, 0, &mut doc) };

        assert_eq!(status, HEDL_OK, "hedl_parse failed on test input");
        assert!(
            !doc.is_null(),
            "hedl_parse returned HEDL_OK but produced no document"
        );
        doc
    }

    #[test]
    fn test_parse_and_free() {
        // SAFETY: `VALID_HEDL` is a NUL-terminated constant and `doc` is a live local used
        // as the output parameter; the handle is freed exactly once.
        unsafe {
            let mut doc: *mut HedlDocument = ptr::null_mut();
            let result = hedl_parse(VALID_HEDL.as_ptr().cast::<c_char>(), -1, 1, &mut doc);

            assert_eq!(result, HEDL_OK);
            assert!(!doc.is_null());

            hedl_free_document(doc);
        }
    }

    #[test]
    fn test_validate_valid() {
        // SAFETY: `VALID_HEDL` is a NUL-terminated constant.
        unsafe {
            assert_eq!(
                hedl_validate(VALID_HEDL.as_ptr().cast::<c_char>(), -1, 1),
                HEDL_OK
            );
        }
    }

    #[test]
    fn test_validate_invalid() {
        // SAFETY: `INVALID_HEDL` is a NUL-terminated constant.
        unsafe {
            assert_ne!(
                hedl_validate(INVALID_HEDL.as_ptr().cast::<c_char>(), -1, 1),
                HEDL_OK
            );
        }
    }

    #[test]
    fn test_null_ptr_handling() {
        // SAFETY: The input pointer is deliberately null; this test checks that the API
        // rejects it with `HEDL_ERR_NULL_PTR`. `doc` is a live local used as the output
        // parameter.
        unsafe {
            let mut doc: *mut HedlDocument = ptr::null_mut();
            assert_eq!(hedl_parse(ptr::null(), -1, 0, &mut doc), HEDL_ERR_NULL_PTR);
        }
    }

    #[test]
    fn test_get_version() {
        let doc = parse_document(VALID_HEDL);

        // SAFETY: `doc` is a live handle from `parse_document`; `major` and `minor` are
        // live locals used as output parameters. The handle is freed exactly once.
        unsafe {
            let mut major: i32 = 0;
            let mut minor: i32 = 0;
            let result = hedl_get_version(doc, &mut major, &mut minor);

            assert_eq!(result, HEDL_OK);
            assert_eq!(major, 2);
            assert_eq!(minor, 0);

            hedl_free_document(doc);
        }
    }

    #[test]
    fn test_canonicalize() {
        let doc = parse_document(VALID_HEDL);

        // SAFETY: `doc` is a live handle from `parse_document`; `out_str` is a live local
        // used as the output parameter. String and handle are each freed exactly once.
        unsafe {
            let mut out_str: *mut c_char = ptr::null_mut();
            let result = hedl_canonicalize(doc, &mut out_str);

            assert_eq!(result, HEDL_OK);
            assert!(!out_str.is_null());

            hedl_free_string(out_str);
            hedl_free_document(doc);
        }
    }

    #[cfg(feature = "json")]
    #[test]
    fn test_to_json() {
        let doc = parse_document(VALID_HEDL);

        // SAFETY: `doc` is a live handle from `parse_document`; `out_str` is a live local
        // used as the output parameter and is only read after being checked for null.
        // String and handle are each freed exactly once.
        unsafe {
            let mut out_str: *mut c_char = ptr::null_mut();
            let result = hedl_to_json(doc, 0, &mut out_str);

            assert_eq!(result, HEDL_OK);
            assert!(!out_str.is_null());

            let json = CStr::from_ptr(out_str).to_str().unwrap();
            assert!(json.contains("key"));

            hedl_free_string(out_str);
            hedl_free_document(doc);
        }
    }

    #[cfg(feature = "yaml")]
    #[test]
    fn test_to_yaml() {
        let doc = parse_document(VALID_HEDL);

        // SAFETY: `doc` is a live handle from `parse_document`; `out_str` is a live local
        // used as the output parameter and is only read after being checked for null.
        // String and handle are each freed exactly once.
        unsafe {
            let mut out_str: *mut c_char = ptr::null_mut();
            let result = hedl_to_yaml(doc, 0, &mut out_str);

            assert_eq!(result, HEDL_OK);
            assert!(!out_str.is_null());

            let yaml = CStr::from_ptr(out_str).to_str().unwrap();
            assert!(yaml.contains("key"));

            hedl_free_string(out_str);
            hedl_free_document(doc);
        }
    }

    #[cfg(feature = "xml")]
    #[test]
    fn test_to_xml() {
        let doc = parse_document(VALID_HEDL);

        // SAFETY: `doc` is a live handle from `parse_document`; `out_str` is a live local
        // used as the output parameter and is only read after being checked for null.
        // String and handle are each freed exactly once.
        unsafe {
            let mut out_str: *mut c_char = ptr::null_mut();
            let result = hedl_to_xml(doc, &mut out_str);

            assert_eq!(result, HEDL_OK);
            assert!(!out_str.is_null());

            let xml = CStr::from_ptr(out_str).to_str().unwrap();
            assert!(xml.contains("<?xml"));

            hedl_free_string(out_str);
            hedl_free_document(doc);
        }
    }

    #[test]
    fn test_lint() {
        let doc = parse_document(VALID_HEDL);

        // SAFETY: `doc` is a live handle from `parse_document`; `diag` is a live local used
        // as the output parameter and is only queried after being checked for null.
        // Diagnostics and handle are each freed exactly once.
        unsafe {
            let mut diag: *mut HedlDiagnostics = ptr::null_mut();
            let result = hedl_lint(doc, &mut diag);

            assert_eq!(result, HEDL_OK);
            assert!(!diag.is_null());

            let count = hedl_diagnostics_count(diag);
            assert!(count >= 0);

            hedl_free_diagnostics(diag);
            hedl_free_document(doc);
        }
    }

    #[cfg(feature = "json")]
    #[test]
    fn test_from_json_roundtrip() {
        // Parse original HEDL
        let doc1 = parse_document(VALID_HEDL);

        // SAFETY: `doc1` is a live handle from `parse_document`; `json_str` and `doc2` are
        // live locals used as output parameters. `json_str` is only passed on after being
        // checked for null. The string and both handles are each freed exactly once.
        unsafe {
            // Convert to JSON
            let mut json_str: *mut c_char = ptr::null_mut();
            let to_json_result = hedl_to_json(doc1, 1, &mut json_str);

            assert_eq!(to_json_result, HEDL_OK);
            assert!(!json_str.is_null());

            // Parse JSON back to HEDL
            let mut doc2: *mut HedlDocument = ptr::null_mut();
            let result = hedl_from_json(json_str, -1, &mut doc2);

            assert_eq!(result, HEDL_OK);
            assert!(!doc2.is_null());

            hedl_free_string(json_str);
            hedl_free_document(doc1);
            hedl_free_document(doc2);
        }
    }

    #[cfg(feature = "neo4j")]
    #[test]
    fn test_to_neo4j_cypher() {
        let doc = parse_document(VALID_HEDL);

        // SAFETY: `doc` is a live handle from `parse_document`; `out_str` is a live local
        // used as the output parameter. String and handle are each freed exactly once.
        unsafe {
            let mut out_str: *mut c_char = ptr::null_mut();
            let result = hedl_to_neo4j_cypher(doc, 1, &mut out_str);

            assert_eq!(result, HEDL_OK);
            assert!(!out_str.is_null());

            // The simple key: value doc doesn't produce Cypher nodes,
            // but the function should succeed
            hedl_free_string(out_str);
            hedl_free_document(doc);
        }
    }

    #[test]
    fn test_list_value_support() {
        const LIST_HEDL: &[u8] = b"%VERSION: 1.1
%STRUCT: User: [id, roles]
---
id: alice, roles: (admin, editor, viewer)
\0";

        // Document should parse successfully with list values
        let doc = parse_document(LIST_HEDL);

        // SAFETY: `doc` is a live handle from `parse_document` and is freed exactly once.
        unsafe {
            let root_count = hedl_root_item_count(doc);
            assert_eq!(root_count, 1);

            hedl_free_document(doc);
        }
    }
}