hedl_ffi/lib.rs
1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! HEDL FFI Bindings
19//!
20//! Provides C-compatible interface for HEDL operations.
21//! All functions use C-style error handling with return codes.
22//!
23//! # Memory Management
24//!
25//! **IMPORTANT:** Memory ownership follows strict rules:
26//!
27//! - Strings returned by `hedl_*` functions MUST be freed with `hedl_free_string`
28//! - Byte arrays returned by `hedl_to_parquet` MUST be freed with `hedl_free_bytes`
29//! - Documents MUST be freed with `hedl_free_document`
30//! - Diagnostics MUST be freed with `hedl_free_diagnostics`
31//!
32//! **WARNING - Memory Safety Requirements:**
33//!
34//! The `hedl_free_*` functions ONLY accept pointers that were allocated by HEDL
35//! functions. Passing any of the following will cause undefined behavior:
36//!
37//! - Pointers from `malloc`/`calloc`/`realloc` (wrong allocator)
38//! - Stack-allocated variables
39//! - Already-freed pointers (double free)
40//! - Pointers from other libraries
//!
//! Passing NULL is the one exception: it is always safe and is simply ignored.
42//!
43//! # Thread Safety
44//!
45//! ## Error Handling Thread Safety
46//!
47//! Error messages are stored in **thread-local storage**, providing lock-free,
48//! wait-free error handling for multi-threaded applications.
49//!
50//! **Key Guarantees:**
51//! - Each thread maintains its own independent error state
//! - `hedl_get_last_error()` / `hedl_get_last_error_threadsafe()` return the
//!   error for the CALLING thread only
54//! - Errors from one thread will NEVER appear in or overwrite errors in another thread
55//! - No mutexes, locks, or other synchronization primitives are required
56//! - Zero contention between threads accessing error messages
57//! - You MUST call error functions from the same thread that received the error code
58//!
59//! **Thread-Safe Functions:**
60//! - `hedl_get_last_error()` - Get error for current thread
61//! - `hedl_get_last_error_threadsafe()` - Explicit thread-safe alias
62//! - `hedl_clear_error_threadsafe()` - Clear error for current thread
63//!
64//! **Example (Multi-threaded C with pthreads):**
65//! ```c
66//! void* worker(void* arg) {
67//! const char* input = (const char*)arg;
68//! HedlDocument* doc = NULL;
69//!
70//! if (hedl_parse(input, -1, 0, &doc) != HEDL_OK) {
71//! // Get error for THIS thread - independent of other threads
72//! const char* err = hedl_get_last_error_threadsafe();
73//! fprintf(stderr, "Parse error: %s\n", err);
74//! return NULL;
75//! }
76//!
77//! // Process document...
78//! hedl_free_document(doc);
79//! return (void*)1;
80//! }
81//!
82//! int main() {
83//! pthread_t threads[8];
84//! const char* inputs[8] = { ... };
85//!
86//! // Launch threads - each with independent error state
87//! for (int i = 0; i < 8; i++) {
88//! pthread_create(&threads[i], NULL, worker, (void*)inputs[i]);
89//! }
90//!
91//! for (int i = 0; i < 8; i++) {
92//! pthread_join(threads[i], NULL);
93//! }
94//! }
95//! ```
96//!
97//! ## Document Handle Thread Safety
98//!
99//! Document handles (`HedlDocument*`) are **NOT thread-safe** by design for
100//! performance reasons. Do not share document handles between threads without
101//! external synchronization (mutexes, etc.).
102//!
103//! **Safe Pattern:**
104//! - Each thread creates its own document handles
105//! - Each thread frees its own document handles
106//! - No sharing of document pointers across threads
107//!
108//! **Unsafe Pattern:**
109//! - Passing a `HedlDocument*` to multiple threads (data race)
110//! - Accessing the same document from multiple threads (undefined behavior)
111//!
112//! # Error Handling
113//!
114//! - All functions return error codes (`HEDL_OK` on success)
115//! - Use `hedl_get_last_error` to get the error message for the current thread
116//!
117//! # Security
118//!
119//! ## Poison Pointers
120//!
121//! To detect double-free and use-after-free bugs, this library uses poison pointers:
122//!
123//! - After freeing a document or diagnostics, the internal pointer is checked against
124//! a poison value
125//! - All accessor functions validate that pointers are not poisoned before use
126//! - This provides defense-in-depth against memory safety bugs
127//!
128//! **Note**: Since C passes pointers by value, we cannot modify the caller's pointer
129//! after freeing. However, we can detect if a freed pointer is passed back to us
130//! by checking for the poison value in accessor functions.
131//!
132//! # Audit Logging
133//!
134//! This library provides comprehensive audit logging for all FFI function calls
135//! using the `tracing` crate. The logging system captures:
136//!
137//! - Function entry/exit with timing information
138//! - Sanitized parameters (pointer addresses are masked for security)
139//! - Success/failure outcomes with error details
140//! - Performance metrics (call duration)
141//! - Thread context for correlation
142//!
143//! ## Configuring Logging
144//!
145//! To enable logging, initialize a tracing subscriber in your application:
146//!
147//! ```rust,no_run
148//! use tracing_subscriber::{fmt, EnvFilter};
149//!
150//! // Initialize the tracing subscriber
151//! tracing_subscriber::fmt()
152//! .with_env_filter(
153//! EnvFilter::try_from_default_env()
154//! .unwrap_or_else(|_| EnvFilter::new("info"))
155//! )
156//! .with_target(true)
157//! .with_thread_ids(true)
158//! .with_line_number(true)
159//! .init();
160//!
161//! // Now all FFI calls will be logged
162//! ```
163//!
164//! ## Log Levels
165//!
166//! - `ERROR`: Function failures with error details
167//! - `WARN`: Recoverable errors or unusual conditions
168//! - `INFO`: Function call entry/exit with basic metrics
169//! - `DEBUG`: Detailed parameter information (sanitized)
170//!
171//! ## Environment Variables
172//!
173//! Control logging via the `RUST_LOG` environment variable:
174//!
175//! ```bash
176//! # Log all INFO and above
177//! export RUST_LOG=info
178//!
179//! # Log only FFI audit events
180//! export RUST_LOG=hedl_ffi::audit=debug
181//!
182//! # Log everything at DEBUG level
183//! export RUST_LOG=debug
184//! ```
185//!
186//! ## Example Output
187//!
188//! ```text
189//! 2025-01-05T10:30:45.123Z INFO hedl_ffi::audit: FFI call started function="hedl_parse" thread_id=ThreadId(1) depth=0
190//! 2025-01-05T10:30:45.125Z DEBUG hedl_ffi::audit: FFI call parameters function="hedl_parse" params=[("input_len", "1024")]
191//! 2025-01-05T10:30:45.130Z INFO hedl_ffi::audit: FFI call completed function="hedl_parse" duration_ms=7.2 status="success"
192//! ```
193//!
194//! See the [`audit`] module for more details on the logging implementation.
195
196// =============================================================================
197// Module Declarations
198// =============================================================================
199
200#![cfg_attr(not(test), warn(missing_docs))]
201/// Async FFI operations.
202pub mod async_ops;
203/// FFI audit logging.
204pub mod audit;
205mod conversions;
206mod diagnostics;
207mod error;
208mod ffi_strings;
209mod memory;
210mod operations;
211mod parsing;
212/// Reentrancy guard.
213pub mod reentrancy;
214mod types;
215
216// =============================================================================
217// Re-exports
218// =============================================================================
219
220// Types and error codes
221pub use types::{
222 HedlDiagnostics, HedlDocument, HEDL_ERR_ALLOC, HEDL_ERR_CANCELLED, HEDL_ERR_CANONICALIZE,
223 HEDL_ERR_CSV, HEDL_ERR_INVALID_HANDLE, HEDL_ERR_INVALID_UTF8, HEDL_ERR_JSON, HEDL_ERR_LINT,
224 HEDL_ERR_NEO4J, HEDL_ERR_NULL_PTR, HEDL_ERR_PARQUET, HEDL_ERR_PARSE, HEDL_ERR_QUEUE_FULL,
225 HEDL_ERR_REENTRANT_CALL, HEDL_ERR_TOON, HEDL_ERR_XML, HEDL_ERR_YAML, HEDL_OK,
226};
227
228// Error handling
229pub use error::{hedl_clear_error_threadsafe, hedl_get_last_error, hedl_get_last_error_threadsafe};
230
231// Memory management
232pub use memory::{hedl_free_bytes, hedl_free_diagnostics, hedl_free_document, hedl_free_string};
233
234// Parsing functions
235pub use parsing::{
236 hedl_alias_count, hedl_get_version, hedl_parse, hedl_root_item_count, hedl_schema_count,
237 hedl_validate,
238};
239
240// Operations
241pub use operations::{hedl_canonicalize, hedl_lint};
242
243// Diagnostics
244pub use diagnostics::{hedl_diagnostics_count, hedl_diagnostics_get, hedl_diagnostics_severity};
245
246// Conversion functions (to_*)
247#[cfg(feature = "json")]
248pub use conversions::to_formats::hedl_to_json;
249
250#[cfg(feature = "yaml")]
251pub use conversions::to_formats::hedl_to_yaml;
252
253#[cfg(feature = "xml")]
254pub use conversions::to_formats::hedl_to_xml;
255
256#[cfg(feature = "csv")]
257pub use conversions::to_formats::hedl_to_csv;
258
259#[cfg(feature = "parquet")]
260pub use conversions::to_formats::hedl_to_parquet;
261
262#[cfg(feature = "neo4j")]
263pub use conversions::to_formats::hedl_to_neo4j_cypher;
264
265#[cfg(feature = "toon")]
266pub use conversions::to_formats::hedl_to_toon;
267
268// Zero-copy callback functions (to_*_callback)
269pub use conversions::to_formats_callback::HedlOutputCallback;
270
271#[cfg(feature = "json")]
272pub use conversions::to_formats_callback::hedl_to_json_callback;
273
274#[cfg(feature = "yaml")]
275pub use conversions::to_formats_callback::hedl_to_yaml_callback;
276
277#[cfg(feature = "xml")]
278pub use conversions::to_formats_callback::hedl_to_xml_callback;
279
280#[cfg(feature = "csv")]
281pub use conversions::to_formats_callback::hedl_to_csv_callback;
282
283#[cfg(feature = "neo4j")]
284pub use conversions::to_formats_callback::hedl_to_neo4j_cypher_callback;
285
286pub use conversions::to_formats_callback::hedl_canonicalize_callback;
287
288// Conversion functions (from_*)
289#[cfg(feature = "json")]
290pub use conversions::from_formats::hedl_from_json;
291
292#[cfg(feature = "yaml")]
293pub use conversions::from_formats::hedl_from_yaml;
294
295#[cfg(feature = "xml")]
296pub use conversions::from_formats::hedl_from_xml;
297
298#[cfg(feature = "parquet")]
299pub use conversions::from_formats::hedl_from_parquet;
300
301#[cfg(feature = "toon")]
302pub use conversions::from_formats::hedl_from_toon;
303
304// Async operations
305pub use async_ops::{
306 hedl_async_cancel, hedl_async_free, hedl_canonicalize_async, hedl_lint_async, hedl_parse_async,
307 HedlAsyncOp, HedlCompletionCallback, HedlCompletionCallbackFn,
308};
309
310#[cfg(feature = "json")]
311pub use async_ops::hedl_to_json_async;
312
313#[cfg(feature = "yaml")]
314pub use async_ops::hedl_to_yaml_async;
315
316#[cfg(feature = "xml")]
317pub use async_ops::hedl_to_xml_async;
318
319#[cfg(feature = "csv")]
320pub use async_ops::hedl_to_csv_async;
321
322#[cfg(feature = "neo4j")]
323pub use async_ops::hedl_to_neo4j_cypher_async;
324
325#[cfg(feature = "toon")]
326pub use async_ops::hedl_to_toon_async;
327
328// =============================================================================
329// Tests
330// =============================================================================
331
#[cfg(test)]
mod tests {
    //! Smoke tests that drive the exported FFI surface the way a C caller would:
    //! raw pointers in, status codes out, explicit frees for every handle.

    use super::*;
    #[cfg(any(feature = "json", feature = "yaml", feature = "xml"))]
    use std::ffi::CStr;
    use std::os::raw::c_char;
    use std::ptr;

    /// Document the parser is expected to accept; NUL-terminated so it can be
    /// handed over as a C string with length `-1`.
    const VALID_HEDL: &[u8] = b"%V:2.0\n%NULL:~\n%QUOTE:\"\n---\nkey: value\0";
    /// Input the validator is expected to reject; NUL-terminated.
    const INVALID_HEDL: &[u8] = b"not valid hedl\0";

    /// Parses [`VALID_HEDL`] (length `-1`, flag argument `0`) and returns the handle.
    ///
    /// Setup failures are reported here, at their source, instead of surfacing
    /// later as a confusing status code from the function actually under test.
    /// The caller owns the returned handle and must release it with
    /// `hedl_free_document`.
    fn parse_valid() -> *mut HedlDocument {
        let mut doc: *mut HedlDocument = ptr::null_mut();
        // SAFETY: `VALID_HEDL` is a NUL-terminated constant and `doc` is a live
        // local used as the output slot.
        let status = unsafe {
            hedl_parse(VALID_HEDL.as_ptr().cast::<c_char>(), -1, 0, &mut doc)
        };
        assert_eq!(status, HEDL_OK, "fixture VALID_HEDL must parse");
        assert!(!doc.is_null(), "successful parse must yield a document handle");
        doc
    }

    #[test]
    fn test_parse_and_free() {
        let mut doc: *mut HedlDocument = ptr::null_mut();
        // SAFETY: `VALID_HEDL` is a NUL-terminated constant, `doc` is a valid
        // output slot, and the handle is freed exactly once.
        unsafe {
            let result = hedl_parse(VALID_HEDL.as_ptr().cast::<c_char>(), -1, 1, &mut doc);

            assert_eq!(result, HEDL_OK);
            assert!(!doc.is_null());

            hedl_free_document(doc);
        }
    }

    #[test]
    fn test_validate_valid() {
        // SAFETY: `VALID_HEDL` is a NUL-terminated constant.
        unsafe {
            assert_eq!(
                hedl_validate(VALID_HEDL.as_ptr().cast::<c_char>(), -1, 1),
                HEDL_OK
            );
        }
    }

    #[test]
    fn test_validate_invalid() {
        // SAFETY: `INVALID_HEDL` is a NUL-terminated constant.
        unsafe {
            assert_ne!(
                hedl_validate(INVALID_HEDL.as_ptr().cast::<c_char>(), -1, 1),
                HEDL_OK
            );
        }
    }

    #[test]
    fn test_null_ptr_handling() {
        let mut doc: *mut HedlDocument = ptr::null_mut();
        // SAFETY: the null input pointer is deliberately the case under test and
        // is expected to be rejected with `HEDL_ERR_NULL_PTR`; `doc` is a valid
        // output slot.
        unsafe {
            assert_eq!(hedl_parse(ptr::null(), -1, 0, &mut doc), HEDL_ERR_NULL_PTR);
        }
    }

    #[test]
    fn test_get_version() {
        let doc = parse_valid();
        let mut major: i32 = 0;
        let mut minor: i32 = 0;

        // SAFETY: `doc` is a live handle from `parse_valid`, both version outputs
        // are valid locals, and the handle is freed exactly once.
        unsafe {
            let result = hedl_get_version(doc, &mut major, &mut minor);

            assert_eq!(result, HEDL_OK);
            assert_eq!(major, 2);
            assert_eq!(minor, 0);

            hedl_free_document(doc);
        }
    }

    #[test]
    fn test_canonicalize() {
        let doc = parse_valid();
        let mut out_str: *mut c_char = ptr::null_mut();

        // SAFETY: `doc` is a live handle from `parse_valid`, `out_str` is a valid
        // output slot, and both allocations are freed exactly once.
        unsafe {
            let result = hedl_canonicalize(doc, &mut out_str);

            assert_eq!(result, HEDL_OK);
            assert!(!out_str.is_null());

            hedl_free_string(out_str);
            hedl_free_document(doc);
        }
    }

    #[cfg(feature = "json")]
    #[test]
    fn test_to_json() {
        let doc = parse_valid();
        let mut out_str: *mut c_char = ptr::null_mut();

        // SAFETY: `doc` is a live handle from `parse_valid` and `out_str` is a
        // valid output slot. `out_str` is only read as a C string after it has
        // been checked to be non-null, and both allocations are freed once.
        unsafe {
            let result = hedl_to_json(doc, 0, &mut out_str);

            assert_eq!(result, HEDL_OK);
            assert!(!out_str.is_null());

            let json = CStr::from_ptr(out_str)
                .to_str()
                .expect("JSON output must be valid UTF-8");
            assert!(json.contains("key"));

            hedl_free_string(out_str);
            hedl_free_document(doc);
        }
    }

    #[cfg(feature = "yaml")]
    #[test]
    fn test_to_yaml() {
        let doc = parse_valid();
        let mut out_str: *mut c_char = ptr::null_mut();

        // SAFETY: `doc` is a live handle from `parse_valid` and `out_str` is a
        // valid output slot. `out_str` is only read as a C string after it has
        // been checked to be non-null, and both allocations are freed once.
        unsafe {
            let result = hedl_to_yaml(doc, 0, &mut out_str);

            assert_eq!(result, HEDL_OK);
            assert!(!out_str.is_null());

            let yaml = CStr::from_ptr(out_str)
                .to_str()
                .expect("YAML output must be valid UTF-8");
            assert!(yaml.contains("key"));

            hedl_free_string(out_str);
            hedl_free_document(doc);
        }
    }

    #[cfg(feature = "xml")]
    #[test]
    fn test_to_xml() {
        let doc = parse_valid();
        let mut out_str: *mut c_char = ptr::null_mut();

        // SAFETY: `doc` is a live handle from `parse_valid` and `out_str` is a
        // valid output slot. `out_str` is only read as a C string after it has
        // been checked to be non-null, and both allocations are freed once.
        unsafe {
            let result = hedl_to_xml(doc, &mut out_str);

            assert_eq!(result, HEDL_OK);
            assert!(!out_str.is_null());

            let xml = CStr::from_ptr(out_str)
                .to_str()
                .expect("XML output must be valid UTF-8");
            assert!(xml.contains("<?xml"));

            hedl_free_string(out_str);
            hedl_free_document(doc);
        }
    }

    #[test]
    fn test_lint() {
        let doc = parse_valid();
        let mut diag: *mut HedlDiagnostics = ptr::null_mut();

        // SAFETY: `doc` is a live handle from `parse_valid`, `diag` is a valid
        // output slot that is only queried after being checked to be non-null,
        // and both handles are freed exactly once.
        unsafe {
            let result = hedl_lint(doc, &mut diag);

            assert_eq!(result, HEDL_OK);
            assert!(!diag.is_null());

            let count = hedl_diagnostics_count(diag);
            assert!(count >= 0);

            hedl_free_diagnostics(diag);
            hedl_free_document(doc);
        }
    }

    #[cfg(feature = "json")]
    #[test]
    fn test_from_json_roundtrip() {
        // Parse original HEDL
        let doc1 = parse_valid();
        let mut json_str: *mut c_char = ptr::null_mut();
        let mut doc2: *mut HedlDocument = ptr::null_mut();

        // SAFETY: `doc1` is a live handle from `parse_valid`; `json_str` and
        // `doc2` are valid output slots. `json_str` is only passed on after the
        // conversion has been checked to succeed, and every allocation is freed
        // exactly once.
        unsafe {
            // Convert to JSON, verifying the intermediate step so a failure here
            // is not misreported as a `hedl_from_json` failure.
            let to_json = hedl_to_json(doc1, 1, &mut json_str);
            assert_eq!(to_json, HEDL_OK, "HEDL -> JSON conversion must succeed");
            assert!(!json_str.is_null());

            // Parse JSON back to HEDL
            let result = hedl_from_json(json_str, -1, &mut doc2);

            assert_eq!(result, HEDL_OK);
            assert!(!doc2.is_null());

            hedl_free_string(json_str);
            hedl_free_document(doc1);
            hedl_free_document(doc2);
        }
    }

    #[cfg(feature = "neo4j")]
    #[test]
    fn test_to_neo4j_cypher() {
        let doc = parse_valid();
        let mut out_str: *mut c_char = ptr::null_mut();

        // SAFETY: `doc` is a live handle from `parse_valid`, `out_str` is a valid
        // output slot, and both allocations are freed exactly once.
        unsafe {
            let result = hedl_to_neo4j_cypher(doc, 1, &mut out_str);

            assert_eq!(result, HEDL_OK);
            assert!(!out_str.is_null());

            // The simple key: value doc doesn't produce Cypher nodes,
            // but the function should succeed
            hedl_free_string(out_str);
            hedl_free_document(doc);
        }
    }

    #[test]
    fn test_list_value_support() {
        // The literal spans several source lines on purpose: the continuation
        // lines start at column 0 so no indentation leaks into the document.
        const LIST_HEDL: &[u8] = b"%VERSION: 1.1
%STRUCT: User: [id, roles]
---
id: alice, roles: (admin, editor, viewer)
\0";

        let mut doc: *mut HedlDocument = ptr::null_mut();

        // SAFETY: `LIST_HEDL` is a NUL-terminated constant, `doc` is a valid
        // output slot that is only queried after being checked to be non-null,
        // and the handle is freed exactly once.
        unsafe {
            let result = hedl_parse(LIST_HEDL.as_ptr().cast::<c_char>(), -1, 0, &mut doc);

            assert_eq!(result, HEDL_OK);
            assert!(!doc.is_null());

            // Document should parse successfully with list values
            let root_count = hedl_root_item_count(doc);
            assert_eq!(root_count, 1);

            hedl_free_document(doc);
        }
    }
}