Skip to main content

dashem/
lib.rs

//! Enterprise-Grade Em-Dash Removal Library for Rust
//!
//! This crate provides safe Rust bindings to the high-performance C library
//! for removing em-dashes (U+2014) from UTF-8 encoded strings.
//!
//! # Examples
//!
//! ```ignore
//! // The library name is 'dashem', not 'dash-em' (due to Cargo naming)
//! let result = dashem::remove("Hello—world").unwrap();
//! assert_eq!(result, "Helloworld");
//! ```
13
// Raw declarations for the native `dashem` library, which is linked statically.
#[link(name = "dashem", kind = "static")]
unsafe extern "C" {
    /// Raw em-dash removal entry point.
    ///
    /// Receives a source buffer (`src`, `src_len`), a destination buffer
    /// (`dst`, `dst_capacity`) and an out-parameter `written`, and returns an
    /// integer status code. The safe wrapper in this file treats `0` as
    /// success and reads `written` as the number of bytes produced.
    fn dashem_remove(
        src: *const u8,
        src_len: usize,
        dst: *mut u8,
        dst_capacity: usize,
        written: *mut usize,
    ) -> i32;

    /// Returns a pointer to the library's version text.
    ///
    /// The safe wrapper in this file reads it as a NUL-terminated C string.
    fn dashem_version() -> *const u8;

    /// Returns a pointer to the name of the active implementation.
    ///
    /// The safe wrapper in this file reads it as a NUL-terminated C string.
    fn dashem_implementation_name() -> *const u8;

    /// Returns the CPU feature value computed by the native library.
    fn dashem_detect_cpu_features() -> u32;
}
28
29/// Removes all em-dashes (U+2014) from the input string.
30///
31/// # Examples
32///
33/// ```ignore
34/// let result = dashem::remove("Hello—world").unwrap();
35/// assert_eq!(result, "Helloworld");
36/// ```
37///
38/// # Errors
39///
40/// Returns an error if the input is invalid or the operation fails.
41pub fn remove(input: &str) -> Result<String, String> {
42    let input_bytes = input.as_bytes();
43    let mut output = vec![0u8; input_bytes.len()];
44    let mut output_len = 0usize;
45
46    let result = unsafe {
47        dashem_remove(
48            input_bytes.as_ptr(),
49            input_bytes.len(),
50            output.as_mut_ptr(),
51            output.len(),
52            &mut output_len,
53        )
54    };
55
56    if result != 0 {
57        return Err(format!("dashem_remove failed with code {}", result));
58    }
59
60    output.truncate(output_len);
61    Ok(String::from_utf8(output)
62        .map_err(|e| format!("Invalid UTF-8 in output: {}", e))?)
63}
64
65/// Returns the library version string.
66pub fn version() -> String {
67    unsafe {
68        let ptr = dashem_version();
69        std::ffi::CStr::from_ptr(ptr as *const i8)
70            .to_string_lossy()
71            .to_string()
72    }
73}
74
75/// Returns the name of the active implementation (e.g., "AVX2", "SSE4.2", "Scalar").
76pub fn implementation_name() -> String {
77    unsafe {
78        let ptr = dashem_implementation_name();
79        std::ffi::CStr::from_ptr(ptr as *const i8)
80            .to_string_lossy()
81            .to_string()
82    }
83}
84
85/// Detects available CPU features.
86pub fn detect_cpu_features() -> u32 {
87    unsafe { dashem_detect_cpu_features() }
88}
89
#[cfg(test)]
mod tests {
    use super::{implementation_name, remove, version};

    /// One em-dash between two words is dropped and the words are joined.
    #[test]
    fn test_remove_single_emdash() {
        assert_eq!(remove("Hello—world").unwrap(), "Helloworld");
    }

    /// Every em-dash in the input is dropped, not just the first one.
    #[test]
    fn test_remove_multiple_emdashes() {
        assert_eq!(remove("First—second—third").unwrap(), "Firstsecondthird");
    }

    /// Text without any em-dash comes back unchanged.
    #[test]
    fn test_remove_no_emdashes() {
        let text = "Hello, world!";
        assert_eq!(remove(text).unwrap(), text);
    }

    /// The version string reported by the library is non-empty.
    #[test]
    fn test_version() {
        assert!(!version().is_empty());
    }

    /// The active implementation name reported by the library is non-empty.
    #[test]
    fn test_implementation_name() {
        assert!(!implementation_name().is_empty());
    }
}