Skip to main content

normalize

Function normalize 

Source
pub fn normalize(text: &str) -> String
Expand description

Normalise Thai text into canonical form.

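Returns an owned String with both transformations applied: consecutive duplicate tone marks are collapsed to a single mark (the last one is kept), and the decomposed sequence nikhahit + sara aa is composed into sara am. ASCII and non-Thai characters are passed through unchanged.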

§Examples

use kham_core::normalizer::normalize;

// 1. วรรณยุกต์ dedup: double tone mark → single (keep last)
let doubled = "\u{0E01}\u{0E48}\u{0E49}"; // ก + อ่ + อ้
let fixed = normalize(doubled);
assert_eq!(fixed, "\u{0E01}\u{0E49}"); // ก้ only
use kham_core::normalizer::normalize;

// 2. Sara Am composition: nikhahit + sara aa → sara am
let decomposed = "\u{0E01}\u{0E4D}\u{0E32}"; // ก + อํ + อา
let fixed = normalize(decomposed);
assert_eq!(fixed, "\u{0E01}\u{0E33}"); // กำ