1#![deny(clippy::all)]
29
30use napi::bindgen_prelude::*;
31use napi_derive::napi;
32use serde::Deserialize;
33
34use gukhanmun::fst::FstDictionary;
35use gukhanmun::html::HtmlElementInfo;
36use gukhanmun::markdown::MarkdownVariant;
37use gukhanmun::{
38 Builder, ContextWindow, Converter, DirectiveAction, HomophoneDetection, NumeralStrategy,
39 OriginalGloss, Preset, Recovery, RenderMode, RenderOptions, RubyBase, SegmentationStrategy,
40};
41
/// Converter options as received from JavaScript.
///
/// Deserialized from a JSON object whose keys are the camelCase form of the
/// field names. Every field is optional; `NapiGukhanmun::load` forwards only
/// the fields that are present to the `Builder`.
#[derive(Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct JsOptions {
    /// Preset name, resolved by `parse_preset`; `load` uses `"ko-kr"` when absent.
    preset: Option<String>,
    /// Rendering mode name, resolved by `parse_render_mode`.
    rendering: Option<String>,
    /// Gloss style name; `load` reads it only when `rendering` is also given.
    original_gloss: Option<String>,
    /// Segmentation strategy name, resolved by `parse_segmentation`.
    segmentation: Option<String>,
    /// Numeral strategy name, resolved by `parse_numerals`.
    numerals: Option<String>,
    /// Passed unchanged to `Builder::initial_sound_law`.
    initial_sound_law: Option<bool>,
    /// Context window name for `Builder::homophone_window`, resolved by `parse_context_window`.
    homophone_window: Option<String>,
    /// Homophone detection name, resolved by `parse_homophone_detection`.
    homophone_detection: Option<String>,
    /// Context window name for `Builder::first_occurrence_window`, resolved by `parse_context_window`.
    first_occurrence_window: Option<String>,
    /// Passed unchanged to `Builder::collapse_redundant_parens`.
    collapse_redundant_parens: Option<bool>,
    /// Recovery policy name, resolved by `parse_recovery`.
    recovery: Option<String>,
    /// Per-word directives registered through `Builder::directive`.
    directives: Option<JsDirectives>,
    /// Rules used to build the `Builder::html_preserve_when` predicate.
    html: Option<JsHtmlOptions>,
}
61
/// Directive lists from the `directives` option.
///
/// Each list defaults to empty when its key is missing. `NapiGukhanmun::load`
/// registers every entry with `Builder::directive`, pairing it with the
/// `DirectiveAction` that matches the list it came from.
#[derive(Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct JsDirectives {
    /// Entries registered with `DirectiveAction::RequireHanja`.
    #[serde(default)]
    require_hanja: Vec<String>,
    /// Entries registered with `DirectiveAction::RequireHangul`.
    #[serde(default)]
    require_hangul: Vec<String>,
    /// Entries registered with `DirectiveAction::SkipAnnotation`.
    #[serde(default)]
    skip_annotation: Vec<String>,
}
72
/// HTML rules from the `html` option.
///
/// `NapiGukhanmun::load` turns these into the predicate handed to
/// `Builder::html_preserve_when`: the predicate returns `true` for an element
/// as soon as one listed class or attribute rule matches its raw attributes.
#[derive(Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct JsHtmlOptions {
    /// Class names checked with `has_class`; empty when the key is missing.
    #[serde(default)]
    preserve_classes: Vec<String>,
    /// Attribute rules checked with `has_attribute`; empty when the key is missing.
    #[serde(default)]
    preserve_attributes: Vec<JsPreserveAttr>,
}
81
/// One attribute rule inside `JsHtmlOptions::preserve_attributes`.
///
/// The enum is untagged, so the JSON value is either a bare string or an
/// object with a `name` key and an optional `value` key.
#[derive(Deserialize)]
#[serde(untagged)]
enum JsPreserveAttr {
    /// Bare attribute name; matched on presence only (`has_attribute` with no value).
    Name(String),
    /// Attribute name plus an optional value that is forwarded to `has_attribute`.
    NameValue { name: String, value: Option<String> },
}
88
/// A dictionary supplied by the JavaScript caller as raw bytes.
#[napi(object)]
pub struct RawDictInput {
    /// Dictionary format tag; `NapiGukhanmun::load` accepts `"fst"` and `"cdb"`
    /// and rejects anything else.
    pub format: String,
    /// Serialized dictionary contents handed to the matching `from_bytes` loader.
    pub bytes: Buffer,
}
99
/// Object form of the format argument, as parsed by `parse_format_json`.
///
/// Only an object whose `format` is `"markdown"` is accepted by
/// `parse_format_json`; any other `format` value is reported as an error.
#[derive(Deserialize, Clone)]
#[serde(untagged)]
enum StreamFormatJson {
    /// `{ "format": ..., "gfm": ... }`; a missing `gfm` is treated as `false`
    /// by `parse_format_json`.
    MarkdownObj { format: String, gfm: Option<bool> },
}
108
/// Input format selected for a conversion, after validation.
#[derive(Clone, Copy)]
enum StreamFormat {
    /// Converted with `Converter::convert_text_to_string`.
    Text,
    /// Converted with `Converter::convert_html_fragment_to_string`.
    Html,
    /// Converted with `Converter::convert_markdown_to_string`; `gfm` selects
    /// `MarkdownVariant::Gfm` over `MarkdownVariant::CommonMark`.
    Markdown { gfm: bool },
}
115
/// Converter handle exported to JavaScript through `#[napi]`.
///
/// Owns the fully built `Converter` that `convert` and `stream_finish` run.
#[napi]
pub struct NapiGukhanmun {
    /// Converter produced by `Builder::build` in `NapiGukhanmun::load`.
    converter: Converter<'static>,
}

// SAFETY: NOTE(review): these impls assert that the wrapped `Converter<'static>`
// may be moved to and shared between threads. Nothing in this file establishes
// that; confirm it against `Converter`'s definition, including whatever it
// stores for the predicate installed via `html_preserve_when`.
unsafe impl Send for NapiGukhanmun {}
unsafe impl Sync for NapiGukhanmun {}
132
133#[napi]
134impl NapiGukhanmun {
135 #[napi(factory)]
146 pub fn load(
147 options_json: Option<String>,
148 dictionaries: Option<Vec<RawDictInput>>,
149 ) -> napi::Result<NapiGukhanmun> {
150 let opts: JsOptions = match options_json.as_deref() {
151 None | Some("null") | Some("undefined") => JsOptions::default(),
152 Some(json) => {
153 serde_json::from_str(json).map_err(|e| napi_err("invalid-input", &e.to_string()))?
154 }
155 };
156
157 let preset = parse_preset(opts.preset.as_deref().unwrap_or("ko-kr"))?;
158 let mut builder = Builder::with_preset(preset).no_bundled_dictionaries();
159
160 if let Some(r) = &opts.rendering {
161 let mode = parse_render_mode(r, opts.original_gloss.as_deref())?;
162 builder = builder.rendering(mode);
163 }
164 if let Some(s) = &opts.segmentation {
165 builder = builder.segmentation(parse_segmentation(s)?);
166 }
167 if let Some(n) = &opts.numerals {
168 builder = builder.numerals(parse_numerals(n)?);
169 }
170 if let Some(v) = opts.initial_sound_law {
171 builder = builder.initial_sound_law(v);
172 }
173 if let Some(w) = &opts.homophone_window {
174 builder = builder.homophone_window(parse_context_window(w)?);
175 }
176 if let Some(d) = &opts.homophone_detection {
177 builder = builder.homophone_detection(parse_homophone_detection(d)?);
178 }
179 if let Some(w) = &opts.first_occurrence_window {
180 builder = builder.first_occurrence_window(parse_context_window(w)?);
181 }
182 if let Some(v) = opts.collapse_redundant_parens {
183 builder = builder.collapse_redundant_parens(v);
184 }
185 if let Some(r) = &opts.recovery {
186 builder = builder.recovery(parse_recovery(r)?);
187 }
188
189 if let Some(dirs) = opts.directives {
190 for h in dirs.require_hanja {
191 builder = builder.directive(h, DirectiveAction::RequireHanja);
192 }
193 for h in dirs.require_hangul {
194 builder = builder.directive(h, DirectiveAction::RequireHangul);
195 }
196 for h in dirs.skip_annotation {
197 builder = builder.directive(h, DirectiveAction::SkipAnnotation);
198 }
199 }
200
201 if let Some(html_opts) = opts.html {
202 let classes = html_opts.preserve_classes;
203 let attrs = html_opts.preserve_attributes;
204 builder = builder.html_preserve_when(move |info: &HtmlElementInfo<'_>| {
205 for cls in &classes {
206 if has_class(info.raw_attributes, cls) {
207 return true;
208 }
209 }
210 for attr in &attrs {
211 match attr {
212 JsPreserveAttr::Name(name) => {
213 if has_attribute(info.raw_attributes, name, None) {
214 return true;
215 }
216 }
217 JsPreserveAttr::NameValue { name, value } => {
218 if has_attribute(info.raw_attributes, name, value.as_deref()) {
219 return true;
220 }
221 }
222 }
223 }
224 false
225 });
226 }
227
228 for dict in dictionaries.unwrap_or_default() {
229 let bytes = dict.bytes.as_ref();
230 match dict.format.as_str() {
231 "fst" => {
232 let d = FstDictionary::from_bytes(bytes)
233 .map_err(|e| napi_err("dictionary-load", &e.to_string()))?;
234 builder = builder.push_dictionary(d);
235 }
236 "cdb" => {
237 use gukhanmun::cdb::CdbDictionary;
238 let d = CdbDictionary::from_bytes(bytes)
239 .map_err(|e| napi_err("dictionary-load", &e.to_string()))?;
240 builder = builder.push_dictionary(d);
241 }
242 other => {
243 return Err(napi_err(
244 "unsupported-content-type",
245 &format!("unknown dictionary format: {other}"),
246 ));
247 }
248 }
249 }
250
251 let converter = builder.build().map_err(|e| map_gukhanmun_error(&e))?;
252
253 Ok(NapiGukhanmun { converter })
254 }
255
256 #[napi]
264 pub fn convert(&self, source: String, format_json: Option<String>) -> napi::Result<String> {
265 let fmt = parse_format_json(format_json.as_deref())?;
266 convert_with_format(&self.converter, &source, fmt)
267 }
268
269 #[napi]
276 pub fn open_stream(&self, format_json: Option<String>) -> napi::Result<External<StreamState>> {
277 let fmt = parse_format_json(format_json.as_deref())?;
278 Ok(External::new(StreamState {
279 buffer: String::new(),
280 format: fmt,
281 }))
282 }
283
284 #[napi]
287 pub fn stream_push(
288 &self,
289 stream: &mut External<StreamState>,
290 chunk: String,
291 ) -> napi::Result<String> {
292 stream.buffer.push_str(&chunk);
293 Ok(String::new())
294 }
295
296 #[napi]
300 pub fn stream_finish(&self, stream: &mut External<StreamState>) -> napi::Result<String> {
301 let result = convert_with_format(&self.converter, &stream.buffer, stream.format)?;
302 stream.buffer.clear();
303 Ok(result)
304 }
305}
306
/// State behind a stream handle created by `NapiGukhanmun::open_stream`.
pub struct StreamState {
    /// Input accumulated by `stream_push`; converted and cleared by `stream_finish`.
    buffer: String,
    /// Format chosen when the stream was opened.
    format: StreamFormat,
}
314
315fn convert_with_format(
318 converter: &Converter<'static>,
319 source: &str,
320 fmt: StreamFormat,
321) -> napi::Result<String> {
322 match fmt {
323 StreamFormat::Text => converter
324 .convert_text_to_string(source)
325 .map_err(|e| map_gukhanmun_error(&e)),
326 StreamFormat::Html => converter
327 .convert_html_fragment_to_string(source)
328 .map_err(|e| map_gukhanmun_error(&e)),
329 StreamFormat::Markdown { gfm } => {
330 let variant = if gfm {
331 MarkdownVariant::Gfm
332 } else {
333 MarkdownVariant::CommonMark
334 };
335 converter
336 .convert_markdown_to_string(source, variant)
337 .map_err(|e| map_gukhanmun_error(&e))
338 }
339 }
340}
341
342fn parse_format_json(json: Option<&str>) -> napi::Result<StreamFormat> {
343 match json {
344 None | Some("null") | Some("undefined") => return Ok(StreamFormat::Text),
345 _ => {}
346 }
347 let raw = json.unwrap();
348 if let Ok(s) = serde_json::from_str::<String>(raw) {
350 return match s.as_str() {
351 "text" => Ok(StreamFormat::Text),
352 "html" => Ok(StreamFormat::Html),
353 "markdown" => Ok(StreamFormat::Markdown { gfm: false }),
354 other => Err(napi_err(
355 "unsupported-content-type",
356 &format!("unknown format: {other}"),
357 )),
358 };
359 }
360 if let Ok(obj) = serde_json::from_str::<StreamFormatJson>(raw) {
362 let StreamFormatJson::MarkdownObj { format, gfm } = obj;
363 if format == "markdown" {
364 return Ok(StreamFormat::Markdown {
365 gfm: gfm.unwrap_or(false),
366 });
367 }
368 return Err(napi_err(
369 "unsupported-content-type",
370 &format!("unknown format in object: {format}"),
371 ));
372 }
373 Err(napi_err("unsupported-content-type", "invalid format value"))
374}
375
376fn parse_preset(s: &str) -> napi::Result<Preset> {
377 match s {
378 "ko-kr" => Ok(Preset::KoKr),
379 "ko-kp" => Ok(Preset::KoKp),
380 other => Err(napi_err(
381 "invalid-input",
382 &format!("unknown preset: {other}"),
383 )),
384 }
385}
386
387fn parse_render_mode(mode: &str, gloss: Option<&str>) -> napi::Result<RenderOptions> {
388 let render_mode = match mode {
389 "hangul-only" => RenderMode::HangulOnly,
390 "hangul-hanja-parens" => RenderMode::HangulHanjaParens,
391 "hanja-hangul-parens" => RenderMode::HanjaHangulParens,
392 "ruby-on-hangul" => RenderMode::Ruby(RubyBase::OnHangul),
393 "ruby-on-hanja" => RenderMode::Ruby(RubyBase::OnHanja),
394 "original" => RenderMode::Original,
395 other => {
396 return Err(napi_err(
397 "invalid-input",
398 &format!("unknown rendering mode: {other}"),
399 ));
400 }
401 };
402 let original_gloss = if mode == "original" {
403 match gloss.unwrap_or("parens") {
404 "parens" => OriginalGloss::Parens,
405 "ruby" => OriginalGloss::Ruby,
406 other => {
407 return Err(napi_err(
408 "invalid-input",
409 &format!("unknown originalGloss: {other}"),
410 ));
411 }
412 }
413 } else {
414 OriginalGloss::Parens
415 };
416 Ok(RenderOptions {
417 mode: render_mode,
418 original_gloss,
419 })
420}
421
422fn parse_segmentation(s: &str) -> napi::Result<SegmentationStrategy> {
423 match s {
424 "lattice" => Ok(SegmentationStrategy::Lattice),
425 "eager" => Ok(SegmentationStrategy::Eager),
426 other => Err(napi_err(
427 "invalid-input",
428 &format!("unknown segmentation strategy: {other}"),
429 )),
430 }
431}
432
433fn parse_numerals(s: &str) -> napi::Result<NumeralStrategy> {
434 match s {
435 "hangul-phonetic" => Ok(NumeralStrategy::HangulPhonetic),
436 "positional-arabic" => Ok(NumeralStrategy::PositionalArabic),
437 "additive-arabic" => Ok(NumeralStrategy::AdditiveArabic),
438 "smart" => Ok(NumeralStrategy::Smart),
439 other => Err(napi_err(
440 "invalid-input",
441 &format!("unknown numeral strategy: {other}"),
442 )),
443 }
444}
445
446fn parse_context_window(s: &str) -> napi::Result<ContextWindow> {
447 match s {
448 "off" => Ok(ContextWindow::Off),
449 "per-block" => Ok(ContextWindow::PerBlock),
450 "per-section" => Ok(ContextWindow::PerSection),
451 "per-document" => Ok(ContextWindow::PerDocument),
452 other => Err(napi_err(
453 "invalid-input",
454 &format!("unknown context window: {other}"),
455 )),
456 }
457}
458
459fn parse_homophone_detection(s: &str) -> napi::Result<HomophoneDetection> {
460 match s {
461 "context-local" => Ok(HomophoneDetection::ContextLocal),
462 "dictionary-wide" => Ok(HomophoneDetection::DictionaryWide),
463 other => Err(napi_err(
464 "invalid-input",
465 &format!("unknown homophone detection: {other}"),
466 )),
467 }
468}
469
470fn parse_recovery(s: &str) -> napi::Result<Recovery> {
471 match s {
472 "strict" => Ok(Recovery::Strict),
473 "lenient" => Ok(Recovery::Lenient),
474 other => Err(napi_err(
475 "invalid-input",
476 &format!("unknown recovery policy: {other}"),
477 )),
478 }
479}
480
481fn napi_err(code: &str, message: &str) -> napi::Error {
484 let reason = serde_json::json!({
485 "code": code,
486 "message": message,
487 "chain": []
488 })
489 .to_string();
490 napi::Error::from_reason(reason)
491}
492
493fn map_gukhanmun_error(e: &gukhanmun::Error) -> napi::Error {
494 use gukhanmun::Error;
495 use std::error::Error as StdError;
496 let code = match e {
497 Error::Core(_) => "segmentation",
498 Error::Html(_) => "html-scan",
499 Error::Markdown(_) => "markdown",
500 Error::Fst(_) => "dictionary-load",
501 Error::Cdb(_) => "dictionary-load",
502 Error::Io(_) => "io",
503 Error::Config(_) => "invalid-input",
504 _ => "internal",
505 };
506 let mut chain: Vec<serde_json::Value> = Vec::new();
507 let mut src: Option<&(dyn StdError + 'static)> = e.source();
508 while let Some(s) = src {
509 chain.push(serde_json::json!({ "code": "internal", "message": s.to_string() }));
510 src = s.source();
511 }
512 chain.reverse();
513 let reason = serde_json::json!({
514 "code": code,
515 "message": e.to_string(),
516 "chain": chain,
517 })
518 .to_string();
519 napi::Error::from_reason(reason)
520}
521
/// Iterator over the attributes found in the raw attribute text of an HTML
/// start tag.
///
/// Yields `(name, value)` pairs borrowed from the input. `value` is `None` for
/// an attribute written without `=`; quoted values are returned without their
/// quotes and without any character-reference decoding.
struct AttrIter<'a> {
    /// The raw attribute text being scanned.
    raw: &'a str,
    /// Byte offset of the next unread byte; always on a UTF-8 char boundary
    /// between calls to `next`.
    pos: usize,
}

impl<'a> AttrIter<'a> {
    /// Starts scanning at the beginning of `raw`.
    fn new(raw: &'a str) -> Self {
        Self { raw, pos: 0 }
    }

    /// Moves `pos` forward while `keep_going` holds for the byte under it.
    fn advance_while(&mut self, keep_going: impl Fn(u8) -> bool) {
        let bytes = self.raw.as_bytes();
        while self.pos < bytes.len() && keep_going(bytes[self.pos]) {
            self.pos += 1;
        }
    }
}

impl<'a> Iterator for AttrIter<'a> {
    type Item = (&'a str, Option<&'a str>);

    fn next(&mut self) -> Option<Self::Item> {
        let raw = self.raw;
        let bytes = raw.as_bytes();

        // Whitespace, `/` (self-closing marker) and `>` only separate attributes.
        self.advance_while(|b| b.is_ascii_whitespace() || matches!(b, b'/' | b'>'));
        if self.pos >= bytes.len() {
            return None;
        }

        // A name runs up to whitespace, `/`, `>` or `=`, as in the HTML
        // tokenizer. Restricting names to a fixed ASCII set would drop names
        // such as `[hidden]` or `名前` and then re-scan their quoted values as
        // if they were attribute text, producing attributes that do not exist.
        // The first byte always belongs to the name, even a stray `=`, so every
        // call makes progress.
        let name_start = self.pos;
        self.pos += 1;
        self.advance_while(|b| !(b.is_ascii_whitespace() || matches!(b, b'/' | b'>' | b'=')));
        // Both ends sit next to ASCII delimiters (or the string ends), so the
        // slice is on char boundaries.
        let name = &raw[name_start..self.pos];

        self.advance_while(|b| b.is_ascii_whitespace());
        if bytes.get(self.pos) != Some(&b'=') {
            return Some((name, None));
        }
        self.pos += 1;
        self.advance_while(|b| b.is_ascii_whitespace());

        let value = match bytes.get(self.pos).copied() {
            Some(quote @ (b'"' | b'\'')) => {
                self.pos += 1;
                let value_start = self.pos;
                self.advance_while(|b| b != quote);
                let quoted = &raw[value_start..self.pos];
                // Step over the closing quote; an unterminated value simply
                // runs to the end of the input.
                if self.pos < bytes.len() {
                    self.pos += 1;
                }
                quoted
            }
            _ => {
                let value_start = self.pos;
                self.advance_while(|b| !b.is_ascii_whitespace());
                &raw[value_start..self.pos]
            }
        };
        Some((name, Some(value)))
    }
}
594
/// Decodes the character references in a raw HTML attribute value.
///
/// Recognized references are `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and
/// numeric references in decimal (`&#65;`) or hexadecimal (`&#x41;`) form.
/// Anything else, including a bare `&`, is copied through unchanged.
/// Decoding is single-pass: `&amp;lt;` becomes `&lt;`, not `<`.
fn decode_attr_value(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    let mut rest = raw;

    while let Some(amp) = rest.find('&') {
        out.push_str(&rest[..amp]);
        let after = &rest[amp + 1..];

        // A reference ends at the first `;` and cannot contain another `&`.
        // Stopping at whichever comes first keeps a bare `&` from pairing with
        // the `;` of a later, valid reference (`a=1&b=2&amp;c`), and keeps the
        // scan linear in the input length.
        let decoded = after
            .find([';', '&'])
            .filter(|&end| after.as_bytes()[end] == b';')
            .and_then(|end| decode_char_reference(&after[..end]).map(|ch| (ch, end)));

        match decoded {
            Some((ch, end)) => {
                out.push(ch);
                rest = &after[end + 1..];
            }
            None => {
                // Not a reference: keep the `&` literally and resume right after it.
                out.push('&');
                rest = after;
            }
        }
    }

    out.push_str(rest);
    out
}

/// Resolves the text between `&` and `;` to a character, if it is one of the
/// supported named references or a well-formed numeric reference.
fn decode_char_reference(reference: &str) -> Option<char> {
    match reference {
        "amp" => Some('&'),
        "lt" => Some('<'),
        "gt" => Some('>'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        _ => {
            let digits = reference.strip_prefix('#')?;
            let (body, radix) = match digits.strip_prefix(['x', 'X']) {
                Some(hex) => (hex, 16),
                None => (digits, 10),
            };
            // `from_str_radix` tolerates a leading `+`, which is not valid in a
            // character reference, so the digits are checked explicitly.
            if body.is_empty() || !body.chars().all(|c| c.is_digit(radix)) {
                return None;
            }
            u32::from_str_radix(body, radix)
                .ok()
                .and_then(char::from_u32)
        }
    }
}
643
644fn has_class(raw_attributes: &str, class_name: &str) -> bool {
649 for (name, value) in AttrIter::new(raw_attributes) {
650 if name.eq_ignore_ascii_case("class") {
651 let raw = value.unwrap_or("");
652 let decoded = decode_attr_value(raw);
653 return decoded
654 .split_ascii_whitespace()
655 .any(|tok| tok == class_name);
656 }
657 }
658 false
659}
660
661fn has_attribute(raw_attributes: &str, attr_name: &str, attr_value: Option<&str>) -> bool {
666 for (name, value) in AttrIter::new(raw_attributes) {
667 if name.eq_ignore_ascii_case(attr_name) {
668 return match attr_value {
669 None => true,
670 Some(required) => match value {
671 None => false,
672 Some(raw) => decode_attr_value(raw) == required,
673 },
674 };
675 }
676 }
677 false
678}