use super::prelude::*;
use crate::Tokenization as RustTokenization;
use crate::parsing::ExtractedToken as RustExtractedToken;
use crate::utf16::Utf16IndexMap;
use self_cell::self_cell;
use std::sync::Arc;
// Self-referential pair tying the owned source text to the tokenization
// borrowed from it. The `self_cell!` macro generates the (unsafe, audited)
// plumbing that lets `RustTokenization`, which borrows from the owner
// `String`, live alongside that owner in a single movable struct.
self_cell!(
    struct TokenizationInner {
        owner: String,

        // `#[covariant]` tells self_cell the dependent's lifetime parameter
        // is covariant, enabling the safe `borrow_dependent()` accessor.
        #[covariant]
        dependent: RustTokenization,
    }

    impl {Debug}
);
/// JS-exposed handle to the result of tokenizing a string.
///
/// Wraps the self-referential text/token pair in an `Arc` so `copy()` can
/// hand out cheap shared handles (refcount bump) to the same underlying data.
#[wasm_bindgen]
#[derive(Debug)]
pub struct Tokenization {
    // Shared, immutable tokenization state.
    inner: Arc<TokenizationInner>,
}
#[wasm_bindgen]
impl Tokenization {
#[inline]
pub(crate) fn get(&self) -> &RustTokenization<'_> {
self.inner.borrow_dependent()
}
#[wasm_bindgen]
pub fn copy(&self) -> Tokenization {
Tokenization {
inner: Arc::clone(&self.inner),
}
}
#[wasm_bindgen]
pub fn text(&self) -> String {
self.inner.borrow_owner().clone()
}
#[wasm_bindgen]
pub fn tokens(&self) -> Result<JsValue, JsValue> {
self.inner
.with_dependent(|_, inner| rust_to_js!(convert_tokens_utf16(inner)))
}
}
/// Tokenizes `text` and wraps the result in a JS-exposed `Tokenization`.
///
/// The closure runs inside `TokenizationInner::new`, borrowing the freshly
/// stored text so the produced tokens may reference it.
#[wasm_bindgen]
pub fn tokenize(text: String) -> Tokenization {
    let cell = TokenizationInner::new(text, |source: &String| crate::tokenize(source));
    let inner = Arc::new(cell);
    Tokenization { inner }
}
/// Re-expresses every extracted token's indices in UTF-16 code units,
/// the unit JavaScript string indices are measured in.
fn convert_tokens_utf16<'a>(
    tokenization: &'a RustTokenization,
) -> Vec<RustExtractedToken<'a>> {
    let source = tokenization.full_text().inner();
    let index_map = Utf16IndexMap::new(source);

    let mut converted = Vec::new();
    for token in tokenization.tokens() {
        converted.push(token.to_utf16_indices(&index_map));
    }
    converted
}