use std::ops::Range;
use std::sync::Arc;
use reflexo_typst::debug_loc::SourceSpanOffset;
use serde::{Deserialize, Serialize};
use tinymist_std::typst::TypstDocument;
use tinymist_task::TextExport;
use typst::{syntax::Span, text::TextItem};
use unicode_script::{Script, UnicodeScript};
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct WordsCount {
pub words: usize,
pub chars: usize,
pub spaces: usize,
pub cjk_chars: usize,
}
pub fn word_count(doc: &TypstDocument) -> WordsCount {
let _ = TextContent::map_back_spans;
let mut words = 0;
let mut chars = 0;
let mut cjk_chars = 0;
let mut spaces = 0;
let content = TextExport::run_on_doc(doc).unwrap_or_default();
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CountState {
InSpace,
InNonCJK,
InCJK,
}
fn is_cjk(c: char) -> bool {
matches!(
c.script(),
Script::Han | Script::Hiragana | Script::Katakana | Script::Hangul
)
}
let mut state = CountState::InSpace;
for c in content.chars() {
chars += 1;
if c.is_whitespace() {
if state != CountState::InSpace {
spaces += 1;
}
state = CountState::InSpace;
continue;
}
if is_cjk(c) {
words += 1;
cjk_chars += 1;
state = CountState::InCJK;
} else {
if state != CountState::InNonCJK {
words += 1;
}
state = CountState::InNonCJK;
}
}
WordsCount {
words,
chars,
spaces,
cjk_chars,
}
}
#[derive(Debug, Clone)]
pub struct MappedSpan {
pub span: SourceSpanOffset,
pub span_end: Option<SourceSpanOffset>,
pub completed: bool,
}
#[derive(Debug, Clone)]
pub struct TextContent {
pub content: String,
pub doc: Arc<TypstDocument>,
}
impl TextContent {
pub fn map_back_spans(
&self,
mut spans: Vec<std::ops::Range<usize>>,
) -> Vec<Option<MappedSpan>> {
spans.sort_by_key(|r| r.start);
let mut mapper = SpanMapper::default();
mapper.doc(&self.doc);
let mut offsets = mapper.span_offset;
while spans.len() < offsets.len() {
offsets.pop();
}
while spans.len() > offsets.len() {
offsets.push(None);
}
offsets
}
}
#[derive(Debug, Clone, Default)]
struct SpanMapper {
offset: usize,
spans_to_map: Vec<std::ops::Range<usize>>,
span_offset: Vec<Option<MappedSpan>>,
}
impl SpanMapper {
fn doc(&mut self, doc: &TypstDocument) {
match doc {
TypstDocument::Paged(paged_doc) => {
for page in paged_doc.pages.iter() {
self.frame(&page.frame);
}
}
TypstDocument::Html(..) => {}
}
}
fn frame(&mut self, doc: &typst::layout::Frame) {
for (_, item) in doc.items() {
self.item(item);
}
}
fn item(&mut self, item: &typst::layout::FrameItem) {
use typst::layout::FrameItem::*;
match item {
Group(g) => self.frame(&g.frame),
Text(t) => {
self.check(t.text.as_str(), Some(t));
}
Link(..) | Shape(..) | Image(..) => {
self.check("object", None);
}
Tag(..) => {}
}
}
fn check(&mut self, text: &str, src: Option<&TextItem>) {
if let Some(src) = src {
self.do_check(src);
}
self.offset += text.len();
}
fn do_check(&mut self, text: &TextItem) -> Option<()> {
let beg = self.offset;
let end = beg + text.text.len();
loop {
let so = self.span_offset.len();
let to_check = self.spans_to_map.get(so)?;
if to_check.start >= end {
return Some(());
}
if to_check.end <= beg {
self.span_offset.push(None);
log::info!("span out of range {to_check:?}");
continue;
}
if to_check.start < beg {
self.span_offset.push(None);
log::info!("span skipped {to_check:?}");
continue;
}
log::info!("span checking {to_check:?} in {text:?}");
let inner = to_check.start - beg;
self.span_offset
.push(self.check_text_inner(text, inner..inner + (to_check.len())));
}
}
fn check_text_inner(&self, text: &TextItem, rng: std::ops::Range<usize>) -> Option<MappedSpan> {
let glyphs = text
.glyphs
.iter()
.filter(|g| rng.contains(&g.range().start));
let mut min_span: Option<(Range<usize>, (Span, u16))> = None;
let mut max_span: Option<(Range<usize>, (Span, u16))> = None;
let mut found = vec![];
for glyph in glyphs {
found.push(glyph.range());
if let Some((mii, s)) = min_span.as_ref() {
if glyph.range().start < mii.start && !s.0.is_detached() {
min_span = Some((glyph.range(), glyph.span));
}
} else {
min_span = Some((glyph.range(), glyph.span));
}
if let Some((mai, s)) = max_span.as_ref() {
if glyph.range().end > mai.end && !s.0.is_detached() {
max_span = Some((glyph.range(), glyph.span));
}
} else {
max_span = Some((glyph.range(), glyph.span));
}
}
found.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| a.end.cmp(&b.end)));
let completed = !found.is_empty()
&& found[0].start <= rng.start
&& found[found.len() - 1].end >= rng.end;
let span = min_span?.1 .0;
let span_end = max_span.map(|m| m.1 .0);
Some(MappedSpan {
span: SourceSpanOffset::from(span),
span_end: span_end.map(SourceSpanOffset::from),
completed,
})
}
}