use std::ops::Range;
use std::time::Instant;
use log::{debug, info, trace};
use serde::{Deserialize, Serialize};
use typst::introspection::Tag;
use typst::layout::{Frame, FrameItem, PagedDocument, Point};
use typst::syntax::Span;
use crate::pipeline::BoundIndex;
pub(crate) fn pt_to_px(pt: f64, ppi: f32) -> u32 {
let pixel_per_pt = ppi / 72.0;
(pixel_per_pt * pt as f32).round().max(1.0) as u32
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VisualLine {
pub y_pt: f64, pub y_px: u32, pub md_block_range: Option<Range<usize>>,
pub md_offset: Option<usize>,
}
pub fn extract_visual_lines(document: &PagedDocument, ppi: f32) -> Vec<VisualLine> {
extract_visual_lines_with_map(document, ppi, None)
}
pub fn extract_visual_lines_with_map(
document: &PagedDocument,
ppi: f32,
mapping: Option<&BoundIndex<'_>>,
) -> Vec<VisualLine> {
let start = Instant::now();
if document.pages.is_empty() {
return Vec::new();
}
let mut raw_lines: Vec<(f64, Vec<Span>)> = Vec::new();
collect_visual_lines_structural(&document.pages[0].frame, Point::zero(), &mut raw_lines);
raw_lines.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
let mut deduped: Vec<(f64, Vec<Span>)> = Vec::new();
for (y, spans) in raw_lines {
if deduped
.last()
.is_none_or(|(prev_y, _)| (y - prev_y).abs() > 5.0)
{
deduped.push((y, spans));
} else if let Some(last) = deduped.last_mut() {
last.1.extend(spans);
}
}
let lines: Vec<VisualLine> = deduped
.into_iter()
.enumerate()
.map(|(i, (y_pt, spans))| {
trace!(
"visual_line[{i}]: y={y_pt:.1}pt, {} span candidates",
spans.len()
);
let pos = mapping.and_then(|bi| {
spans
.iter()
.filter(|s| !s.is_detached())
.find_map(|&s| bi.resolve_span(s))
});
VisualLine {
y_pt,
y_px: pt_to_px(y_pt, ppi),
md_block_range: pos.as_ref().map(|p| p.block_range.clone()),
md_offset: pos.as_ref().map(|p| p.offset),
}
})
.collect();
info!(
"visual_line: extract completed in {:.1}ms ({} lines)",
start.elapsed().as_secs_f64() * 1000.0,
lines.len()
);
if let Some(bi) = mapping {
let mapped = lines.iter().filter(|l| l.md_block_range.is_some()).count();
debug!(
"extract_visual_lines: {} lines ({} mapped, {} unmapped)",
lines.len(),
mapped,
lines.len() - mapped
);
for (i, vl) in lines.iter().enumerate() {
if let Some(ref r) = vl.md_block_range {
let s = byte_offset_to_line(bi.md_source(), r.start);
let e = byte_offset_to_line(bi.md_source(), r.end.saturating_sub(1).max(r.start));
let preview: String = bi
.md_source()
.lines()
.nth(s - 1)
.unwrap_or("")
.chars()
.take(60)
.collect();
debug!(" vl[{i}] y={:.1}pt → md L{s}-{e}: {:?}", vl.y_pt, preview);
} else {
debug!(" vl[{i}] y={:.1}pt → (unmapped)", vl.y_pt);
}
}
}
lines
}
pub fn byte_offset_to_line(source: &str, offset: usize) -> usize {
let offset = offset.min(source.len());
source[..offset].bytes().filter(|&b| b == b'\n').count() + 1
}
fn collect_visual_lines_structural(
frame: &Frame,
parent_offset: Point,
out: &mut Vec<(f64, Vec<Span>)>,
) {
let mut pending_texts: Vec<(f64, Span)> = Vec::new();
for (pos, item) in frame.items() {
let abs = parent_offset + *pos;
match item {
FrameItem::Tag(_) | FrameItem::Link(_, _) | FrameItem::Shape(_, _) => {}
FrameItem::Image(_, _, _) => {}
FrameItem::Text(text) => {
if let Some(span) = text.glyphs.first().map(|g| g.span.0) {
pending_texts.push((abs.y.to_pt(), span));
}
}
FrameItem::Group(group) => {
if should_recurse(&group.frame) {
flush_pending_texts(&mut pending_texts, out);
collect_visual_lines_structural(&group.frame, abs, out);
} else {
flush_pending_texts(&mut pending_texts, out);
let baseline_y = find_representative_baseline(&group.frame, abs);
let spans = collect_all_spans_recursive(&group.frame);
if !spans.is_empty() {
out.push((baseline_y, spans));
}
}
}
}
}
flush_pending_texts(&mut pending_texts, out);
}
fn should_recurse(frame: &Frame) -> bool {
has_line_structure(frame) || has_dominant_child_group(frame) || has_raw_line_tags(frame)
}
fn has_line_structure(frame: &Frame) -> bool {
let mut child_groups: Vec<(f64, f64)> = Vec::new(); for (pos, item) in frame.items() {
if let FrameItem::Group(g) = item {
child_groups.push((pos.y.to_pt(), g.frame.size().y.to_pt()));
}
}
if child_groups.len() < 2 {
return false;
}
child_groups.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
child_groups
.windows(2)
.all(|w| w[1].0 >= w[0].0 + w[0].1 - 1.0)
}
fn has_dominant_child_group(frame: &Frame) -> bool {
let parent_h = frame.size().y.to_pt();
if parent_h <= 0.0 {
return false;
}
frame.items().any(|(_, item)| {
if let FrameItem::Group(g) = item {
g.frame.size().y.to_pt() > parent_h * 0.5
} else {
false
}
})
}
fn has_raw_line_tags(frame: &Frame) -> bool {
frame.items().any(|(_, item)| {
if let FrameItem::Tag(Tag::Start(content, _)) = item {
content.elem().name() == "line"
} else {
false
}
})
}
fn find_representative_baseline(frame: &Frame, offset: Point) -> f64 {
for (pos, item) in frame.items() {
let abs_y = offset.y.to_pt() + pos.y.to_pt();
match item {
FrameItem::Text(_) => return abs_y,
FrameItem::Group(g) => {
let child_offset = Point::new(offset.x + pos.x, offset.y + pos.y);
let result = find_representative_baseline_inner(&g.frame, child_offset);
if let Some(y) = result {
return y;
}
}
_ => {}
}
}
offset.y.to_pt()
}
fn find_representative_baseline_inner(frame: &Frame, offset: Point) -> Option<f64> {
for (pos, item) in frame.items() {
let abs_y = offset.y.to_pt() + pos.y.to_pt();
match item {
FrameItem::Text(_) => return Some(abs_y),
FrameItem::Group(g) => {
let child_offset = Point::new(offset.x + pos.x, offset.y + pos.y);
if let Some(y) = find_representative_baseline_inner(&g.frame, child_offset) {
return Some(y);
}
}
_ => {}
}
}
None
}
fn collect_all_spans_recursive(frame: &Frame) -> Vec<Span> {
let mut spans = Vec::new();
collect_spans_inner(frame, &mut spans);
spans
}
fn collect_spans_inner(frame: &Frame, out: &mut Vec<Span>) {
for (_, item) in frame.items() {
match item {
FrameItem::Text(text) => {
if let Some(span) = text.glyphs.first().map(|g| g.span.0) {
out.push(span);
}
}
FrameItem::Group(g) => {
collect_spans_inner(&g.frame, out);
}
_ => {}
}
}
}
fn flush_pending_texts(pending: &mut Vec<(f64, Span)>, out: &mut Vec<(f64, Vec<Span>)>) {
if pending.is_empty() {
return;
}
pending.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
for &(y, span) in pending.iter() {
if out
.last()
.is_none_or(|(prev_y, _)| (y - prev_y).abs() > 5.0)
{
out.push((y, vec![span]));
} else if let Some(last) = out.last_mut() {
last.1.push(span);
}
}
pending.clear();
}