Skip to main content

hjkl_syntax/
lib.rs

//! Renderer-agnostic syntax-highlighting pipeline for the hjkl editor stack.
//!
//! Fully synchronous: parse and highlight run on the main thread.
//! Call [`SyntaxLayer::set_language_for_path`] after opening a file,
//! [`SyntaxLayer::apply_edits`] after each batch of [`hjkl_engine::ContentEdit`]s,
//! and [`SyntaxLayer::render_viewport`] to get styled spans for the visible rows.
//!
//! Output is renderer-agnostic: [`RenderOutput::spans`] carries
//! `(byte_start, byte_end, style)` triples, where `style` is a [`StyleSpec`].
//! A TUI adapter (`hjkl-syntax-tui`) maps these to `ratatui::style::Style`.
11
12use std::collections::HashMap;
13use std::ops::Range;
14use std::path::Path;
15use std::sync::Arc;
16
17use hjkl_bonsai::runtime::{Grammar, LoadHandle};
18use hjkl_bonsai::{
19    CommentMarkerPass, DotFallbackTheme, HEX_BG_KEY, HEX_COLOR_CAPTURE, HEX_FG_KEY, HexColorPass,
20    Highlighter, InputEdit, MetaValue, Point, Theme,
21};
22use hjkl_engine::Query;
23use hjkl_lang::{GrammarRequest, LanguageDirectory};
24
25pub use hjkl_theme::{Color, Modifiers, StyleSpec};
26
27/// Stable identifier for an open buffer.
28///
29/// # Examples
30///
31/// ```
32/// use hjkl_syntax::BufferId;
33/// let id: BufferId = 42;
34/// assert_eq!(id, 42);
35/// ```
36pub use hjkl_buffer::BufferId;
37
38// ---------------------------------------------------------------------------
39// Public output types
40// ---------------------------------------------------------------------------
41
/// A single diagnostic sign emitted from the syntax pipeline.
///
/// A sign is a gutter marker: it names the row it belongs to, the character
/// to draw, and a priority used to pick a winner when several signs share a
/// row.
///
/// # Examples
///
/// ```
/// use hjkl_syntax::DiagSign;
/// let s = DiagSign::new(3, 'E', 100);
/// assert_eq!(s.row, 3);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct DiagSign {
    /// Document row (0-indexed).
    pub row: usize,
    /// Gutter character (e.g. `'E'` for a syntax error).
    pub ch: char,
    /// Gutter priority — higher wins when multiple signs land on the same row.
    pub priority: u8,
}

impl Default for DiagSign {
    /// An `'E'` sign on row 0 with the lowest priority.
    fn default() -> Self {
        Self {
            row: 0,
            ch: 'E',
            priority: 0,
        }
    }
}

impl DiagSign {
    /// Create a new diagnostic sign.
    ///
    /// The struct is `#[non_exhaustive]`, so code outside this crate cannot
    /// use a struct literal and has to go through this constructor.
    ///
    /// # Examples
    ///
    /// ```
    /// use hjkl_syntax::DiagSign;
    /// let s = DiagSign::new(1, 'E', 100);
    /// assert_eq!(s.row, 1);
    /// ```
    #[must_use]
    pub fn new(row: usize, ch: char, priority: u8) -> Self {
        Self { row, ch, priority }
    }
}
86
/// Per-call sub-step timings. Kept for API compat (PerfBreakdown is re-exported
/// in the TUI shim and referenced from `:perf` overlay code).
///
/// All fields are in microseconds; the derived `Default` zeroes every one.
///
/// # Examples
///
/// ```
/// use hjkl_syntax::PerfBreakdown;
/// let p = PerfBreakdown::default();
/// assert_eq!(p.parse_us, 0);
/// ```
#[derive(Default, Debug, Clone, Copy)]
#[non_exhaustive]
pub struct PerfBreakdown {
    /// Microseconds spent building the source string + row_starts table.
    pub source_build_us: u128,
    /// Microseconds spent in `tree_sitter::Parser::parse`.
    pub parse_us: u128,
    /// Microseconds spent in `hjkl_bonsai::Highlighter::highlight_range_*`.
    pub highlight_us: u128,
    /// Microseconds spent building the per-row span table from flat spans.
    pub by_row_us: u128,
    /// Microseconds spent scanning for diagnostic ERROR/MISSING nodes.
    pub diag_us: u128,
}

impl PerfBreakdown {
    /// Construct a zeroed breakdown (same value as [`Default::default`]).
    ///
    /// # Examples
    ///
    /// ```
    /// use hjkl_syntax::PerfBreakdown;
    /// let p = PerfBreakdown::new();
    /// assert_eq!(p.highlight_us, 0);
    /// ```
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }
}
126
127/// Per-frame output of the syntax pipeline.
128///
129/// Contains the styled span table (one inner `Vec` per document row) and the
130/// diagnostic signs for the gutter.
131///
132/// # Examples
133///
134/// ```
135/// use hjkl_syntax::{RenderOutput, PerfBreakdown};
136/// let out = RenderOutput::new(0, Vec::new(), Vec::new(), (0, 0, 0), PerfBreakdown::default());
137/// assert_eq!(out.buffer_id, 0);
138/// ```
139#[derive(Debug, Clone)]
140#[non_exhaustive]
141pub struct RenderOutput {
142    /// Routes spans/signs back to the matching buffer slot.
143    pub buffer_id: BufferId,
144    /// Per-row span table.
145    pub spans: Vec<Vec<(usize, usize, StyleSpec)>>,
146    /// Diagnostic signs for the gutter.
147    pub signs: Vec<DiagSign>,
148    /// `(dirty_gen, viewport_top, viewport_height)` cache key.
149    pub key: (u64, usize, usize),
150    /// Sub-step timing breakdown (zeroed in fully-sync path).
151    pub perf: PerfBreakdown,
152}
153
154impl RenderOutput {
155    /// Construct a new `RenderOutput`.
156    ///
157    /// # Examples
158    ///
159    /// ```
160    /// use hjkl_syntax::{RenderOutput, PerfBreakdown};
161    /// let out = RenderOutput::new(1, Vec::new(), Vec::new(), (7, 0, 30), PerfBreakdown::new());
162    /// assert_eq!(out.buffer_id, 1);
163    /// ```
164    pub fn new(
165        buffer_id: BufferId,
166        spans: Vec<Vec<(usize, usize, StyleSpec)>>,
167        signs: Vec<DiagSign>,
168        key: (u64, usize, usize),
169        perf: PerfBreakdown,
170    ) -> Self {
171        Self {
172            buffer_id,
173            spans,
174            signs,
175            key,
176            perf,
177        }
178    }
179}
180
181impl PartialEq for RenderOutput {
182    fn eq(&self, other: &Self) -> bool {
183        self.spans == other.spans
184            && self.signs.len() == other.signs.len()
185            && self
186                .signs
187                .iter()
188                .zip(other.signs.iter())
189                .all(|(a, b)| a.row == b.row && a.ch == b.ch && a.priority == b.priority)
190    }
191}
192
193// ---------------------------------------------------------------------------
194// Public outcome types for set_language_for_path / poll_pending_loads
195// ---------------------------------------------------------------------------
196
/// Outcome of [`SyntaxLayer::set_language_for_path`].
///
/// # Examples
///
/// ```
/// use hjkl_syntax::SetLanguageOutcome;
/// assert!(SetLanguageOutcome::Ready.is_known());
/// assert!(SetLanguageOutcome::Loading("rust".to_string()).is_known());
/// assert!(!SetLanguageOutcome::Unknown.is_known());
/// ```
#[derive(Debug)]
#[non_exhaustive]
pub enum SetLanguageOutcome {
    /// Grammar was already cached — installed immediately.
    Ready,
    /// Grammar is being fetched/compiled on the background pool.
    /// The payload is the language name.
    Loading(#[allow(dead_code)] String),
    /// Extension unrecognized. No grammar — plain text only.
    Unknown,
}

impl SetLanguageOutcome {
    /// `true` when a grammar was found (either already cached or now in flight).
    #[must_use]
    pub fn is_known(&self) -> bool {
        matches!(self, Self::Ready | Self::Loading(_))
    }
}
223
224/// Event emitted by [`SyntaxLayer::poll_pending_loads`].
225///
226/// # Examples
227///
228/// ```
229/// use hjkl_syntax::LoadEvent;
230/// let e = LoadEvent::Ready { id: 0, name: "rust".into() };
231/// match e {
232///     LoadEvent::Ready { id, name } => assert_eq!(name, "rust"),
233///     LoadEvent::Failed { .. } => panic!("unexpected"),
234///     _ => {}
235/// }
236/// ```
237#[non_exhaustive]
238pub enum LoadEvent {
239    /// Grammar installed; trigger a redraw + re-render for `id`.
240    Ready { id: BufferId, name: String },
241    /// Load failed; buffer stays plain text.
242    Failed {
243        id: BufferId,
244        name: String,
245        error: String,
246    },
247}
248
249/// Exhaustive view of a [`LoadEvent`] for dispatch callbacks.
250#[derive(Debug)]
251pub enum LoadEventKind<'a> {
252    /// Grammar installed successfully.
253    Ready { id: BufferId, name: &'a str },
254    /// Grammar load failed.
255    Failed {
256        id: BufferId,
257        name: &'a str,
258        error: &'a str,
259    },
260}
261
262// ---------------------------------------------------------------------------
263// In-flight grammar load tracking
264// ---------------------------------------------------------------------------
265
266struct PendingLoad {
267    id: BufferId,
268    name: String,
269    handle: LoadHandle,
270}
271
272// ---------------------------------------------------------------------------
273// Per-buffer client state (main thread)
274// ---------------------------------------------------------------------------
275
276/// Per-buffer state owned by the main-thread [`SyntaxLayer`].
277struct BufferClient {
278    has_language: bool,
279    current_lang: Option<Arc<Grammar>>,
280    /// Owns Parser + Tree for this buffer.
281    highlighter: Option<Highlighter>,
282    /// dirty_gen the cache was built at (None = cache absent).
283    cache_dirty_gen: Option<u64>,
284    /// Contiguous row range covered by `cache_spans`.
285    cache_rows: Range<usize>,
286    /// Per-row span table for `cache_rows`.
287    cache_spans: Vec<Vec<(usize, usize, StyleSpec)>>,
288    /// `(dirty_gen, row_starts)` — rebuilt only when dirty_gen changes.
289    cache_row_starts: Option<(u64, Arc<Vec<usize>>)>,
290    /// dirty_gen of the most recent successful parse. Gate reparsing.
291    parsed_dirty_gen: Option<u64>,
292    /// Cached diag signs keyed by `(dirty_gen, vp_top, vp_end)`.
293    cache_signs: Option<(u64, usize, usize, Vec<DiagSign>)>,
294}
295
296impl Default for BufferClient {
297    fn default() -> Self {
298        Self {
299            has_language: false,
300            current_lang: None,
301            highlighter: None,
302            cache_dirty_gen: None,
303            cache_rows: 0..0,
304            cache_spans: Vec::new(),
305            cache_row_starts: None,
306            parsed_dirty_gen: None,
307            cache_signs: None,
308        }
309    }
310}
311
312impl BufferClient {
313    fn invalidate_cache(&mut self) {
314        self.cache_dirty_gen = None;
315        self.cache_rows = 0..0;
316        self.cache_spans.clear();
317        self.cache_row_starts = None;
318        self.parsed_dirty_gen = None;
319        self.cache_signs = None;
320    }
321}
322
323// ---------------------------------------------------------------------------
324// SyntaxLayer — main-thread, fully synchronous
325// ---------------------------------------------------------------------------
326
327/// Per-App syntax highlighting layer. Multiplexes per-buffer state.
328/// Fully synchronous — no background thread.
329///
330/// # Examples
331///
332/// ```no_run
333/// use std::sync::Arc;
334/// use hjkl_syntax::SyntaxLayer;
335/// use hjkl_bonsai::DotFallbackTheme;
336/// use hjkl_lang::LanguageDirectory;
337///
338/// let theme = Arc::new(DotFallbackTheme::dark());
339/// let dir = Arc::new(LanguageDirectory::new().unwrap());
340/// let layer = SyntaxLayer::new(theme, dir);
341/// ```
342pub struct SyntaxLayer {
343    /// Shared grammar resolver.
344    pub directory: Arc<LanguageDirectory>,
345    theme: Arc<dyn Theme + Send + Sync>,
346    clients: HashMap<BufferId, BufferClient>,
347    pending_loads: Vec<PendingLoad>,
348}
349
350impl SyntaxLayer {
351    /// Create a new layer with no buffers attached.
352    ///
353    /// # Examples
354    ///
355    /// ```no_run
356    /// use std::sync::Arc;
357    /// use hjkl_syntax::SyntaxLayer;
358    /// use hjkl_bonsai::DotFallbackTheme;
359    /// use hjkl_lang::LanguageDirectory;
360    ///
361    /// let theme = Arc::new(DotFallbackTheme::dark());
362    /// let dir = Arc::new(LanguageDirectory::new().unwrap());
363    /// let layer = SyntaxLayer::new(theme, dir);
364    /// ```
365    pub fn new(theme: Arc<dyn Theme + Send + Sync>, directory: Arc<LanguageDirectory>) -> Self {
366        Self {
367            directory,
368            theme,
369            clients: HashMap::new(),
370            pending_loads: Vec::new(),
371        }
372    }
373
374    /// Borrow the shared language directory.
375    pub fn directory(&self) -> &Arc<LanguageDirectory> {
376        &self.directory
377    }
378
379    fn client_mut(&mut self, id: BufferId) -> &mut BufferClient {
380        self.clients.entry(id).or_default()
381    }
382
383    /// Detect the language for `path` and attach a grammar.
384    ///
385    /// - `Ready`   — grammar cached; highlighter installed immediately.
386    /// - `Loading` — grammar compiling; renders as plain text until
387    ///   `poll_pending_loads` fires `LoadEvent::Ready`.
388    /// - `Unknown` — unrecognized extension; plain text only.
389    ///
390    /// # Examples
391    ///
392    /// ```no_run
393    /// use std::sync::Arc;
394    /// use std::path::Path;
395    /// use hjkl_syntax::{SyntaxLayer, SetLanguageOutcome};
396    /// use hjkl_bonsai::DotFallbackTheme;
397    /// use hjkl_lang::LanguageDirectory;
398    ///
399    /// let theme = Arc::new(DotFallbackTheme::dark());
400    /// let dir = Arc::new(LanguageDirectory::new().unwrap());
401    /// let mut layer = SyntaxLayer::new(theme, dir);
402    /// let outcome = layer.set_language_for_path(0, Path::new("a.zzz_not_real"));
403    /// assert!(!outcome.is_known());
404    /// ```
405    pub fn set_language_for_path(&mut self, id: BufferId, path: &Path) -> SetLanguageOutcome {
406        match self.directory.request_for_path(path) {
407            GrammarRequest::Cached(grammar) => {
408                self.attach_grammar(id, grammar.clone());
409                let c = self.client_mut(id);
410                c.current_lang = Some(grammar);
411                c.has_language = true;
412                SetLanguageOutcome::Ready
413            }
414            GrammarRequest::Loading { name, handle } => {
415                let c = self.client_mut(id);
416                c.current_lang = None;
417                c.has_language = false;
418                c.highlighter = None;
419                c.invalidate_cache();
420                self.pending_loads.push(PendingLoad {
421                    id,
422                    name: name.clone(),
423                    handle,
424                });
425                SetLanguageOutcome::Loading(name)
426            }
427            GrammarRequest::Unknown | _ => {
428                let c = self.client_mut(id);
429                c.current_lang = None;
430                c.has_language = false;
431                c.highlighter = None;
432                c.invalidate_cache();
433                SetLanguageOutcome::Unknown
434            }
435        }
436    }
437
438    /// Attach a grammar to a buffer, creating/replacing the Highlighter.
439    fn attach_grammar(&mut self, id: BufferId, grammar: Arc<Grammar>) {
440        let c = self.clients.entry(id).or_default();
441        c.invalidate_cache();
442        match Highlighter::new(grammar) {
443            Ok(h) => {
444                c.highlighter = Some(h);
445            }
446            Err(e) => {
447                tracing::error!(buffer_id = id, error = %e, "failed to attach highlighter");
448                c.highlighter = None;
449            }
450        }
451    }
452
453    /// Poll all in-flight grammar loads. Call once per tick.
454    ///
455    /// Returns one `LoadEvent` per handle that resolved during this tick.
456    pub fn poll_pending_loads(&mut self) -> Vec<LoadEvent> {
457        let mut events = Vec::new();
458        let mut i = 0;
459        while i < self.pending_loads.len() {
460            match self.pending_loads[i].handle.try_recv() {
461                None => {
462                    i += 1;
463                }
464                Some(Ok(lib_path)) => {
465                    let name = self.pending_loads[i].name.clone();
466                    let bid = self.pending_loads[i].id;
467                    self.pending_loads.swap_remove(i);
468                    match self.directory.complete_load(&name, lib_path) {
469                        Ok(grammar) => {
470                            self.attach_grammar(bid, grammar.clone());
471                            let c = self.client_mut(bid);
472                            c.current_lang = Some(grammar);
473                            c.has_language = true;
474                            events.push(LoadEvent::Ready { id: bid, name });
475                        }
476                        Err(e) => {
477                            events.push(LoadEvent::Failed {
478                                id: bid,
479                                name,
480                                error: format!("{e:#}"),
481                            });
482                        }
483                    }
484                }
485                Some(Err(err)) => {
486                    let name = self.pending_loads[i].name.clone();
487                    let bid = self.pending_loads[i].id;
488                    self.pending_loads.swap_remove(i);
489                    events.push(LoadEvent::Failed {
490                        id: bid,
491                        name,
492                        error: err.to_string(),
493                    });
494                }
495            }
496        }
497        events
498    }
499
500    /// Drop all state for a buffer. Call on close.
501    pub fn forget(&mut self, id: BufferId) {
502        self.clients.remove(&id);
503    }
504
505    /// Swap the active theme. Next `render_viewport` call uses the new theme.
506    pub fn set_theme(&mut self, theme: Arc<dyn Theme + Send + Sync>) {
507        self.theme = theme;
508        // Invalidate all per-buffer caches so they repaint with the new theme.
509        for c in self.clients.values_mut() {
510            c.invalidate_cache();
511        }
512    }
513
514    /// Apply a batch of engine `ContentEdit`s to the buffer's retained tree
515    /// synchronously. The cache will be invalidated on the next `render_viewport`
516    /// call via dirty_gen mismatch.
517    ///
518    /// No-op when no grammar is attached.
519    pub fn apply_edits(&mut self, id: BufferId, edits: &[hjkl_engine::ContentEdit]) {
520        let c = match self.clients.get_mut(&id) {
521            Some(c) if c.has_language => c,
522            _ => return,
523        };
524        let h = match c.highlighter.as_mut() {
525            Some(h) => h,
526            None => return,
527        };
528        for e in edits {
529            h.edit(&InputEdit {
530                start_byte: e.start_byte,
531                old_end_byte: e.old_end_byte,
532                new_end_byte: e.new_end_byte,
533                start_position: Point {
534                    row: e.start_position.0 as usize,
535                    column: e.start_position.1 as usize,
536                },
537                old_end_position: Point {
538                    row: e.old_end_position.0 as usize,
539                    column: e.old_end_position.1 as usize,
540                },
541                new_end_position: Point {
542                    row: e.new_end_position.0 as usize,
543                    column: e.new_end_position.1 as usize,
544                },
545            });
546        }
547        // dirty_gen will advance — invalidate parse + row_starts + sign caches.
548        // cache_spans / cache_rows are dropped on dirty_gen mismatch in render_viewport.
549        c.parsed_dirty_gen = None;
550        c.cache_row_starts = None;
551        c.cache_signs = None;
552    }
553
554    /// Drop the buffer's retained tree. Next `render_viewport` reparses from scratch.
555    ///
556    /// Call on `:e!` / content reset.
557    pub fn reset(&mut self, id: BufferId) {
558        if let Some(c) = self.clients.get_mut(&id) {
559            if let Some(h) = c.highlighter.as_mut() {
560                h.reset();
561            }
562            c.invalidate_cache();
563        }
564    }
565
566    /// Render spans for the visible viewport. Fully synchronous.
567    ///
568    /// 1. Returns `None` when no grammar is attached.
569    /// 2. Clears the cache when `buffer.dirty_gen()` has advanced.
570    /// 3. Returns cached rows when the request is fully inside the cached range.
571    /// 4. Walks only rows outside the cache (extend prefix/suffix), splices into
572    ///    `cache_spans`, extends `cache_rows`.
573    pub fn render_viewport(
574        &mut self,
575        id: BufferId,
576        buffer: &impl Query,
577        viewport_top: usize,
578        viewport_height: usize,
579    ) -> Option<RenderOutput> {
580        let client = self.clients.get_mut(&id)?;
581        if !client.has_language {
582            return None;
583        }
584        let dg = buffer.dirty_gen();
585        let row_count = buffer.line_count() as usize;
586        if row_count == 0 || viewport_height == 0 {
587            return None;
588        }
589
590        let vp_top = viewport_top.min(row_count);
591        let vp_end = (vp_top + viewport_height).min(row_count);
592        if vp_end <= vp_top {
593            return None;
594        }
595
596        // Single dirty_gen invalidation point.
597        if client.cache_dirty_gen != Some(dg) {
598            client.invalidate_cache();
599        }
600
601        // Get a rope snapshot — O(1) Arc-clone from hjkl_buffer::Buffer.
602        // All downstream consumers (parse, highlight, row_starts, diag signs)
603        // now read directly from the rope: no full-document String allocation.
604        let rope = buffer.rope();
605
606        // Get or build row_starts, cached per dirty_gen.
607        // Scan newlines chunk-by-chunk from the rope so we never materialise
608        // the full document as a contiguous byte slice.
609        let row_starts: Arc<Vec<usize>> = if client
610            .cache_row_starts
611            .as_ref()
612            .is_some_and(|(g, _)| *g == dg)
613        {
614            Arc::clone(&client.cache_row_starts.as_ref().unwrap().1)
615        } else {
616            // SIMD-vectorised newline scan via memchr — measurably faster than
617            // a per-byte loop. Pre-sized to row_count + 1 to avoid realloc churn.
618            let mut rs: Vec<usize> = Vec::with_capacity(row_count + 1);
619            rs.push(0);
620            let mut chunk_pos = 0usize;
621            for chunk in rope.chunks() {
622                for nl in memchr::memchr_iter(b'\n', chunk.as_bytes()) {
623                    rs.push(chunk_pos + nl + 1);
624                }
625                chunk_pos += chunk.len();
626            }
627            let arc = Arc::new(rs);
628            client.cache_row_starts = Some((dg, Arc::clone(&arc)));
629            arc
630        };
631
632        // Reparse only when needed. Use rope-streaming parse to avoid passing
633        // the full bytes slice into the parser (tree-sitter reads chunk-by-chunk
634        // via the closure; no contiguous copy required for the parse step).
635        let needs_reparse = client.parsed_dirty_gen != Some(dg);
636        {
637            let highlighter = client.highlighter.as_mut()?;
638            if highlighter.tree().is_none() {
639                highlighter.parse_initial_rope(&rope);
640                if highlighter.tree().is_some() {
641                    client.parsed_dirty_gen = Some(dg);
642                }
643            } else if needs_reparse {
644                // No-diff incremental: we discard the changed-byte ranges
645                // (cache is keyed by dirty_gen + viewport, not by edit
646                // ranges). Computing `old.changed_ranges(&new)` walks both
647                // trees and was ~54 % of per-keystroke CPU on a 1.86 M-line
648                // file.
649                let ok = highlighter.parse_incremental_rope(&rope);
650                if ok && highlighter.tree().is_some() {
651                    client.parsed_dirty_gen = Some(dg);
652                }
653            }
654        }
655
656        // Re-borrow after parse.
657        let client = self.clients.get_mut(&id)?;
658        let highlighter = client.highlighter.as_mut()?;
659
660        // If still no tree (parse failed), give up.
661        highlighter.tree()?;
662
663        let theme = self.theme.as_ref();
664        let directory = Arc::clone(&self.directory);
665
666        // Extend cache to cover [vp_top, vp_end).
667        if client.cache_rows.is_empty() {
668            // Case A: empty cache — walk full range.
669            client.cache_spans = walk_rows(
670                highlighter,
671                &rope,
672                &row_starts,
673                row_count,
674                vp_top,
675                vp_end,
676                theme,
677                &directory,
678            );
679            client.cache_rows = vp_top..vp_end;
680            client.cache_dirty_gen = Some(dg);
681        } else {
682            let cache_covers_overlap =
683                vp_top < client.cache_rows.end && vp_end > client.cache_rows.start;
684            if !cache_covers_overlap {
685                // Disjoint — just rebuild the whole viewport.
686                client.cache_spans = walk_rows(
687                    highlighter,
688                    &rope,
689                    &row_starts,
690                    row_count,
691                    vp_top,
692                    vp_end,
693                    theme,
694                    &directory,
695                );
696                client.cache_rows = vp_top..vp_end;
697            } else {
698                // Case B: extend prefix if needed.
699                if vp_top < client.cache_rows.start {
700                    let new_rows = walk_rows(
701                        highlighter,
702                        &rope,
703                        &row_starts,
704                        row_count,
705                        vp_top,
706                        client.cache_rows.start,
707                        theme,
708                        &directory,
709                    );
710                    let mut combined = new_rows;
711                    combined.append(&mut client.cache_spans);
712                    client.cache_spans = combined;
713                    client.cache_rows.start = vp_top;
714                }
715                // Case C: extend suffix if needed.
716                if vp_end > client.cache_rows.end {
717                    let new_rows = walk_rows(
718                        highlighter,
719                        &rope,
720                        &row_starts,
721                        row_count,
722                        client.cache_rows.end,
723                        vp_end,
724                        theme,
725                        &directory,
726                    );
727                    client.cache_spans.extend(new_rows);
728                    client.cache_rows.end = vp_end;
729                }
730            }
731            client.cache_dirty_gen = Some(dg);
732        }
733
734        // Slice the requested viewport from the cache.
735        let offset = vp_top - client.cache_rows.start;
736        let len = vp_end - vp_top;
737        let spans: Vec<Vec<(usize, usize, StyleSpec)>> =
738            client.cache_spans[offset..offset + len].to_vec();
739
740        // Get or build signs, cached per (dirty_gen, vp_top, vp_end).
741        let signs = if client
742            .cache_signs
743            .as_ref()
744            .is_some_and(|(g, t, e, _)| *g == dg && *t == vp_top && *e == vp_end)
745        {
746            client.cache_signs.as_ref().unwrap().3.clone()
747        } else {
748            let s = collect_diag_signs_range(highlighter, &rope, &row_starts, vp_top, vp_end);
749            client.cache_signs = Some((dg, vp_top, vp_end, s.clone()));
750            s
751        };
752
753        Some(RenderOutput {
754            buffer_id: id,
755            spans,
756            signs,
757            key: (dg, vp_top, viewport_height),
758            perf: PerfBreakdown::default(),
759        })
760    }
761
762    /// Resolve a path to its language name without loading a grammar.
763    pub fn name_for_path(&self, path: &Path) -> Option<String> {
764        self.directory.name_for_path(path)
765    }
766
767    /// Returns `true` if a client is tracked for the given buffer id.
768    #[doc(hidden)]
769    pub fn has_client(&self, id: BufferId) -> bool {
770        self.clients.contains_key(&id)
771    }
772
773    /// Dispatch a [`LoadEvent`] through a caller-supplied handler.
774    ///
775    /// # Examples
776    ///
777    /// ```rust
778    /// use hjkl_syntax::{LoadEvent, SyntaxLayer};
779    ///
780    /// let event = LoadEvent::Ready { id: 0, name: "rust".into() };
781    /// let mut got_ready = false;
782    /// let handled = SyntaxLayer::dispatch_load_event(&event, |ev| {
783    ///     use hjkl_syntax::LoadEventKind;
784    ///     match ev {
785    ///         LoadEventKind::Ready { id, name } => { got_ready = true; }
786    ///         LoadEventKind::Failed { .. } => {}
787    ///     }
788    /// });
789    /// assert!(handled);
790    /// assert!(got_ready);
791    /// ```
792    pub fn dispatch_load_event(
793        event: &LoadEvent,
794        mut handler: impl FnMut(LoadEventKind<'_>),
795    ) -> bool {
796        #[allow(unreachable_patterns)]
797        match event {
798            LoadEvent::Ready { id, name } => {
799                handler(LoadEventKind::Ready { id: *id, name });
800                true
801            }
802            LoadEvent::Failed { id, name, error } => {
803                handler(LoadEventKind::Failed {
804                    id: *id,
805                    name,
806                    error,
807                });
808                true
809            }
810            _ => false,
811        }
812    }
813}
814
815// ---------------------------------------------------------------------------
816// Helper: walk a row range against the retained tree
817// ---------------------------------------------------------------------------
818
819#[allow(clippy::too_many_arguments)]
820fn walk_rows(
821    highlighter: &mut Highlighter,
822    rope: &ropey::Rope,
823    row_starts: &[usize],
824    row_count: usize,
825    seg_start: usize,
826    seg_end: usize,
827    theme: &dyn Theme,
828    directory: &Arc<LanguageDirectory>,
829) -> Vec<Vec<(usize, usize, StyleSpec)>> {
830    let rope_len = rope.len_bytes();
831    let byte_start = row_starts.get(seg_start).copied().unwrap_or(rope_len);
832    let byte_end = row_starts
833        .get(seg_end)
834        .copied()
835        .unwrap_or(rope_len)
836        .min(rope_len)
837        .max(byte_start);
838
839    let mut flat_spans =
840        highlighter.highlight_range_with_injections_rope(rope, byte_start..byte_end, |name| {
841            directory.by_name(name)
842        });
843
844    let marker_pass = CommentMarkerPass::new();
845    marker_pass.apply_rope(&mut flat_spans, rope);
846    let hex_color_pass = HexColorPass::new();
847    hex_color_pass.apply_range_rope(&mut flat_spans, rope, byte_start..byte_end);
848
849    // Bucket spans into ONLY the viewport row range. The prior version
850    // called `build_by_row(..., row_count, ...)` and sliced the result,
851    // which allocated `row_count` empty inner Vecs (8.58 M on a huge
852    // file) just to throw away all but ~50 of them — that single line
853    // was ~24 % of per-keystroke CPU during a paste burst.
854    let _ = row_count; // kept in signature for the public build_by_row tests
855    build_by_row_range(&flat_spans, rope_len, row_starts, seg_start..seg_end, theme)
856}
857
858/// Viewport-bounded variant of [`build_by_row`]. Allocates exactly
859/// `row_range.len()` inner Vecs instead of one per document row. Spans
860/// whose byte range falls entirely outside `row_range` are skipped; spans
861/// that overlap have their per-row slices recorded with positions local
862/// to the viewport (so row `row_range.start` lands at index 0).
863fn build_by_row_range(
864    flat_spans: &[hjkl_bonsai::HighlightSpan],
865    source_len: usize,
866    row_starts: &[usize],
867    row_range: Range<usize>,
868    theme: &dyn Theme,
869) -> Vec<Vec<(usize, usize, StyleSpec)>> {
870    let seg_start = row_range.start;
871    let seg_end = row_range.end.min(row_starts.len());
872    if seg_end <= seg_start {
873        return Vec::new();
874    }
875    let mut by_row: Vec<Vec<(usize, usize, StyleSpec)>> = vec![Vec::new(); seg_end - seg_start];
876
877    for span in flat_spans {
878        let hex_style: Option<StyleSpec> = if span.capture() == HEX_COLOR_CAPTURE {
879            let bg = match span.metadata.get(HEX_BG_KEY) {
880                Some(MetaValue::Str(s)) => hjkl_theme::Color::from_hex_str(s).ok(),
881                _ => None,
882            };
883            let fg = match span.metadata.get(HEX_FG_KEY) {
884                Some(MetaValue::Str(s)) => hjkl_theme::Color::from_hex_str(s).ok(),
885                _ => None,
886            };
887            bg.map(|bg| StyleSpec {
888                fg,
889                bg: Some(bg),
890                modifiers: hjkl_theme::Modifiers::default(),
891            })
892        } else {
893            None
894        };
895
896        let style: StyleSpec = if let Some(s) = hex_style {
897            s
898        } else {
899            match theme.style(span.capture()) {
900                Some(s) => *s,
901                None => continue,
902            }
903        };
904
905        let span_start = span.byte_range.start;
906        let span_end = span.byte_range.end;
907
908        let start_row = row_starts
909            .partition_point(|&rs| rs <= span_start)
910            .saturating_sub(1);
911
912        let mut row = start_row.max(seg_start);
913        while row < seg_end {
914            let row_byte_start = row_starts[row];
915            let row_byte_end = row_starts
916                .get(row + 1)
917                .map(|&s| s.saturating_sub(1))
918                .unwrap_or(source_len);
919
920            if row_byte_start >= span_end {
921                break;
922            }
923
924            let local_start = span_start.saturating_sub(row_byte_start);
925            let local_end = span_end.min(row_byte_end) - row_byte_start;
926
927            if local_end > local_start {
928                by_row[row - seg_start].push((local_start, local_end, style));
929            }
930
931            row += 1;
932        }
933    }
934
935    by_row
936}
937
938// ---------------------------------------------------------------------------
939// Helper: build per-row span table (renderer-agnostic StyleSpec output)
940// ---------------------------------------------------------------------------
941
942/// Resolve flat highlight spans into a per-row span table sized to `row_count`.
943pub fn build_by_row(
944    flat_spans: &[hjkl_bonsai::HighlightSpan],
945    bytes: &[u8],
946    row_starts: &[usize],
947    row_count: usize,
948    theme: &dyn Theme,
949) -> Vec<Vec<(usize, usize, StyleSpec)>> {
950    let mut by_row: Vec<Vec<(usize, usize, StyleSpec)>> = vec![Vec::new(); row_count];
951
952    for span in flat_spans {
953        let hex_style: Option<StyleSpec> = if span.capture() == HEX_COLOR_CAPTURE {
954            let bg = match span.metadata.get(HEX_BG_KEY) {
955                Some(MetaValue::Str(s)) => hjkl_theme::Color::from_hex_str(s).ok(),
956                _ => None,
957            };
958            let fg = match span.metadata.get(HEX_FG_KEY) {
959                Some(MetaValue::Str(s)) => hjkl_theme::Color::from_hex_str(s).ok(),
960                _ => None,
961            };
962            bg.map(|bg| StyleSpec {
963                fg,
964                bg: Some(bg),
965                modifiers: hjkl_theme::Modifiers::default(),
966            })
967        } else {
968            None
969        };
970
971        let style: StyleSpec = if let Some(s) = hex_style {
972            s
973        } else {
974            match theme.style(span.capture()) {
975                Some(s) => *s,
976                None => continue,
977            }
978        };
979        let style = &style;
980
981        let span_start = span.byte_range.start;
982        let span_end = span.byte_range.end;
983
984        let start_row = row_starts
985            .partition_point(|&rs| rs <= span_start)
986            .saturating_sub(1);
987
988        let mut row = start_row;
989        while row < row_count {
990            let row_byte_start = row_starts[row];
991            let row_byte_end = row_starts
992                .get(row + 1)
993                .map(|&s| s.saturating_sub(1))
994                .unwrap_or(bytes.len());
995
996            if row_byte_start >= span_end {
997                break;
998            }
999
1000            let local_start = span_start.saturating_sub(row_byte_start);
1001            let local_end = span_end.min(row_byte_end) - row_byte_start;
1002
1003            if local_end > local_start {
1004                by_row[row].push((local_start, local_end, *style));
1005            }
1006
1007            row += 1;
1008        }
1009    }
1010
1011    by_row
1012}
1013
1014// ---------------------------------------------------------------------------
1015// Helper: collect diagnostic signs
1016// ---------------------------------------------------------------------------
1017
1018fn collect_diag_signs_range(
1019    h: &mut Highlighter,
1020    rope: &ropey::Rope,
1021    row_starts: &[usize],
1022    vp_top: usize,
1023    vp_end: usize,
1024) -> Vec<DiagSign> {
1025    let rope_len = rope.len_bytes();
1026    let byte_start = row_starts.get(vp_top).copied().unwrap_or(rope_len);
1027    let byte_end = row_starts.get(vp_end).copied().unwrap_or(rope_len);
1028    // parse_errors_range only needs the source bytes for harvesting error
1029    // node snippets in the message string. Materialise just the viewport
1030    // window (typically ≪ 100 KB) rather than the whole document.
1031    let window: String = if byte_start < byte_end && byte_end <= rope_len {
1032        rope.byte_slice(byte_start..byte_end).to_string()
1033    } else {
1034        String::new()
1035    };
1036    // Translate byte range into window-relative for parse_errors_range.
1037    let errors = h.parse_errors_range(window.as_bytes(), 0..(byte_end - byte_start));
1038    let mut signs: Vec<DiagSign> = Vec::new();
1039    let mut last_row: Option<usize> = None;
1040    for err in &errors {
1041        // Translate window-relative back to absolute.
1042        let abs_start = err.byte_range.start + byte_start;
1043        let r = row_starts
1044            .partition_point(|&rs| rs <= abs_start)
1045            .saturating_sub(1);
1046        if last_row == Some(r) {
1047            continue;
1048        }
1049        last_row = Some(r);
1050        signs.push(DiagSign::new(r, 'E', 100));
1051    }
1052    signs
1053}
1054
1055// ---------------------------------------------------------------------------
1056// Factory helpers
1057// ---------------------------------------------------------------------------
1058
1059/// Build a `SyntaxLayer` using the given theme + language directory.
1060pub fn layer_with_theme(
1061    theme: Arc<DotFallbackTheme>,
1062    directory: Arc<LanguageDirectory>,
1063) -> SyntaxLayer {
1064    SyntaxLayer::new(theme, directory)
1065}
1066
/// Test-only constructor: a [`SyntaxLayer`] using hjkl-bonsai's bundled dark
/// theme and a freshly created language directory.
///
/// # Panics
///
/// Panics with `"language directory"` if `LanguageDirectory::new()` fails.
#[cfg(test)]
pub fn default_layer() -> SyntaxLayer {
    let directory = LanguageDirectory::new().expect("language directory");
    let theme = DotFallbackTheme::dark();
    SyntaxLayer::new(Arc::new(theme), Arc::new(directory))
}
1073
1074// ---------------------------------------------------------------------------
1075// Tests
1076// ---------------------------------------------------------------------------
1077
#[cfg(test)]
mod tests {
    use super::*;
    use hjkl_buffer::Buffer;
    use std::path::Path;

    /// Buffer id shared by every test in this module.
    const TID: BufferId = 0;

    /// Build a hex-colour span over `range` whose metadata carries `bg` as
    /// the background colour and, when given, `fg` as the foreground colour.
    fn hex_span(
        range: std::ops::Range<usize>,
        bg: &str,
        fg: Option<&str>,
    ) -> hjkl_bonsai::HighlightSpan {
        let mut metadata = std::collections::HashMap::new();
        metadata.insert(HEX_BG_KEY.to_string(), MetaValue::Str(bg.to_string()));
        if let Some(fg) = fg {
            metadata.insert(HEX_FG_KEY.to_string(), MetaValue::Str(fg.to_string()));
        }
        hjkl_bonsai::HighlightSpan {
            byte_range: range,
            capture: HEX_COLOR_CAPTURE.to_string(),
            metadata,
        }
    }

    /// Strip the style from a row, keeping only the `(start, end)` offsets.
    fn offsets(row: &[(usize, usize, StyleSpec)]) -> Vec<(usize, usize)> {
        row.iter().map(|&(start, end, _)| (start, end)).collect()
    }

    // --- DiagSign ---

    #[test]
    fn diag_sign_new_roundtrip() {
        let s = DiagSign::new(7, 'W', 50);
        assert_eq!(s.row, 7);
        assert_eq!(s.ch, 'W');
        assert_eq!(s.priority, 50);
    }

    #[test]
    fn diag_sign_default_is_sensible() {
        let s = DiagSign::default();
        assert_eq!(s.row, 0);
        assert_eq!(s.ch, 'E');
        assert_eq!(s.priority, 0);
    }

    // --- PerfBreakdown ---

    #[test]
    fn perf_breakdown_default_zeros() {
        let p = PerfBreakdown::new();
        assert_eq!(p.source_build_us, 0);
        assert_eq!(p.parse_us, 0);
        assert_eq!(p.highlight_us, 0);
        assert_eq!(p.by_row_us, 0);
        assert_eq!(p.diag_us, 0);
    }

    // --- SetLanguageOutcome ---

    #[test]
    fn set_language_outcome_is_known() {
        assert!(SetLanguageOutcome::Ready.is_known());
        assert!(SetLanguageOutcome::Loading("rust".to_string()).is_known());
        assert!(!SetLanguageOutcome::Unknown.is_known());
    }

    // --- RenderOutput ---

    #[test]
    fn render_output_new_roundtrip() {
        let out = RenderOutput::new(
            99,
            vec![vec![]],
            vec![DiagSign::new(0, 'E', 100)],
            (7, 0, 30),
            PerfBreakdown::new(),
        );
        assert_eq!(out.buffer_id, 99);
        assert_eq!(out.key, (7, 0, 30));
        assert_eq!(out.signs.len(), 1);
    }

    #[test]
    fn render_output_partial_eq_same() {
        let a = RenderOutput::new(
            0,
            vec![vec![(0, 5, StyleSpec::default())]],
            vec![],
            (1, 0, 10),
            PerfBreakdown::default(),
        );
        let b = a.clone();
        assert_eq!(a, b);
    }

    // --- build_by_row ---

    #[test]
    fn build_by_row_empty_spans_gives_empty_rows() {
        let by_row = build_by_row(
            &[],
            b"hello\nworld\n",
            &[0, 6, 12],
            2,
            &DotFallbackTheme::dark(),
        );
        assert_eq!(by_row.len(), 2);
        assert!(by_row[0].is_empty());
        assert!(by_row[1].is_empty());
    }

    #[test]
    fn build_by_row_hex_color_uses_metadata_colors() {
        let bytes = b"--accent: #bb9af7;";
        let span = hex_span(10..17, "#bb9af7", Some("#ffffff"));
        let by_row = build_by_row(&[span], bytes, &[0], 1, &DotFallbackTheme::dark());
        assert_eq!(by_row.len(), 1);
        assert_eq!(by_row[0].len(), 1);
        let (_, _, style) = by_row[0][0];
        let bg = style.bg.expect("hex color must set background");
        assert_eq!((bg.r, bg.g, bg.b), (0xbb, 0x9a, 0xf7));
        let fg = style.fg.expect("hex color must set foreground");
        assert_eq!((fg.r, fg.g, fg.b), (0xff, 0xff, 0xff));
    }

    #[test]
    fn build_by_row_hex_color_without_metadata_skips() {
        let span = hjkl_bonsai::HighlightSpan {
            byte_range: 0..3,
            capture: HEX_COLOR_CAPTURE.to_string(),
            metadata: std::collections::HashMap::new(),
        };
        let by_row = build_by_row(&[span], b"foo", &[0], 1, &DotFallbackTheme::dark());
        assert_eq!(by_row.len(), 1);
        assert!(by_row[0].is_empty());
    }

    #[test]
    fn build_by_row_splits_span_across_rows() {
        // Span covers "lo" on row 0, the newline, and "wor" on row 1.
        let span = hex_span(3..9, "#bb9af7", None);
        let by_row = build_by_row(
            &[span],
            b"hello\nworld\n",
            &[0, 6, 12],
            2,
            &DotFallbackTheme::dark(),
        );
        assert_eq!(by_row.len(), 2);
        // The newline byte is excluded from row 0; row 1 offsets restart at 0.
        assert_eq!(offsets(&by_row[0]), vec![(3, 5)]);
        assert_eq!(offsets(&by_row[1]), vec![(0, 3)]);
    }

    // --- build_by_row_range ---

    #[test]
    fn build_by_row_range_empty_when_range_has_no_rows() {
        let theme = DotFallbackTheme::dark();
        // Entirely past the known rows.
        assert!(build_by_row_range(&[], 12, &[0, 6, 12], 5..10, &theme).is_empty());
        // Empty range.
        assert!(build_by_row_range(&[], 12, &[0, 6, 12], 2..2, &theme).is_empty());
    }

    #[test]
    fn build_by_row_range_clamps_end_to_known_rows() {
        let theme = DotFallbackTheme::dark();
        let by_row = build_by_row_range(&[], 12, &[0, 6, 12], 1..10, &theme);
        // Rows 1 and 2 exist; rows 3..10 do not.
        assert_eq!(by_row.len(), 2);
        assert!(by_row.iter().all(|row| row.is_empty()));
    }

    #[test]
    fn build_by_row_range_offsets_are_row_local_and_index_is_viewport_local() {
        let theme = DotFallbackTheme::dark();
        // "world" on document row 1.
        let span = hex_span(6..11, "#bb9af7", None);
        let by_row = build_by_row_range(&[span], 12, &[0, 6, 12], 1..2, &theme);
        assert_eq!(by_row.len(), 1);
        assert_eq!(offsets(&by_row[0]), vec![(0, 5)]);
    }

    #[test]
    fn build_by_row_range_skips_spans_outside_viewport() {
        let theme = DotFallbackTheme::dark();
        // "hello" lives on row 0, which is above the 1..2 viewport.
        let span = hex_span(0..5, "#bb9af7", None);
        let by_row = build_by_row_range(&[span], 12, &[0, 6, 12], 1..2, &theme);
        assert_eq!(by_row.len(), 1);
        assert!(by_row[0].is_empty());
    }

    #[test]
    fn build_by_row_range_keeps_only_viewport_part_of_multi_row_span() {
        let theme = DotFallbackTheme::dark();
        // Span starts on row 0 but only row 1 is in the viewport.
        let span = hex_span(3..9, "#bb9af7", None);
        let by_row = build_by_row_range(&[span], 12, &[0, 6, 12], 1..2, &theme);
        assert_eq!(by_row.len(), 1);
        assert_eq!(offsets(&by_row[0]), vec![(0, 3)]);
    }

    // --- SyntaxLayer basics (no network required) ---

    #[test]
    fn render_viewport_with_no_language_returns_none() {
        let buf = Buffer::from_str("hello world");
        let mut layer = default_layer();
        assert!(
            !layer
                .set_language_for_path(TID, Path::new("a.unknownext"))
                .is_known()
        );
        assert!(layer.render_viewport(TID, &buf, 0, 10).is_none());
    }

    #[test]
    fn apply_edits_with_no_language_is_noop() {
        let mut layer = default_layer();
        let edits = vec![hjkl_engine::ContentEdit {
            start_byte: 0,
            old_end_byte: 0,
            new_end_byte: 1,
            start_position: (0, 0),
            old_end_position: (0, 0),
            new_end_position: (0, 1),
        }];
        layer.apply_edits(TID, &edits);
        // No grammar attached → call must be a no-op (no panic).
    }

    #[test]
    fn set_language_for_path_returns_unknown_for_unrecognized_extension() {
        let mut layer = default_layer();
        let outcome = layer.set_language_for_path(TID, Path::new("a.zzznope_not_real"));
        assert!(!outcome.is_known());
        assert!(matches!(outcome, SetLanguageOutcome::Unknown));
    }

    #[test]
    fn poll_pending_loads_drains_ready_handles() {
        let mut layer = default_layer();
        let events = layer.poll_pending_loads();
        assert!(
            events.is_empty(),
            "expected no events with no pending loads"
        );
    }

    #[test]
    fn forget_removes_client_state() {
        let mut layer = default_layer();
        layer.set_language_for_path(TID, Path::new("a.zzz_unknown"));
        layer.forget(TID);
        assert!(!layer.clients.contains_key(&TID));
    }

    // --- Network-dependent tests (grammar needed) ---

    #[test]
    #[ignore = "network + compiler: needs tree-sitter-rust grammar"]
    fn parse_and_render_small_rust_buffer() {
        let buf = Buffer::from_str("fn main() { let x = 1; }\n");
        let mut layer = default_layer();
        assert!(
            layer
                .set_language_for_path(TID, Path::new("a.rs"))
                .is_known()
        );
        let out = layer
            .render_viewport(TID, &buf, 0, 10)
            .expect("render output");
        assert!(
            out.spans.iter().any(|r| !r.is_empty()),
            "expected at least one styled span"
        );
    }

    #[test]
    #[ignore = "network + compiler: needs tree-sitter-rust grammar"]
    fn diagnostics_emit_sign_for_syntax_error() {
        let buf = Buffer::from_str("fn main() {\nlet x = ;\n}\n");
        let mut layer = default_layer();
        layer.set_language_for_path(TID, Path::new("a.rs"));
        let out = layer.render_viewport(TID, &buf, 0, 10).unwrap();
        assert!(
            !out.signs.is_empty(),
            "expected at least one diagnostic sign for `let x = ;`"
        );
        assert!(
            out.signs.iter().any(|s| s.row == 1 && s.ch == 'E'),
            "expected an 'E' sign on row 1; got {:?}",
            out.signs
        );
    }

    #[test]
    #[ignore = "network + compiler: needs tree-sitter-rust grammar"]
    fn incremental_path_matches_cold_for_small_edit() {
        let pre = Buffer::from_str("fn main() { let x = 1; }");
        let mut layer = default_layer();
        layer.set_language_for_path(TID, Path::new("a.rs"));
        let _ = layer.render_viewport(TID, &pre, 0, 10).unwrap();
        layer.apply_edits(
            TID,
            &[hjkl_engine::ContentEdit {
                start_byte: 3,
                old_end_byte: 3,
                new_end_byte: 4,
                start_position: (0, 3),
                old_end_position: (0, 3),
                new_end_position: (0, 4),
            }],
        );
        let post = Buffer::from_str("fn Ymain() { let x = 1; }");
        let inc = layer.render_viewport(TID, &post, 0, 10).unwrap();
        // A fresh layer parsing the edited text from scratch must agree with
        // the incrementally updated one.
        let mut cold_layer = default_layer();
        cold_layer.set_language_for_path(TID, Path::new("a.rs"));
        let cold = cold_layer.render_viewport(TID, &post, 0, 10).unwrap();
        assert_eq!(inc.spans, cold.spans);
    }

    #[test]
    #[ignore = "network + compiler: needs tree-sitter-rust grammar"]
    fn forget_drops_buffer_state() {
        let buf = Buffer::from_str("fn main() {}");
        let mut layer = default_layer();
        layer.set_language_for_path(TID, Path::new("a.rs"));
        let _ = layer.render_viewport(TID, &buf, 0, 10).unwrap();
        assert!(layer.clients.contains_key(&TID));
        layer.forget(TID);
        assert!(!layer.clients.contains_key(&TID));
    }
}