1use anyhow::Result;
2use ck_core::Span;
3use serde::{Deserialize, Serialize};
4
5pub use ck_embed::TokenEstimator;
7
/// Estimates the token count of `text` via the shared estimator re-exported
/// from `ck_embed`, so chunk sizing uses the same heuristic as embedding.
fn estimate_tokens(text: &str) -> usize {
    TokenEstimator::estimate_tokens(text)
}
12
/// Returns `(target_tokens, overlap_tokens)` chunking parameters tuned for
/// the given embedding model.
///
/// Falls back to the `nomic-embed-text-v1.5` defaults when `model_name` is
/// `None` or the model is unrecognized.
pub fn get_model_chunk_config(model_name: Option<&str>) -> (usize, usize) {
    match model_name.unwrap_or("nomic-embed-text-v1.5") {
        // Short-context BGE / MiniLM family: chunk conservatively.
        "BAAI/bge-small-en-v1.5"
        | "sentence-transformers/all-MiniLM-L6-v2"
        | "BAAI/bge-base-en-v1.5"
        | "BAAI/bge-large-en-v1.5" => (400, 80),
        // Long-context text/code models.
        "nomic-embed-text-v1" | "nomic-embed-text-v1.5" | "jina-embeddings-v2-base-code" => {
            (1024, 200)
        }
        // Unknown models get the long-context default.
        _ => (1024, 200),
    }
}
39
/// Metadata attached to a chunk produced by splitting ("striding") an
/// oversized chunk into overlapping windows.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StrideInfo {
    /// Identifier of the parent chunk, formatted as "byte_start:byte_end".
    pub original_chunk_id: String,
    /// Zero-based position of this stride within the parent chunk.
    pub stride_index: usize,
    /// Total number of strides the parent chunk was split into.
    pub total_strides: usize,
    /// Characters at the start of this stride shared with the previous stride
    /// (0 for the first stride).
    pub overlap_start: usize,
    /// Characters at the end of this stride shared with the next stride
    /// (0 for the last stride).
    pub overlap_end: usize,
}
54
/// A piece of a source document together with its location in the original
/// text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chunk {
    /// Byte offsets and 1-based line range of this chunk in the source text.
    pub span: Span,
    /// The chunk's text content.
    pub text: String,
    /// Semantic category (function, class, ...) or `Text` for generic chunks.
    pub chunk_type: ChunkType,
    /// Present only when this chunk is one stride of a larger, split chunk.
    pub stride_info: Option<StrideInfo>,
}
63
/// Semantic category of a chunk, derived from the tree-sitter node kind that
/// produced it; `Text` is used by the generic line-based chunker.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ChunkType {
    Text,
    Function,
    Class,
    Method,
    Module,
}
72
/// Languages with a tree-sitter grammar wired up for structural chunking.
/// (JavaScript is parsed with the TypeScript grammar — see `chunk_language`.)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseableLanguage {
    Python,
    TypeScript,
    JavaScript,
    Haskell,
    Rust,
    Ruby,
    Go,
    CSharp,
}
84
85impl std::fmt::Display for ParseableLanguage {
86 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
87 let name = match self {
88 ParseableLanguage::Python => "python",
89 ParseableLanguage::TypeScript => "typescript",
90 ParseableLanguage::JavaScript => "javascript",
91 ParseableLanguage::Haskell => "haskell",
92 ParseableLanguage::Rust => "rust",
93 ParseableLanguage::Ruby => "ruby",
94 ParseableLanguage::Go => "go",
95 ParseableLanguage::CSharp => "csharp",
96 };
97 write!(f, "{}", name)
98 }
99}
100
101impl TryFrom<ck_core::Language> for ParseableLanguage {
102 type Error = anyhow::Error;
103
104 fn try_from(lang: ck_core::Language) -> Result<Self, Self::Error> {
105 match lang {
106 ck_core::Language::Python => Ok(ParseableLanguage::Python),
107 ck_core::Language::TypeScript => Ok(ParseableLanguage::TypeScript),
108 ck_core::Language::JavaScript => Ok(ParseableLanguage::JavaScript),
109 ck_core::Language::Haskell => Ok(ParseableLanguage::Haskell),
110 ck_core::Language::Rust => Ok(ParseableLanguage::Rust),
111 ck_core::Language::Ruby => Ok(ParseableLanguage::Ruby),
112 ck_core::Language::Go => Ok(ParseableLanguage::Go),
113 ck_core::Language::CSharp => Ok(ParseableLanguage::CSharp),
114 _ => Err(anyhow::anyhow!(
115 "Language {:?} is not supported for parsing",
116 lang
117 )),
118 }
119 }
120}
121
/// Chunks `text` using the default [`ChunkConfig`], dispatching to a
/// tree-sitter parser when `language` is supported and falling back to
/// generic line-based chunking otherwise.
pub fn chunk_text(text: &str, language: Option<ck_core::Language>) -> Result<Vec<Chunk>> {
    chunk_text_with_config(text, language, &ChunkConfig::default())
}
125
/// Tuning knobs for chunking and for striding oversized chunks.
#[derive(Debug, Clone)]
pub struct ChunkConfig {
    /// Maximum estimated token count a chunk may hold before it is strided.
    pub max_tokens: usize,
    /// Token overlap between consecutive strides of a split chunk.
    pub stride_overlap: usize,
    /// When true, chunks exceeding `max_tokens` are split into strides.
    pub enable_striding: bool,
}
136
137impl Default for ChunkConfig {
138 fn default() -> Self {
139 Self {
140 max_tokens: 8192, stride_overlap: 1024, enable_striding: true,
143 }
144 }
145}
146
147pub fn chunk_text_with_model(
149 text: &str,
150 language: Option<ck_core::Language>,
151 model_name: Option<&str>,
152) -> Result<Vec<Chunk>> {
153 let (target_tokens, overlap_tokens) = get_model_chunk_config(model_name);
154
155 let config = ChunkConfig {
157 max_tokens: target_tokens,
158 stride_overlap: overlap_tokens,
159 enable_striding: true,
160 };
161
162 chunk_text_with_config_and_model(text, language, &config, model_name)
163}
164
/// Chunks `text` with an explicit `config`; the generic chunker uses the
/// default (no-model) token budget.
pub fn chunk_text_with_config(
    text: &str,
    language: Option<ck_core::Language>,
    config: &ChunkConfig,
) -> Result<Vec<Chunk>> {
    chunk_text_with_config_and_model(text, language, config, None)
}
172
173fn chunk_text_with_config_and_model(
174 text: &str,
175 language: Option<ck_core::Language>,
176 config: &ChunkConfig,
177 model_name: Option<&str>,
178) -> Result<Vec<Chunk>> {
179 tracing::debug!(
180 "Chunking text with language: {:?}, length: {} chars, config: {:?}",
181 language,
182 text.len(),
183 config
184 );
185
186 let result = match language.map(ParseableLanguage::try_from) {
187 Some(Ok(lang)) => {
188 tracing::debug!("Using {} tree-sitter parser", lang);
189 chunk_language_with_model(text, lang, model_name)
190 }
191 Some(Err(_)) => {
192 tracing::debug!("Language not supported for parsing, using generic chunking strategy");
193 chunk_generic_with_token_config(text, model_name)
194 }
195 None => {
196 tracing::debug!("Using generic chunking strategy");
197 chunk_generic_with_token_config(text, model_name)
198 }
199 };
200
201 let mut chunks = result?;
202
203 if config.enable_striding {
205 chunks = apply_striding(chunks, config)?;
206 }
207
208 tracing::debug!("Successfully created {} final chunks", chunks.len());
209 Ok(chunks)
210}
211
/// Generic line-based chunking with the default (no-model) token budget.
fn chunk_generic(text: &str) -> Result<Vec<Chunk>> {
    chunk_generic_with_token_config(text, None)
}
215
216fn chunk_generic_with_token_config(text: &str, model_name: Option<&str>) -> Result<Vec<Chunk>> {
217 let mut chunks = Vec::new();
218 let lines: Vec<&str> = text.lines().collect();
219
220 let (target_tokens, overlap_tokens) = get_model_chunk_config(model_name);
222
223 let avg_tokens_per_line = 10.0; let target_lines = ((target_tokens as f32) / avg_tokens_per_line) as usize;
227 let overlap_lines = ((overlap_tokens as f32) / avg_tokens_per_line) as usize;
228
229 let chunk_size = target_lines.max(5); let overlap = overlap_lines.max(1); let mut line_byte_offsets = Vec::with_capacity(lines.len() + 1);
234 line_byte_offsets.push(0);
235 let mut cumulative_offset = 0;
236 let mut byte_pos = 0;
237
238 for line in lines.iter() {
239 cumulative_offset += line.len();
240
241 let line_end_pos = byte_pos + line.len();
243 let newline_len = if line_end_pos < text.len() && text.as_bytes()[line_end_pos] == b'\r' {
244 if line_end_pos + 1 < text.len() && text.as_bytes()[line_end_pos + 1] == b'\n' {
245 2 } else {
247 1 }
249 } else if line_end_pos < text.len() && text.as_bytes()[line_end_pos] == b'\n' {
250 1 } else {
252 0 };
254
255 cumulative_offset += newline_len;
256 byte_pos = cumulative_offset;
257 line_byte_offsets.push(cumulative_offset);
258 }
259
260 let mut i = 0;
261 while i < lines.len() {
262 let end = (i + chunk_size).min(lines.len());
263 let chunk_lines = &lines[i..end];
264 let chunk_text = chunk_lines.join("\n");
265
266 let byte_start = line_byte_offsets[i];
267 let byte_end = line_byte_offsets[end];
268
269 chunks.push(Chunk {
270 span: Span {
271 byte_start,
272 byte_end,
273 line_start: i + 1,
274 line_end: end,
275 },
276 text: chunk_text,
277 chunk_type: ChunkType::Text,
278 stride_info: None,
279 });
280
281 i += chunk_size - overlap;
282 if i >= lines.len() {
283 break;
284 }
285 }
286
287 Ok(chunks)
288}
289
290fn chunk_language(text: &str, language: ParseableLanguage) -> Result<Vec<Chunk>> {
291 let mut parser = tree_sitter::Parser::new();
292
293 match language {
294 ParseableLanguage::Python => parser.set_language(&tree_sitter_python::language())?,
295 ParseableLanguage::TypeScript | ParseableLanguage::JavaScript => {
296 parser.set_language(&tree_sitter_typescript::language_typescript())?
297 }
298 ParseableLanguage::Haskell => parser.set_language(&tree_sitter_haskell::language())?,
299 ParseableLanguage::Rust => parser.set_language(&tree_sitter_rust::language())?,
300 ParseableLanguage::Ruby => parser.set_language(&tree_sitter_ruby::language())?,
301 ParseableLanguage::Go => parser.set_language(&tree_sitter_go::language())?,
302 ParseableLanguage::CSharp => parser.set_language(&tree_sitter_c_sharp::language())?,
303 }
304
305 let tree = parser
306 .parse(text, None)
307 .ok_or_else(|| anyhow::anyhow!("Failed to parse {} code", language))?;
308
309 let mut chunks = Vec::new();
310 let mut cursor = tree.root_node().walk();
311
312 extract_code_chunks(&mut cursor, text, &mut chunks, language);
313
314 if chunks.is_empty() {
315 return chunk_generic(text);
316 }
317
318 Ok(chunks)
319}
320
/// Model-aware wrapper around [`chunk_language`]. The model name is currently
/// unused: tree-sitter chunks are sized by source structure, and oversized
/// ones are strided afterwards by the caller.
fn chunk_language_with_model(
    text: &str,
    language: ParseableLanguage,
    _model_name: Option<&str>,
) -> Result<Vec<Chunk>> {
    chunk_language(text, language)
}
331
332fn extract_code_chunks(
333 cursor: &mut tree_sitter::TreeCursor,
334 source: &str,
335 chunks: &mut Vec<Chunk>,
336 language: ParseableLanguage,
337) {
338 let node = cursor.node();
339 let node_kind = node.kind();
340
341 let is_chunk = match language {
342 ParseableLanguage::Python => {
343 matches!(node_kind, "function_definition" | "class_definition")
344 }
345 ParseableLanguage::TypeScript | ParseableLanguage::JavaScript => matches!(
346 node_kind,
347 "function_declaration" | "class_declaration" | "method_definition" | "arrow_function"
348 ),
349 ParseableLanguage::Haskell => matches!(
350 node_kind,
351 "signature"
352 | "data_type"
353 | "newtype"
354 | "type_synonym"
355 | "type_family"
356 | "class"
357 | "instance"
358 ),
359 ParseableLanguage::Rust => matches!(
360 node_kind,
361 "function_item" | "impl_item" | "struct_item" | "enum_item" | "trait_item" | "mod_item"
362 ),
363 ParseableLanguage::Ruby => matches!(
364 node_kind,
365 "method" | "class" | "module" | "singleton_method"
366 ),
367 ParseableLanguage::Go => matches!(
368 node_kind,
369 "function_declaration"
370 | "method_declaration"
371 | "type_declaration"
372 | "var_declaration"
373 | "const_declaration"
374 ),
375 ParseableLanguage::CSharp => matches!(
376 node_kind,
377 "method_declaration"
378 | "class_declaration"
379 | "interface_declaration"
380 | "variable_declaration"
381 ),
382 };
383
384 if is_chunk {
385 let start_byte = node.start_byte();
386 let end_byte = node.end_byte();
387 let start_pos = node.start_position();
388 let end_pos = node.end_position();
389
390 let text = &source[start_byte..end_byte];
391
392 let chunk_type = match node_kind {
393 "function_definition"
394 | "function_declaration"
395 | "arrow_function"
396 | "function"
397 | "signature"
398 | "function_item"
399 | "def"
400 | "defp"
401 | "method"
402 | "singleton_method"
403 | "defn"
404 | "defn-" => ChunkType::Function,
405 "class_definition"
406 | "class_declaration"
407 | "instance_declaration"
408 | "class"
409 | "instance"
410 | "struct_item"
411 | "enum_item"
412 | "defstruct"
413 | "defrecord"
414 | "deftype"
415 | "type_declaration" => ChunkType::Class,
416 "method_definition" | "method_declaration" | "defmacro" => ChunkType::Method,
417 "data_type"
418 | "newtype"
419 | "type_synomym"
420 | "type_family"
421 | "impl_item"
422 | "trait_item"
423 | "mod_item"
424 | "defmodule"
425 | "module"
426 | "defprotocol"
427 | "interface_declaration"
428 | "ns"
429 | "var_declaration"
430 | "const_declaration"
431 | "variable_declaration" => ChunkType::Module,
432 _ => ChunkType::Text,
433 };
434
435 chunks.push(Chunk {
436 span: Span {
437 byte_start: start_byte,
438 byte_end: end_byte,
439 line_start: start_pos.row + 1,
440 line_end: end_pos.row + 1,
441 },
442 text: text.to_string(),
443 chunk_type,
444 stride_info: None,
445 });
446 }
447
448 if cursor.goto_first_child() {
449 loop {
450 extract_code_chunks(cursor, source, chunks, language);
451 if !cursor.goto_next_sibling() {
452 break;
453 }
454 }
455 cursor.goto_parent();
456 }
457}
458
459fn apply_striding(chunks: Vec<Chunk>, config: &ChunkConfig) -> Result<Vec<Chunk>> {
461 let mut result = Vec::new();
462
463 for chunk in chunks {
464 let estimated_tokens = estimate_tokens(&chunk.text);
465
466 if estimated_tokens <= config.max_tokens {
467 result.push(chunk);
469 } else {
470 tracing::debug!(
472 "Chunk with {} tokens exceeds limit of {}, applying striding",
473 estimated_tokens,
474 config.max_tokens
475 );
476
477 let strided_chunks = stride_large_chunk(chunk, config)?;
478 result.extend(strided_chunks);
479 }
480 }
481
482 Ok(result)
483}
484
/// Splits an oversized `chunk` into overlapping windows ("strides") that each
/// fit within `config.max_tokens`.
///
/// Window and overlap sizes are computed in *characters*, using the chunk's
/// own chars-per-token ratio (falling back to 4.5 chars/token when the token
/// estimate is zero). The window targets 90% of `max_tokens` to leave
/// headroom for estimation error.
///
/// # Errors
/// Returns an error when the computed stride size is zero (overlap at least
/// as large as the window), which would otherwise loop forever.
fn stride_large_chunk(chunk: Chunk, config: &ChunkConfig) -> Result<Vec<Chunk>> {
    let text = &chunk.text;

    // Nothing to split; hand the chunk back unchanged.
    if text.is_empty() {
        return Ok(vec![chunk]);
    }

    let char_count = text.chars().count();
    let estimated_tokens = estimate_tokens(text);
    let chars_per_token = if estimated_tokens == 0 {
        4.5
    } else {
        char_count as f32 / estimated_tokens as f32
    };
    // 0.9 headroom factor keeps each stride safely under the token limit.
    let window_chars = ((config.max_tokens as f32 * 0.9) * chars_per_token) as usize;
    let overlap_chars = (config.stride_overlap as f32 * chars_per_token) as usize;
    let stride_chars = window_chars.saturating_sub(overlap_chars);

    if stride_chars == 0 {
        return Err(anyhow::anyhow!("Stride size is too small"));
    }

    // Map char indices -> byte offsets so slicing stays on char boundaries.
    let char_byte_indices: Vec<(usize, char)> = text.char_indices().collect();
    let mut strided_chunks = Vec::new();
    // All strides share an id derived from the parent chunk's byte span.
    let original_chunk_id = format!("{}:{}", chunk.span.byte_start, chunk.span.byte_end);
    let mut start_char_idx = 0;
    let mut stride_index = 0;

    let total_strides = if char_count <= window_chars {
        1
    } else {
        ((char_count - overlap_chars) as f32 / stride_chars as f32).ceil() as usize
    };

    while start_char_idx < char_count {
        let end_char_idx = (start_char_idx + window_chars).min(char_count);

        let start_byte_pos = char_byte_indices[start_char_idx].0;
        let end_byte_pos = if end_char_idx < char_count {
            char_byte_indices[end_char_idx].0
        } else {
            text.len()
        };

        let stride_text = &text[start_byte_pos..end_byte_pos];

        // First stride has no left overlap; last stride has no right overlap.
        let overlap_start = if stride_index > 0 { overlap_chars } else { 0 };
        let overlap_end = if end_char_idx < char_count {
            overlap_chars
        } else {
            0
        };

        // Translate stride-local byte offsets back into file coordinates.
        let byte_offset_start = chunk.span.byte_start + start_byte_pos;
        let byte_offset_end = chunk.span.byte_start + end_byte_pos;

        // NOTE(review): line accounting is approximate — `lines()` ignores a
        // trailing terminator, so line_start/line_end can be off by one when
        // a stride boundary sits exactly on a newline; confirm whether exact
        // line spans matter downstream before tightening.
        let text_before_start = &text[..start_byte_pos];
        let line_offset_start = text_before_start.lines().count().saturating_sub(1);
        let stride_lines = stride_text.lines().count();

        let stride_chunk = Chunk {
            span: Span {
                byte_start: byte_offset_start,
                byte_end: byte_offset_end,
                line_start: chunk.span.line_start + line_offset_start,
                line_end: chunk.span.line_start
                    + line_offset_start
                    + stride_lines.saturating_sub(1),
            },
            text: stride_text.to_string(),
            chunk_type: chunk.chunk_type.clone(),
            stride_info: Some(StrideInfo {
                original_chunk_id: original_chunk_id.clone(),
                stride_index,
                total_strides,
                overlap_start,
                overlap_end,
            }),
        };

        strided_chunks.push(stride_chunk);

        if end_char_idx >= char_count {
            break;
        }

        start_char_idx += stride_chars;
        stride_index += 1;
    }

    tracing::debug!(
        "Created {} strides from chunk of {} tokens",
        strided_chunks.len(),
        estimate_tokens(text)
    );

    Ok(strided_chunks)
}
598
#[cfg(test)]
mod tests {
    use super::*;

    // Generic chunking must report byte spans that exactly cover the chunk
    // text. This 5-line input fits in a single chunk (default budget is 102
    // lines), so the span includes no trailing newline.
    #[test]
    fn test_chunk_generic_byte_offsets() {
        let text = "line 1\nline 2\nline 3\nline 4\nline 5";
        let chunks = chunk_generic(text).unwrap();

        assert!(!chunks.is_empty());

        assert_eq!(chunks[0].span.byte_start, 0);

        for chunk in &chunks {
            let expected_len = chunk.text.len();
            let actual_len = chunk.span.byte_end - chunk.span.byte_start;
            assert_eq!(actual_len, expected_len);
        }
    }

    // Guards against accidentally quadratic offset computation: 1000 lines
    // must chunk well under 100 ms and produce sane 1-based line numbers.
    #[test]
    fn test_chunk_generic_large_file_performance() {
        let lines: Vec<String> = (0..1000)
            .map(|i| format!("Line {}: Some content here", i))
            .collect();
        let text = lines.join("\n");

        let start = std::time::Instant::now();
        let chunks = chunk_generic(&text).unwrap();
        let duration = start.elapsed();

        assert!(
            duration.as_millis() < 100,
            "Chunking took too long: {:?}",
            duration
        );
        assert!(!chunks.is_empty());

        for chunk in &chunks {
            assert!(chunk.span.line_start > 0);
            assert!(chunk.span.line_end >= chunk.span.line_start);
        }
    }

    // Rust structural chunking: structs/impls map to Class/Module-ish kinds
    // and free functions to Function.
    #[test]
    fn test_chunk_rust() {
        let rust_code = r#"
pub struct Calculator {
    memory: f64,
}

impl Calculator {
    pub fn new() -> Self {
        Calculator { memory: 0.0 }
    }

    pub fn add(&mut self, a: f64, b: f64) -> f64 {
        a + b
    }
}

fn main() {
    let calc = Calculator::new();
}

pub mod utils {
    pub fn helper() {}
}
"#;

        let chunks = chunk_language(rust_code, ParseableLanguage::Rust).unwrap();
        assert!(!chunks.is_empty());

        let chunk_types: Vec<&ChunkType> = chunks.iter().map(|c| &c.chunk_type).collect();
        assert!(chunk_types.contains(&&ChunkType::Class));
        assert!(chunk_types.contains(&&ChunkType::Module));
        assert!(chunk_types.contains(&&ChunkType::Function));
    }

    // Ruby structural chunking: classes, modules, and methods (instance and
    // singleton) must all be recognized.
    #[test]
    fn test_chunk_ruby() {
        let ruby_code = r#"
class Calculator
  def initialize
    @memory = 0.0
  end

  def add(a, b)
    a + b
  end

  def self.class_method
    "class method"
  end

  private

  def private_method
    "private"
  end
end

module Utils
  def self.helper
    "helper"
  end
end

def main
  calc = Calculator.new
end
"#;

        let chunks = chunk_language(ruby_code, ParseableLanguage::Ruby).unwrap();
        assert!(!chunks.is_empty());

        let chunk_types: Vec<&ChunkType> = chunks.iter().map(|c| &c.chunk_type).collect();
        assert!(chunk_types.contains(&&ChunkType::Class));
        assert!(chunk_types.contains(&&ChunkType::Module));
        assert!(chunk_types.contains(&&ChunkType::Function));
    }

    // With no language, chunk_text must behave exactly like the generic
    // line-based chunker.
    #[test]
    fn test_language_detection_fallback() {
        let generic_text = "Some text\nwith multiple lines\nto chunk generically";

        let chunks_unknown = chunk_text(generic_text, None).unwrap();
        let chunks_generic = chunk_generic(generic_text).unwrap();

        assert_eq!(chunks_unknown.len(), chunks_generic.len());
        assert_eq!(chunks_unknown[0].text, chunks_generic[0].text);
    }

    // Go structural chunking: const/var/type declarations, functions, and
    // receiver methods must each map to their chunk type.
    #[test]
    fn test_chunk_go() {
        let go_code = r#"
package main

import "fmt"

const Pi = 3.14159

var memory float64

type Calculator struct {
    memory float64
}

type Operation interface {
    Calculate(a, b float64) float64
}

func NewCalculator() *Calculator {
    return &Calculator{memory: 0.0}
}

func (c *Calculator) Add(a, b float64) float64 {
    return a + b
}

func main() {
    calc := NewCalculator()
}
"#;

        let chunks = chunk_language(go_code, ParseableLanguage::Go).unwrap();
        assert!(!chunks.is_empty());

        let chunk_types: Vec<&ChunkType> = chunks.iter().map(|c| &c.chunk_type).collect();
        assert!(chunk_types.contains(&&ChunkType::Module));
        assert!(chunk_types.contains(&&ChunkType::Class));
        assert!(chunk_types.contains(&&ChunkType::Function));
        assert!(chunk_types.contains(&&ChunkType::Method));
    }

    // C# structural chunking: interfaces, classes, and methods must each be
    // recognized.
    #[test]
    fn test_chunk_csharp() {
        let csharp_code = r#"
namespace Calculator;

public interface ICalculator
{
    double Add(double x, double y);
}

public class Calculator
{
    public static const double PI = 3.14159;
    private double _memory;

    public Calculator()
    {
        _memory = 0.0;
    }

    public double Add(double x, double y)
    {
        return x + y;
    }

    public static void Main(string[] args)
    {
        var calc = new Calculator();
    }
}
"#;

        let chunks = chunk_language(csharp_code, ParseableLanguage::CSharp).unwrap();
        assert!(!chunks.is_empty());

        let chunk_types: Vec<&ChunkType> = chunks.iter().map(|c| &c.chunk_type).collect();
        assert!(chunk_types.contains(&&ChunkType::Module));
        assert!(chunk_types.contains(&&ChunkType::Class));
        assert!(chunk_types.contains(&&ChunkType::Method));
    }

    // Striding an empty chunk must be a no-op that returns the chunk itself.
    #[test]
    fn test_stride_large_chunk_empty_text() {
        let empty_chunk = Chunk {
            span: Span {
                byte_start: 0,
                byte_end: 0,
                line_start: 1,
                line_end: 1,
            },
            text: String::new(),
            chunk_type: ChunkType::Text,
            stride_info: None,
        };

        let config = ChunkConfig::default();
        let result = stride_large_chunk(empty_chunk.clone(), &config);

        assert!(result.is_ok());
        let chunks = result.unwrap();
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].text, "");
    }

    // Whitespace-only text may estimate to zero tokens; striding must then
    // fall back to the 4.5 chars/token ratio instead of dividing by zero.
    #[test]
    fn test_stride_large_chunk_zero_token_estimate() {
        let chunk = Chunk {
            span: Span {
                byte_start: 0,
                byte_end: 5,
                line_start: 1,
                line_end: 1,
            },
            text: " ".to_string(),
            chunk_type: ChunkType::Text,
            stride_info: None,
        };

        let config = ChunkConfig::default();
        let result = stride_large_chunk(chunk, &config);

        assert!(result.is_ok());
    }

    // Strided chunks must keep line_start <= line_end, and the reported line
    // span must stay consistent with the stride's actual line content.
    #[test]
    fn test_strided_chunk_line_calculation() {
        let long_text = (1..=50).map(|i| format!("This is a longer line {} with more content to ensure token count is high enough", i)).collect::<Vec<_>>().join("\n");

        let chunk = Chunk {
            span: Span {
                byte_start: 0,
                byte_end: long_text.len(),
                line_start: 1,
                line_end: 50,
            },
            text: long_text,
            chunk_type: ChunkType::Text,
            stride_info: None,
        };

        // Small budget forces multiple strides from the 50-line chunk.
        let config = ChunkConfig {
            max_tokens: 100,
            stride_overlap: 10,
            ..Default::default()
        };

        let result = stride_large_chunk(chunk, &config);
        if let Err(e) = &result {
            eprintln!("Stride error: {}", e);
        }
        assert!(result.is_ok());

        let chunks = result.unwrap();
        assert!(
            chunks.len() > 1,
            "Should create multiple chunks when striding"
        );

        for chunk in chunks {
            assert!(chunk.span.line_end >= chunk.span.line_start);

            let line_count = chunk.text.lines().count();
            if line_count > 0 {
                let calculated_line_span = chunk.span.line_end - chunk.span.line_start + 1;

                assert!(
                    calculated_line_span <= line_count + 1,
                    "Line span {} should not exceed content lines {} by more than 1",
                    calculated_line_span,
                    line_count
                );
            }
        }
    }
}