sakurs_core/api/
output.rs1use std::time::Duration;
4
5#[derive(Debug, Clone)]
7pub struct Output {
8 pub boundaries: Vec<Boundary>,
10 pub metadata: ProcessingMetadata,
12}
13
14#[derive(Debug, Clone)]
16pub struct Boundary {
17 pub offset: usize,
19 pub char_offset: usize,
21 pub confidence: f32,
23 pub context: Option<BoundaryContext>,
25}
26
/// Descriptive context that may accompany a [`Boundary`].
///
/// NOTE(review): nothing in this file populates these fields, so the
/// descriptions below are inferred from the field names — confirm against
/// the code that constructs this type.
#[derive(Clone, Debug)]
pub struct BoundaryContext {
    /// Presumably the text immediately preceding the boundary.
    pub before: String,

    /// Presumably the text immediately following the boundary.
    pub after: String,

    /// Presumably a description of why the boundary was detected.
    pub reason: String,
}
37
38#[derive(Debug, Clone)]
40pub struct ProcessingMetadata {
41 pub duration: Duration,
43 pub strategy_used: String,
45 pub chunks_processed: usize,
47 pub memory_peak: usize,
49 pub stats: ProcessingStats,
51}
52
/// Size and sentence statistics gathered for a processing run.
#[derive(Clone, Debug)]
pub struct ProcessingStats {
    /// Length of the processed text in bytes.
    pub bytes_processed: usize,

    /// Length of the processed text in characters (`char`s).
    pub chars_processed: usize,

    /// Number of sentences; `Output::from_delta_stack_result` sets this to
    /// the number of detected boundaries.
    pub sentence_count: usize,

    /// Average number of characters per sentence, or `0.0` when no
    /// sentence was detected.
    pub avg_sentence_length: f32,
}
65
66impl Output {
67 pub(crate) fn from_delta_stack_result(
69 result: crate::application::DeltaStackResult,
70 text: &str,
71 duration: Duration,
72 ) -> Self {
73 let char_boundaries = Self::calculate_char_offsets(text, &result.boundaries);
75
76 let boundaries = result
77 .boundaries
78 .into_iter()
79 .zip(char_boundaries)
80 .map(|(offset, char_offset)| Boundary {
81 offset,
82 char_offset,
83 confidence: 1.0, context: None,
85 })
86 .collect::<Vec<_>>();
87
88 let sentence_count = boundaries.len();
89 let avg_sentence_length = if sentence_count > 0 {
90 text.chars().count() as f32 / sentence_count as f32
91 } else {
92 0.0
93 };
94
95 let strategy_used = if result.thread_count > 1 {
97 format!("parallel ({} threads)", result.thread_count)
98 } else {
99 "sequential".to_string()
100 };
101
102 Self {
103 boundaries,
104 metadata: ProcessingMetadata {
105 duration,
106 strategy_used,
107 chunks_processed: result.chunk_count,
108 memory_peak: 0, stats: ProcessingStats {
110 bytes_processed: text.len(),
111 chars_processed: text.chars().count(),
112 sentence_count,
113 avg_sentence_length,
114 },
115 },
116 }
117 }
118
119 fn calculate_char_offsets(text: &str, byte_offsets: &[usize]) -> Vec<usize> {
121 let mut char_offsets = Vec::with_capacity(byte_offsets.len());
122 let mut char_count = 0;
123 let mut byte_count = 0;
124
125 for (i, ch) in text.chars().enumerate() {
126 if byte_offsets.contains(&byte_count) {
127 char_offsets.push(i);
128 }
129 byte_count += ch.len_utf8();
130 char_count += 1;
131 }
132
133 if byte_offsets.contains(&byte_count) {
135 char_offsets.push(char_count);
136 }
137
138 char_offsets
139 }
140}