1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
use std::io::Write;
use crate::Result;
use crate::segment_encoder::SegmentEncoder;
use crate::segments::Segment;
/// A `SegmentEncoder` that writes segments as a single JSON array.
///
/// Design:
/// - Streams output directly to a `Write` implementation to avoid buffering
///   all segments in memory.
/// - The encoder is stateful so it can emit a well-formed JSON array incrementally.
/// - The only bytes the encoder adds around the serialized segments are `[`,
///   `,` and `]`; it inserts no whitespace or newlines of its own.
///
/// Illustrative output for two segments. The fields of each object are whatever
/// `Segment`'s serialization produces; the keys below are placeholders, not a
/// schema guarantee:
/// ```json
/// [{"start":0.0,"end":1.2,"text":"hello"},{"start":1.2,"end":2.5,"text":"world"}]
/// ```
#[derive(Debug)]
pub struct JsonArrayEncoder<W: Write> {
    /// The underlying writer receiving JSON output.
    w: W,
    /// Whether the opening `[` of the JSON array has been written.
    started: bool,
    /// Whether the next element will be the first element in the array.
    /// This lets us correctly place commas between elements.
    first: bool,
    /// Whether the encoder has been closed.
    /// Once closed, no further writes are allowed.
    closed: bool,
}
impl<W: Write> JsonArrayEncoder<W> {
/// Create a new JSON array encoder that writes to the given writer.
///
/// At creation time:
/// - No output is written yet.
/// - The JSON array is opened lazily on the first write or on close.
pub fn new(w: W) -> Self {
Self {
w,
started: false,
first: true,
closed: false,
}
}
/// Write the opening `[` of the JSON array if it has not already been written.
///
/// Defers writing the opening bracket so that:
/// - Empty output still results in valid JSON (`[]`)
/// - Partial output is not emitted unless a segment is actually written
fn start_if_needed(&mut self) -> Result<()> {
if !self.started {
self.w.write_all(b"[")?;
self.started = true;
}
Ok(())
}
}
impl<W: Write> SegmentEncoder for JsonArrayEncoder<W> {
    /// Serialize a single segment and append it to the JSON array.
    ///
    /// The segment is encoded into a temporary buffer *before* anything is
    /// written, so a serialization failure leaves both the output stream and
    /// the encoder state untouched (no dangling `[` or `,`). The encoded
    /// element is then handed to the writer in one `write_all` call and the
    /// writer is flushed.
    ///
    /// # Errors
    ///
    /// Returns an invalid-input error if the encoder has already been closed.
    /// Serialization errors and I/O errors from the writer are propagated.
    fn write_segment(&mut self, seg: &Segment) -> Result<()> {
        if self.closed {
            return Err(crate::Error::invalid_input(
                "cannot write segment: encoder is already closed",
            ));
        }

        // Encode first: if this fails, nothing has been emitted yet and the
        // array stays well-formed for subsequent writes.
        let encoded = serde_json::to_vec(seg)?;

        // Ensure the JSON array has been started.
        self.start_if_needed()?;

        // Write a comma before every element except the first.
        if !self.first {
            self.w.write_all(b",")?;
        }
        self.w.write_all(&encoded)?;

        // Only now is there an element in the array that a later one must be
        // separated from.
        self.first = false;

        // Flush so streaming consumers (stdout, pipes, sockets) see output promptly.
        self.w.flush()?;
        Ok(())
    }

    /// Finalize the JSON array and flush the underlying writer.
    ///
    /// This method is idempotent:
    /// - Calling `close()` multiple times is safe; the closing `]` is written
    ///   at most once.
    /// - After closing, no further segments may be written.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors from the writer. If the final flush fails, the
    /// encoder is still considered closed because `]` has already been
    /// written; a later `close()` returns `Ok(())` without flushing again.
    fn close(&mut self) -> Result<()> {
        if self.closed {
            return Ok(());
        }

        // Ensure a valid JSON array even if no segments were written.
        self.start_if_needed()?;

        // Close the JSON array.
        self.w.write_all(b"]")?;

        // Mark closed as soon as the bracket is out, so that a failing flush
        // cannot lead to a second `]` or to segments appended after it.
        self.closed = true;

        self.w.flush()?;
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a test segment with the given timing and text, language code
    /// `"en"`, no tokens, and no speaker turn.
    fn seg(start: f32, end: f32, text: &str) -> Segment {
        Segment {
            text: text.to_string(),
            start_seconds: start,
            end_seconds: end,
            language_code: "en".to_string(),
            tokens: Vec::new(),
            next_speaker_turn: false,
        }
    }

    /// Closing an encoder that never received a segment yields `[]`.
    #[test]
    fn json_array_close_without_segments_emits_empty_array() -> anyhow::Result<()> {
        let mut buf = Vec::<u8>::new();
        let mut encoder = JsonArrayEncoder::new(&mut buf);

        encoder.close()?;

        let written = std::str::from_utf8(&buf)?;
        assert_eq!(written, "[]");
        Ok(())
    }

    /// Two segments followed by `close` produce a parseable two-element array.
    #[test]
    fn json_array_writes_valid_json_incrementally() -> anyhow::Result<()> {
        let mut buf = Vec::<u8>::new();
        let mut encoder = JsonArrayEncoder::new(&mut buf);

        for &(start, end, text) in &[(0.0, 1.0, "hello"), (1.0, 2.5, "world")] {
            encoder.write_segment(&seg(start, end, text))?;
        }
        encoder.close()?;

        let written = std::str::from_utf8(&buf)?;
        let parsed: serde_json::Value = serde_json::from_str(written)?;
        let items = parsed.as_array().expect("expected JSON array");

        let texts: Vec<Option<&str>> = items.iter().map(|item| item["text"].as_str()).collect();
        assert_eq!(texts, vec![Some("hello"), Some("world")]);
        Ok(())
    }

    /// A second `close` succeeds and adds nothing to the output.
    #[test]
    fn json_array_close_is_idempotent() -> anyhow::Result<()> {
        let mut buf = Vec::<u8>::new();
        let mut encoder = JsonArrayEncoder::new(&mut buf);

        for _ in 0..2 {
            encoder.close()?;
        }

        let written = std::str::from_utf8(&buf)?;
        assert_eq!(written, "[]");
        Ok(())
    }

    /// Writing after `close` is rejected with an "already closed" error.
    #[test]
    fn json_array_write_after_close_errors() -> anyhow::Result<()> {
        let mut buf = Vec::<u8>::new();
        let mut encoder = JsonArrayEncoder::new(&mut buf);
        encoder.close()?;

        let late_segment = seg(0.0, 1.0, "nope");
        let err = encoder.write_segment(&late_segment).unwrap_err();
        let message = err.to_string();

        assert!(message.contains("already closed"));
        Ok(())
    }
}