prototext_core/serialize/encode_text/
mod.rs1use crate::helpers::{write_varint_ohb, WT_END_GROUP, WT_LEN, WT_START_GROUP};
7use memchr::memrchr;
8
9mod encode_annotation;
10mod fields;
11mod frame;
12mod placeholder;
13
14#[cfg(test)]
15use encode_annotation::parse_field_decl_into;
16use encode_annotation::{parse_annotation, Ann};
17use fields::{encode_packed_elem, encode_scalar_line, write_tag_ohb_local};
18use frame::Frame;
19use placeholder::{compact, fill_placeholder, write_placeholder};
20
21#[inline]
27fn extract_field_number(lhs: &str, ann: &Ann<'_>) -> u64 {
28 if let Some(fn_) = ann.field_number {
29 return fn_;
30 }
31 lhs.trim().parse::<u64>().unwrap_or(0)
32}
33
34#[inline]
39fn split_at_annotation(line: &str) -> (&str, &str) {
40 let b = line.as_bytes();
44 let mut end = b.len();
45 while let Some(p) = memrchr(b'#', &b[..end]) {
46 if p >= 2
47 && b[p - 1] == b' '
48 && b[p - 2] == b' '
49 && p + 2 < b.len()
50 && b[p + 1] == b'@'
51 && b[p + 2] == b' '
52 {
53 return (&line[..p - 2], &line[p + 3..]);
55 }
56 if b[..p].iter().all(|c| *c == b' ' || *c == b'\t')
59 && p + 2 < b.len()
60 && b[p + 1] == b'@'
61 && b[p + 2] == b' '
62 {
63 return ("", &line[p + 3..]);
64 }
65 end = p; }
67 (line, "")
68}
69
70pub fn encode_text_to_binary(text: &[u8]) -> Vec<u8> {
81 let capacity = (text.len() / 6).max(64);
82 let mut out = Vec::with_capacity(capacity);
83
84 let mut stack: Vec<Frame> = Vec::new();
85 let mut first_placeholder: Option<usize> = None;
86 let mut last_placeholder: Option<usize> = None;
87
88 let mut packed_field_number: u64 = 0;
96 let mut packed_tag_ohb: Option<u64> = None;
97 let mut packed_len_ohb: Option<u64> = None;
98 let mut packed_remaining: usize = 0;
99 let mut packed_payload: Vec<u8> = Vec::new();
100
101 let text_str = match std::str::from_utf8(text) {
103 Ok(s) => s,
104 Err(_) => return out,
105 };
106
107 let mut lines = text_str.lines();
108
109 lines.next();
111
112 for line in lines {
113 let line = line.trim_end(); if line.is_empty() {
116 continue;
117 }
118
119 let trimmed = line.trim_start();
128 if !trimmed.is_empty() && trimmed.bytes().all(|b| b == b'}' || b == b' ') {
129 for b in trimmed.bytes() {
130 if b == b'}' {
131 match stack.pop() {
132 Some(Frame::Message {
133 placeholder_start,
134 ohb,
135 content_start,
136 acw,
137 }) => {
138 let total_waste = fill_placeholder(
139 &mut out,
140 placeholder_start,
141 ohb,
142 content_start,
143 acw,
144 );
145 if let Some(parent) = stack.last_mut() {
147 *parent.acw_mut() += total_waste;
148 }
149 }
150 Some(Frame::Group {
151 field_number,
152 open_ended,
153 mismatched_end,
154 end_tag_ohb,
155 acw,
156 }) => {
157 if !open_ended {
158 let end_fn = mismatched_end.unwrap_or(field_number);
159 write_tag_ohb_local(end_fn, WT_END_GROUP, end_tag_ohb, &mut out);
160 }
161 if acw > 0 {
163 if let Some(parent) = stack.last_mut() {
164 *parent.acw_mut() += acw;
165 }
166 }
167 }
168 None => { }
169 }
170 }
171 }
172 continue;
173 }
174
175 let (value_part, ann_str) = split_at_annotation(line);
177
178 let vp_trimmed = value_part.trim_end();
182 let is_open_brace = vp_trimmed.ends_with(" {") || vp_trimmed == "{";
183
184 if is_open_brace {
185 let ann = parse_annotation(ann_str);
186
187 let lhs = vp_trimmed.trim_start().trim_end_matches('{').trim_end();
189
190 let field_number = extract_field_number(lhs, &ann);
191 let tag_ohb = ann.tag_overhang_count;
192
193 if ann.wire_type == "group" {
194 write_tag_ohb_local(field_number, WT_START_GROUP, tag_ohb, &mut out);
195 stack.push(Frame::Group {
196 field_number,
197 open_ended: ann.open_ended_group,
198 mismatched_end: ann.mismatched_group_end,
199 end_tag_ohb: ann.end_tag_overhang_count,
200 acw: 0,
201 });
202 } else {
203 write_tag_ohb_local(field_number, WT_LEN, tag_ohb, &mut out);
205 let ohb = ann.length_overhang_count.unwrap_or(0) as usize;
206 let (ph_start, content_start) =
207 write_placeholder(&mut out, ohb, &mut first_placeholder, &mut last_placeholder);
208 stack.push(Frame::Message {
209 placeholder_start: ph_start,
210 ohb,
211 content_start,
212 acw: 0,
213 });
214 }
215 continue;
216 }
217
218 let trimmed_vp = value_part.trim();
223 if trimmed_vp.is_empty() && !ann_str.is_empty() {
224 let ann = parse_annotation(ann_str);
226 if let Some(0) = ann.pack_size {
227 write_tag_ohb_local(
229 ann.field_number.unwrap_or(0),
230 WT_LEN,
231 ann.tag_overhang_count,
232 &mut out,
233 );
234 write_varint_ohb(0, ann.length_overhang_count, &mut out);
235 }
236 continue;
237 }
238
239 let Some(colon_pos) = value_part.find(':') else {
241 continue;
242 };
243 let lhs = value_part[..colon_pos].trim_start(); let value_str = value_part[colon_pos + 1..].trim();
245
246 let ann = parse_annotation(ann_str);
247 let field_number = extract_field_number(lhs, &ann);
248
249 if packed_remaining > 0 {
251 encode_packed_elem(value_str, &ann, &mut packed_payload);
252 packed_remaining -= 1;
253 if packed_remaining == 0 {
254 write_tag_ohb_local(packed_field_number, WT_LEN, packed_tag_ohb, &mut out);
256 write_varint_ohb(packed_payload.len() as u64, packed_len_ohb, &mut out);
257 out.extend_from_slice(&packed_payload);
258 packed_payload.clear();
259 }
260 continue;
261 }
262
263 if ann.is_packed {
265 if let Some(n) = ann.pack_size {
266 if n == 0 {
267 write_tag_ohb_local(field_number, WT_LEN, ann.tag_overhang_count, &mut out);
269 write_varint_ohb(0, ann.length_overhang_count, &mut out);
270 } else {
271 packed_field_number = field_number;
273 packed_tag_ohb = ann.tag_overhang_count;
274 packed_len_ohb = ann.length_overhang_count;
275 packed_remaining = n - 1; packed_payload.clear();
277 encode_packed_elem(value_str, &ann, &mut packed_payload);
278 if packed_remaining == 0 {
279 write_tag_ohb_local(packed_field_number, WT_LEN, packed_tag_ohb, &mut out);
281 write_varint_ohb(packed_payload.len() as u64, packed_len_ohb, &mut out);
282 out.extend_from_slice(&packed_payload);
283 packed_payload.clear();
284 }
285 }
286 continue;
287 }
288 }
289
290 encode_scalar_line(field_number, value_str, &ann, &mut out);
291 }
292
293 if let Some(first_ph) = first_placeholder {
296 compact(&mut out, first_ph);
297 }
298
299 #[cfg(debug_assertions)]
301 {
302 let ratio = out.len() as f64 / text.len().max(1) as f64;
303 eprintln!(
304 "[encode_text] input_len={} output_len={} ratio={:.2}",
305 text.len(),
306 out.len(),
307 ratio
308 );
309 }
310
311 out
312}
313
/// Unit tests for annotation splitting, field-declaration parsing and a
/// small end-to-end encode.
///
/// Note on inputs: when a value precedes the annotation,
/// `split_at_annotation` only recognises the marker if it is separated from
/// the value by *two* spaces (`"value  #@ ..."`), so every annotated input
/// below uses that two-space separator.
#[cfg(test)]
mod tests {
    use super::*;

    // ── split_at_annotation ──────────────────────────────────────────────

    #[test]
    fn split_bare() {
        let (field, ann) = split_at_annotation("name: 42");
        assert_eq!(field, "name: 42");
        assert_eq!(ann, "");
    }

    #[test]
    fn split_hash_at_space() {
        let (field, ann) = split_at_annotation("name: 42  #@ varint = 1");
        assert_eq!(field, "name: 42");
        assert_eq!(ann, "varint = 1");
    }

    #[test]
    fn split_hash_only() {
        // A lone `#` is not an annotation marker.
        let (field, ann) = split_at_annotation("name: 42  #");
        assert_eq!(field, "name: 42  #");
        assert_eq!(ann, "");
    }

    #[test]
    fn split_hash_at_end() {
        // `#@` must be followed by a space to count as a marker.
        let (field, ann) = split_at_annotation("name: 42  #@");
        assert_eq!(field, "name: 42  #@");
        assert_eq!(ann, "");
    }

    #[test]
    fn split_hash_at_no_space() {
        let (field, ann) = split_at_annotation("name: 42  #@x");
        assert_eq!(field, "name: 42  #@x");
        assert_eq!(ann, "");
    }

    #[test]
    fn split_annotation_only_line() {
        // Nothing but the marker: the value part is empty.
        let (field, ann) = split_at_annotation("#@ varint = 1");
        assert_eq!(field, "");
        assert_eq!(ann, "varint = 1");
    }

    // ── parse_field_decl_into ────────────────────────────────────────────

    /// Builds an `Ann` with every field empty / unset.
    fn make_ann() -> Ann<'static> {
        Ann {
            wire_type: "",
            field_type: "",
            field_number: None,
            is_packed: false,
            tag_overhang_count: None,
            value_overhang_count: None,
            length_overhang_count: None,
            missing_bytes_count: None,
            mismatched_group_end: None,
            open_ended_group: false,
            end_tag_overhang_count: None,
            records_overhung_count: vec![],
            neg_int32_truncated: false,
            records_neg_int32_truncated: vec![],
            enum_scalar_value: None,
            enum_packed_values: vec![],
            nan_bits: None,
            pack_size: None,
            elem_ohb: None,
            elem_neg_trunc: false,
        }
    }

    #[test]
    fn parse_scalar_enum() {
        let mut ann = make_ann();
        parse_field_decl_into("Type(9) = 5", &mut ann);
        assert_eq!(ann.field_type, "enum");
        assert_eq!(ann.enum_scalar_value, Some(9));
        assert_eq!(ann.field_number, Some(5));
    }

    #[test]
    fn parse_scalar_enum_neg() {
        let mut ann = make_ann();
        parse_field_decl_into("Color(-1) = 3", &mut ann);
        assert_eq!(ann.field_type, "enum");
        assert_eq!(ann.enum_scalar_value, Some(-1));
        assert_eq!(ann.field_number, Some(3));
    }

    #[test]
    fn parse_packed_enum() {
        let mut ann = make_ann();
        parse_field_decl_into("Label([1, 2, 3]) [packed=true] = 4", &mut ann);
        assert_eq!(ann.field_type, "enum");
        assert!(ann.is_packed);
        assert_eq!(ann.enum_packed_values, vec![1, 2, 3]);
        assert_eq!(ann.field_number, Some(4));
    }

    #[test]
    fn parse_primitive_int32() {
        let mut ann = make_ann();
        parse_field_decl_into("int32 = 25", &mut ann);
        assert_eq!(ann.field_type, "int32");
        assert_eq!(ann.field_number, Some(25));
        assert_eq!(ann.enum_scalar_value, None);
    }

    #[test]
    fn parse_enum_named_float() {
        let mut ann = make_ann();
        // The `(value)` suffix marks an enum even when its type name
        // coincides with a primitive type name.
        parse_field_decl_into("float(1) = 1", &mut ann);
        assert_eq!(
            ann.field_type, "enum",
            "enum named 'float' must set field_type='enum', not 'float'"
        );
        assert_eq!(ann.enum_scalar_value, Some(1));
    }

    // ── encode_text_to_binary ────────────────────────────────────────────

    #[test]
    fn enum_unknown_encodes_correctly() {
        // First line is the header (skipped by the encoder); the field line
        // uses the two-space separator before its annotation.
        let input = b"#@ prototext: protoc\nkind: 99  #@ Type(99) = 1; ENUM_UNKNOWN\n";
        let wire = encode_text_to_binary(input);
        assert_eq!(
            wire,
            vec![0x08, 0x63],
            "ENUM_UNKNOWN field 1 value 99: expected [0x08, 0x63]"
        );
    }
}