use crate::helpers::{write_varint_ohb, WT_END_GROUP, WT_LEN, WT_START_GROUP};
use memchr::memrchr;
mod encode_annotation;
mod fields;
mod frame;
mod placeholder;
#[cfg(test)]
use encode_annotation::parse_field_decl_into;
use encode_annotation::{parse_annotation, Ann};
use fields::{encode_packed_elem, encode_scalar_line, write_tag_ohb_local};
use frame::Frame;
use placeholder::{compact, fill_placeholder, write_placeholder};
/// Resolves the field number for a line.
///
/// The annotation's `field_number` takes precedence when it is present.
/// Otherwise `lhs` is trimmed and parsed as a decimal `u64`; if that parse
/// fails the result is `0`.
#[inline]
fn extract_field_number(lhs: &str, ann: &Ann<'_>) -> u64 {
    match ann.field_number {
        Some(number) => number,
        None => lhs.trim().parse::<u64>().unwrap_or(0),
    }
}
/// Splits `line` into `(value_part, annotation)` at its `#@ ` marker.
///
/// `#` characters are examined from right to left. A `#` starts the annotation
/// when it is followed by `@ ` and either
/// * is preceded by two spaces, in which case the value part is everything
///   before those two spaces, or
/// * is preceded only by spaces/tabs (or nothing), in which case the value
///   part is `""`.
///
/// The returned annotation is the text after `#@ `. When no `#` qualifies the
/// whole line is returned as the value part with an empty annotation.
#[inline]
fn split_at_annotation(line: &str) -> (&str, &str) {
    let bytes = line.as_bytes();
    // True when the `#` at `pos` is immediately followed by "@ ".
    let opens_annotation =
        |pos: usize| pos + 2 < bytes.len() && bytes[pos + 1] == b'@' && bytes[pos + 2] == b' ';

    let mut search_end = bytes.len();
    loop {
        let Some(pos) = memrchr(b'#', &bytes[..search_end]) else {
            return (line, "");
        };
        if opens_annotation(pos) {
            let before = &bytes[..pos];
            if before.ends_with(b"  ") {
                // Drop the two separator spaces from the value part.
                return (&line[..pos - 2], &line[pos + 3..]);
            }
            if before.iter().all(|&c| matches!(c, b' ' | b'\t')) {
                return ("", &line[pos + 3..]);
            }
        }
        // Not a marker: keep looking further to the left.
        search_end = pos;
    }
}
/// Encodes annotated text into its binary form.
///
/// The input must be valid UTF-8; otherwise an empty buffer is returned. The
/// first line is always skipped and blank lines are ignored. Every other line
/// is handled as one of:
///
/// * a line made only of `}` (and spaces): each `}` pops one frame from the
///   stack — a message frame gets its length placeholder filled in, a group
///   frame gets its end-group tag written unless it is marked open-ended;
/// * a line whose value part is `{` or ends in ` {`: writes the tag and pushes
///   a group frame (annotation wire type `"group"`) or a message frame with a
///   length placeholder;
/// * an annotation-only line: emits an empty length-delimited record when the
///   annotation's `pack_size` is `Some(0)`, and is otherwise ignored;
/// * a `lhs: value` line: either an element of a packed run (buffered until
///   the run's element count is reached, then emitted as one length-delimited
///   record) or a scalar passed to `encode_scalar_line`. Lines without a `:`
///   are skipped.
///
/// After all lines are processed, `compact` is run from the first placeholder
/// if any placeholder was written. In debug builds a size summary is printed
/// to stderr.
pub fn encode_text_to_binary(text: &[u8]) -> Vec<u8> {
    /// Writes one length-delimited record: tag, payload length, payload bytes.
    fn emit_len_record(
        field_number: u64,
        tag_ohb: Option<u64>,
        len_ohb: Option<u64>,
        payload: &[u8],
        out: &mut Vec<u8>,
    ) {
        write_tag_ohb_local(field_number, WT_LEN, tag_ohb, &mut *out);
        write_varint_ohb(payload.len() as u64, len_ohb, &mut *out);
        out.extend_from_slice(payload);
    }

    let mut out = Vec::with_capacity((text.len() / 6).max(64));
    let mut stack: Vec<Frame> = Vec::new();
    let mut first_placeholder: Option<usize> = None;
    let mut last_placeholder: Option<usize> = None;

    // State of the packed run currently being collected; `packed_remaining`
    // is the number of element lines still expected.
    let mut packed_field_number: u64 = 0;
    let mut packed_tag_ohb: Option<u64> = None;
    let mut packed_len_ohb: Option<u64> = None;
    let mut packed_remaining: usize = 0;
    let mut packed_payload: Vec<u8> = Vec::new();

    let Ok(text_str) = std::str::from_utf8(text) else {
        return out;
    };

    // The first line is never encoded.
    for raw_line in text_str.lines().skip(1) {
        let line = raw_line.trim_end();
        if line.is_empty() {
            continue;
        }

        // Closing line: one frame is popped per `}`.
        let closers = line.trim_start();
        if !closers.is_empty() && closers.bytes().all(|b| matches!(b, b'}' | b' ')) {
            for _ in closers.bytes().filter(|&b| b == b'}') {
                let Some(frame) = stack.pop() else {
                    // Unbalanced `}`: nothing to close.
                    continue;
                };
                match frame {
                    Frame::Message {
                        placeholder_start,
                        ohb,
                        content_start,
                        acw,
                    } => {
                        let total_waste =
                            fill_placeholder(&mut out, placeholder_start, ohb, content_start, acw);
                        if let Some(parent) = stack.last_mut() {
                            *parent.acw_mut() += total_waste;
                        }
                    }
                    Frame::Group {
                        field_number,
                        open_ended,
                        mismatched_end,
                        end_tag_ohb,
                        acw,
                    } => {
                        if !open_ended {
                            // A recorded mismatched end number overrides the group's own.
                            let end_number = mismatched_end.unwrap_or(field_number);
                            write_tag_ohb_local(end_number, WT_END_GROUP, end_tag_ohb, &mut out);
                        }
                        if acw > 0 {
                            if let Some(parent) = stack.last_mut() {
                                *parent.acw_mut() += acw;
                            }
                        }
                    }
                }
            }
            continue;
        }

        let (value_part, ann_str) = split_at_annotation(line);

        // Opening line: start a group or a length-delimited message.
        let head = value_part.trim_end();
        if head == "{" || head.ends_with(" {") {
            let ann = parse_annotation(ann_str);
            let lhs = head.trim_start().trim_end_matches('{').trim_end();
            let field_number = extract_field_number(lhs, &ann);
            let tag_ohb = ann.tag_overhang_count;
            if ann.wire_type == "group" {
                write_tag_ohb_local(field_number, WT_START_GROUP, tag_ohb, &mut out);
                stack.push(Frame::Group {
                    field_number,
                    open_ended: ann.open_ended_group,
                    mismatched_end: ann.mismatched_group_end,
                    end_tag_ohb: ann.end_tag_overhang_count,
                    acw: 0,
                });
            } else {
                write_tag_ohb_local(field_number, WT_LEN, tag_ohb, &mut out);
                let ohb = ann.length_overhang_count.unwrap_or(0) as usize;
                let (placeholder_start, content_start) =
                    write_placeholder(&mut out, ohb, &mut first_placeholder, &mut last_placeholder);
                stack.push(Frame::Message {
                    placeholder_start,
                    ohb,
                    content_start,
                    acw: 0,
                });
            }
            continue;
        }

        // Annotation-only line: only an empty packed record produces output.
        if value_part.trim().is_empty() && !ann_str.is_empty() {
            let ann = parse_annotation(ann_str);
            if ann.pack_size == Some(0) {
                emit_len_record(
                    ann.field_number.unwrap_or(0),
                    ann.tag_overhang_count,
                    ann.length_overhang_count,
                    &[],
                    &mut out,
                );
            }
            continue;
        }

        let Some((before_colon, after_colon)) = value_part.split_once(':') else {
            continue;
        };
        let lhs = before_colon.trim_start();
        let value_str = after_colon.trim();
        let ann = parse_annotation(ann_str);
        let field_number = extract_field_number(lhs, &ann);

        // Continuation of a packed run that was opened on an earlier line.
        if packed_remaining > 0 {
            encode_packed_elem(value_str, &ann, &mut packed_payload);
            packed_remaining -= 1;
            if packed_remaining == 0 {
                emit_len_record(
                    packed_field_number,
                    packed_tag_ohb,
                    packed_len_ohb,
                    &packed_payload,
                    &mut out,
                );
                packed_payload.clear();
            }
            continue;
        }

        // First line of a packed run; its annotation carries the element count.
        if ann.is_packed {
            match ann.pack_size {
                Some(0) => {
                    emit_len_record(
                        field_number,
                        ann.tag_overhang_count,
                        ann.length_overhang_count,
                        &[],
                        &mut out,
                    );
                    continue;
                }
                Some(count) => {
                    packed_field_number = field_number;
                    packed_tag_ohb = ann.tag_overhang_count;
                    packed_len_ohb = ann.length_overhang_count;
                    packed_remaining = count - 1;
                    packed_payload.clear();
                    encode_packed_elem(value_str, &ann, &mut packed_payload);
                    if packed_remaining == 0 {
                        emit_len_record(
                            packed_field_number,
                            packed_tag_ohb,
                            packed_len_ohb,
                            &packed_payload,
                            &mut out,
                        );
                        packed_payload.clear();
                    }
                    continue;
                }
                // Packed without a size: falls through to scalar encoding.
                None => {}
            }
        }

        encode_scalar_line(field_number, value_str, &ann, &mut out);
    }

    if let Some(first_ph) = first_placeholder {
        compact(&mut out, first_ph);
    }

    #[cfg(debug_assertions)]
    {
        let ratio = out.len() as f64 / text.len().max(1) as f64;
        eprintln!(
            "[encode_text] input_len={} output_len={} ratio={:.2}",
            text.len(),
            out.len(),
            ratio
        );
    }

    out
}
#[cfg(test)]
mod tests {
    use super::*;

    // `split_at_annotation` only recognises `#@ ` as an annotation marker when
    // the `#` is preceded by two spaces (or by nothing but whitespace), so the
    // inputs below use a two-space separator before `#`.

    /// A line without any `#` is returned unchanged.
    #[test]
    fn split_bare() {
        let (field, ann) = split_at_annotation("name: 42");
        assert_eq!(field, "name: 42");
        assert_eq!(ann, "");
    }

    /// A well-formed `  #@ ` marker splits the line and drops the separator.
    #[test]
    fn split_hash_at_space() {
        let (field, ann) = split_at_annotation("name: 42  #@ varint = 1");
        assert_eq!(field, "name: 42");
        assert_eq!(ann, "varint = 1");
    }

    /// A lone `#` is not a marker.
    #[test]
    fn split_hash_only() {
        let (field, ann) = split_at_annotation("name: 42  #");
        assert_eq!(field, "name: 42  #");
        assert_eq!(ann, "");
    }

    /// `#@` at the very end (no following space) is not a marker.
    #[test]
    fn split_hash_at_end() {
        let (field, ann) = split_at_annotation("name: 42  #@");
        assert_eq!(field, "name: 42  #@");
        assert_eq!(ann, "");
    }

    /// `#@` followed by something other than a space is not a marker.
    #[test]
    fn split_hash_at_no_space() {
        let (field, ann) = split_at_annotation("name: 42  #@x");
        assert_eq!(field, "name: 42  #@x");
        assert_eq!(ann, "");
    }

    /// Builds an annotation with every field empty/unset.
    fn make_ann() -> Ann<'static> {
        Ann {
            wire_type: "",
            field_type: "",
            field_number: None,
            is_packed: false,
            tag_overhang_count: None,
            value_overhang_count: None,
            length_overhang_count: None,
            missing_bytes_count: None,
            mismatched_group_end: None,
            open_ended_group: false,
            end_tag_overhang_count: None,
            records_overhung_count: vec![],
            neg_int32_truncated: false,
            records_neg_int32_truncated: vec![],
            enum_scalar_value: None,
            enum_packed_values: vec![],
            nan_bits: None,
            pack_size: None,
            elem_ohb: None,
            elem_neg_trunc: false,
        }
    }

    #[test]
    fn parse_scalar_enum() {
        let mut ann = make_ann();
        parse_field_decl_into("Type(9) = 5", &mut ann);
        assert_eq!(ann.field_type, "enum");
        assert_eq!(ann.enum_scalar_value, Some(9));
        assert_eq!(ann.field_number, Some(5));
    }

    #[test]
    fn parse_scalar_enum_neg() {
        let mut ann = make_ann();
        parse_field_decl_into("Color(-1) = 3", &mut ann);
        assert_eq!(ann.field_type, "enum");
        assert_eq!(ann.enum_scalar_value, Some(-1));
        assert_eq!(ann.field_number, Some(3));
    }

    #[test]
    fn parse_packed_enum() {
        let mut ann = make_ann();
        parse_field_decl_into("Label([1, 2, 3]) [packed=true] = 4", &mut ann);
        assert_eq!(ann.field_type, "enum");
        assert!(ann.is_packed);
        assert_eq!(ann.enum_packed_values, vec![1, 2, 3]);
        assert_eq!(ann.field_number, Some(4));
    }

    #[test]
    fn parse_primitive_int32() {
        let mut ann = make_ann();
        parse_field_decl_into("int32 = 25", &mut ann);
        assert_eq!(ann.field_type, "int32");
        assert_eq!(ann.field_number, Some(25));
        assert_eq!(ann.enum_scalar_value, None);
    }

    #[test]
    fn parse_enum_named_float() {
        let mut ann = make_ann();
        parse_field_decl_into("float(1) = 1", &mut ann);
        assert_eq!(
            ann.field_type, "enum",
            "enum named 'float' must set field_type='enum', not 'float'"
        );
        assert_eq!(ann.enum_scalar_value, Some(1));
    }

    /// End-to-end: the first input line is skipped by the encoder, the second
    /// carries the value and its annotation after a two-space separator.
    #[test]
    fn enum_unknown_encodes_correctly() {
        let input = b"#@ prototext: protoc\nkind: 99  #@ Type(99) = 1; ENUM_UNKNOWN\n";
        let wire = encode_text_to_binary(input);
        assert_eq!(
            wire,
            vec![0x08, 0x63],
            "ENUM_UNKNOWN field 1 value 99: expected [0x08, 0x63]"
        );
    }
}