Skip to main content

actr_web_protoc_codegen/
descriptor.rs

1//! Descriptor-based proto parsing.
2//!
3//! Runs `protoc` against the configured proto files with
4//! `--descriptor_set_out`, decodes the emitted `FileDescriptorSet` via
5//! `prost_types`, and converts the structured result into crate-local model
6//! types. This replaces the previous regex-based `.proto` text parser: the
7//! descriptor path is robust against comments, nested messages, streaming
8//! markers, and proto2/proto3 syntax differences.
9
10use crate::{ProtoField, ProtoMessage, ProtoMethod, ProtoService, error::CodegenError};
11use prost::Message;
12use prost_types::{
13    DescriptorProto, FieldDescriptorProto, FileDescriptorProto, FileDescriptorSet,
14    field_descriptor_proto,
15};
16use std::path::{Path, PathBuf};
17
18/// Invoke `protoc` to compile the given proto files into a
19/// `FileDescriptorSet`, then return the decoded structure.
20///
21/// `includes` is the list of `-I` search paths passed to `protoc`. If empty,
22/// each proto file's parent directory is used as a fallback include path so
23/// that relative imports still resolve.
24pub(crate) fn compile_to_descriptor_set(
25    proto_files: &[PathBuf],
26    includes: &[PathBuf],
27) -> crate::error::Result<FileDescriptorSet> {
28    use std::process::Command;
29
30    if proto_files.is_empty() {
31        return Ok(FileDescriptorSet::default());
32    }
33
34    // `protoc` writes the binary descriptor set to a temporary file; we then
35    // read and decode it. Using a process-unique name avoids clashes when the
36    // codegen library is used concurrently in tests.
37    let out_path = std::env::temp_dir().join(format!(
38        "actr-web-protoc-codegen-{}-{}.desc",
39        std::process::id(),
40        // Use the files' hash to disambiguate parallel compile calls.
41        fnv_hash(proto_files),
42    ));
43
44    let mut cmd = Command::new("protoc");
45    cmd.arg("--include_imports")
46        .arg("--include_source_info")
47        .arg(format!("--descriptor_set_out={}", out_path.display()));
48
49    let mut seen_includes: Vec<PathBuf> = Vec::new();
50    for inc in includes {
51        if !seen_includes.iter().any(|p| p == inc) {
52            seen_includes.push(inc.clone());
53        }
54    }
55    // Fallback: ensure each proto file is reachable via at least its parent,
56    // so relative imports in the proto resolve even when the caller did not
57    // pass an explicit include root.
58    for proto in proto_files {
59        if let Some(parent) = proto.parent().filter(|p| !p.as_os_str().is_empty()) {
60            let parent = parent.to_path_buf();
61            if !seen_includes.iter().any(|p| p == &parent) {
62                seen_includes.push(parent);
63            }
64        }
65    }
66
67    for inc in &seen_includes {
68        cmd.arg("-I").arg(inc);
69    }
70
71    for proto in proto_files {
72        cmd.arg(proto);
73    }
74
75    tracing::debug!("Running: {:?}", cmd);
76
77    let output = cmd
78        .output()
79        .map_err(|e| CodegenError::proto_parse(format!("failed to spawn protoc: {e}")))?;
80
81    if !output.status.success() {
82        // Best-effort cleanup; the file may not exist.
83        let _ = std::fs::remove_file(&out_path);
84        return Err(CodegenError::proto_parse(format!(
85            "protoc failed (status {}): {}",
86            output.status,
87            String::from_utf8_lossy(&output.stderr)
88        )));
89    }
90
91    let bytes = std::fs::read(&out_path).map_err(CodegenError::IoError)?;
92    let _ = std::fs::remove_file(&out_path);
93
94    let set = FileDescriptorSet::decode(bytes.as_slice())
95        .map_err(|e| CodegenError::proto_parse(format!("FileDescriptorSet decode failed: {e}")))?;
96
97    Ok(set)
98}
99
100/// Convert a `FileDescriptorProto` into the crate's `ProtoService` model.
101///
102/// Returns `None` when the file declares no service — callers treat this as
103/// "nothing to generate from this file".
104pub fn file_to_proto_service(file: &FileDescriptorProto) -> Option<ProtoService> {
105    // The crate currently assumes one service per proto file, matching the
106    // sibling `tools/protoc-gen/rust/` plugin. If a file declares more than
107    // one service only the first is used; remaining services are logged.
108    let service = file.service.first()?;
109    if file.service.len() > 1 {
110        tracing::warn!(
111            "{}: declares {} services, only the first ({}) is emitted",
112            file.name(),
113            file.service.len(),
114            service.name()
115        );
116    }
117
118    let package = if file.package().is_empty() {
119        "default".to_string()
120    } else {
121        file.package().to_string()
122    };
123
124    let methods = service
125        .method
126        .iter()
127        .map(method_to_proto_method)
128        .collect::<Vec<_>>();
129
130    let messages = file
131        .message_type
132        .iter()
133        .map(message_to_proto_message)
134        .collect::<Vec<_>>();
135
136    Some(ProtoService {
137        name: service.name().to_string(),
138        package,
139        methods,
140        messages,
141    })
142}
143
144/// Locate a top-level message by name and return its field list in the form
145/// `(field_name, proto_wire_type_token)` expected by the WASM scaffold
146/// generator.
147pub(crate) fn message_fields_for_scaffold(
148    file: &FileDescriptorProto,
149    message_name: &str,
150) -> Option<Vec<(String, String)>> {
151    let desc = file
152        .message_type
153        .iter()
154        .find(|m| m.name() == message_name)?;
155    Some(
156        desc.field
157            .iter()
158            .map(|f| (f.name().to_string(), scalar_type_token(f)))
159            .collect(),
160    )
161}
162
163/// Convert a `MethodDescriptorProto` into the crate's `ProtoMethod`.
164fn method_to_proto_method(method: &prost_types::MethodDescriptorProto) -> ProtoMethod {
165    ProtoMethod {
166        name: method.name().to_string(),
167        input_type: short_type_name(method.input_type()),
168        output_type: short_type_name(method.output_type()),
169        is_streaming: method.client_streaming() || method.server_streaming(),
170    }
171}
172
173/// Convert a top-level `DescriptorProto` into a `ProtoMessage`, flattening
174/// fields. Nested message types are ignored — they are not supported by the
175/// rest of the generator and preserving them here would silently change
176/// behaviour from the previous regex implementation, which also ignored
177/// nested message bodies.
178fn message_to_proto_message(message: &DescriptorProto) -> ProtoMessage {
179    let fields = message.field.iter().map(field_to_proto_field).collect();
180    ProtoMessage {
181        name: message.name().to_string(),
182        fields,
183    }
184}
185
186/// Convert a `FieldDescriptorProto` into the crate's `ProtoField`.
187///
188/// `is_optional` is only set when the source declared the field with an
189/// explicit `optional` keyword (proto2 or proto3 `optional`). Plain proto3
190/// scalars carry `Label::Optional` on the descriptor but are not surfaced as
191/// optional, matching the behaviour of the previous regex text parser.
192fn field_to_proto_field(field: &FieldDescriptorProto) -> ProtoField {
193    use field_descriptor_proto::Label;
194
195    let is_repeated = field.label() == Label::Repeated;
196    let is_optional = !is_repeated && field.proto3_optional();
197
198    ProtoField {
199        name: field.name().to_string(),
200        field_type: scalar_type_token(field),
201        number: field.number() as u32,
202        is_repeated,
203        is_optional,
204    }
205}
206
207/// Render the type name for a field as the token the rest of the codegen
208/// expects (e.g. "string", "int32", or a message type's short name).
209fn scalar_type_token(field: &FieldDescriptorProto) -> String {
210    use field_descriptor_proto::Type;
211
212    match field.r#type() {
213        Type::Double => "double".to_string(),
214        Type::Float => "float".to_string(),
215        Type::Int64 => "int64".to_string(),
216        Type::Uint64 => "uint64".to_string(),
217        Type::Int32 => "int32".to_string(),
218        Type::Fixed64 => "fixed64".to_string(),
219        Type::Fixed32 => "fixed32".to_string(),
220        Type::Bool => "bool".to_string(),
221        Type::String => "string".to_string(),
222        Type::Bytes => "bytes".to_string(),
223        Type::Uint32 => "uint32".to_string(),
224        Type::Sfixed32 => "sfixed32".to_string(),
225        Type::Sfixed64 => "sfixed64".to_string(),
226        Type::Sint32 => "sint32".to_string(),
227        Type::Sint64 => "sint64".to_string(),
228        Type::Message | Type::Enum | Type::Group => short_type_name(field.type_name()),
229    }
230}
231
/// Reduce a fully-qualified proto type name to its last dot-separated
/// segment: leading dots are dropped first, then everything up to and
/// including the final `.` is removed. A name without any remaining dot is
/// returned as-is (minus the leading dots). This short symbol is what the
/// downstream TypeScript / Rust emitters consume.
fn short_type_name(raw: &str) -> String {
    let unrooted = raw.trim_start_matches('.');
    let tail = match unrooted.rsplit_once('.') {
        Some((_qualifier, last_segment)) => last_segment,
        None => unrooted,
    };
    tail.to_string()
}
242
243/// Find the `FileDescriptorProto` matching a caller-supplied proto path.
244///
245/// `protoc` records files by the name it was invoked with (typically the
246/// path relative to an `-I` include root). We therefore try a few
247/// normalisations: exact match, basename match, and `ends_with` match.
248pub(crate) fn find_file<'a>(
249    set: &'a FileDescriptorSet,
250    proto_path: &Path,
251) -> Option<&'a FileDescriptorProto> {
252    let file_name = proto_path.file_name().and_then(|s| s.to_str());
253    set.file.iter().find(|f| {
254        let n = f.name();
255        n == proto_path.to_string_lossy()
256            || file_name.is_some_and(|b| n == b)
257            || file_name.is_some_and(|b| n.ends_with(b))
258    })
259}
260
/// Minimal FNV-1a 32-bit hash over the given proto paths.
///
/// Each path contributes the bytes of its lossy UTF-8 rendering followed by
/// a single `/` byte (0x2f) acting as a separator. The result is only used to
/// build a temp filename and is not security-sensitive.
fn fnv_hash(paths: &[PathBuf]) -> u32 {
    const OFFSET_BASIS: u32 = 0x811c_9dc5;
    const PRIME: u32 = 0x0100_0193;
    const SEPARATOR: u8 = 0x2f;

    /// One FNV-1a round: xor the byte in, then multiply by the prime.
    fn mix(state: u32, byte: u8) -> u32 {
        (state ^ u32::from(byte)).wrapping_mul(PRIME)
    }

    paths.iter().fold(OFFSET_BASIS, |state, path| {
        let text = path.as_os_str().to_string_lossy();
        let hashed = text.bytes().fold(state, mix);
        mix(hashed, SEPARATOR)
    })
}