java_signatures/lib.rs
//! Validates/Parses Java Type Signatures according to the syntax
//! specified by the [JVM specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1).
3//!
//! This crate does _not_ attempt to parse entire java classfiles.
//! Instead, it focuses merely on parsing signature strings located as
//! attributes in classfiles. For parsing classfiles themselves we do
//! already have a number of very nice crates on
//! [crates.io.](https://crates.io/search?q=%23classfile%20%23java)
//! Only some of them do parse the signature string and provide a
//! model over them. This crate is supposed to supplement those
//! classfile parsers which reveal the signatures as pure strings.
12//!
//! The signature parsers provided by this crate are strict in the
//! sense that the entire input string must be matched according to
//! the syntax rules. Leading (whitespace) or trailing characters are
//! not tolerated.
17//!
18//! Example:
19//! ```rust
20//! use java_signatures::{parse_class_signature, ReferenceType, ClassType, SimpleClassType};
21//!
22//! // ~ a signature corresponding to a `class Bar<T extends Serializable & Comparable<T>> {..}`
23//! // ~ to be obtained from a classfile using a corresponding parser, for example `cafebabe`
24//! let s = "<T::Ljava/io/Serializable;:Ljava/lang/Comparable<TT;>;>Ljava/lang/Object;";
25//! match parse_class_signature(s) {
26//! Ok(parsed) => {
27//! // ~ access to the individual parts of the signature
28//! assert_eq!(1, parsed.type_params.len());
29//! assert_eq!("T", parsed.type_params[0].name);
30//! assert!(parsed.type_params[0].class_bound.is_none());
31//! assert_eq!(2, parsed.type_params[0].iface_bounds.len());
32//! assert!(matches!(
33//! &parsed.type_params[0].iface_bounds[0],
34//! ReferenceType::Class(ClassType {
35//! base: SimpleClassType {
36//! name: "java/io/Serializable",
37//! ..
38//! },
39//! ..
40//! })
41//! ));
42//! // ...
43//!
44//! // ~ the `Display` implementation of the parsed
45//! // signature will produce the original signature
46//! // string again
47//! assert_eq!(s, format!("{parsed}"));
48//! }
49//! Err(e) => {
50//! eprintln!("invalid class signature:");
51//! eprintln!("> {}", e.signature());
52//! eprintln!("> {}^-- {e}", " ".repeat(e.position()));
53//! }
54//! }
55//! ```
56
57mod display;
58mod internal;
59
60// --------------------------------------------------------------------
61
62use std::fmt::Display;
63
64/// A parsed field signature; encodes the (possibly parameterized)
65/// type of a field, formal parameter, local variable, or record
66/// component declaration.
67///
68/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-610) for details.
69#[derive(Debug)]
70pub struct FieldSignature<'a>(pub ReferenceType<'a>);
71
72/// A parse class signature; encodes type information about a
73/// (possibly generic) class or interface declaration. It describes
74/// any type parameters of the class or interface, and lists its
75/// (possibly parameterized) direct superclass and direct
76/// superinterfaces, if any. A type parameter is described by its
77/// name, followed by any class bound and interface bounds.
78///
79/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-410) for details.
80#[derive(Debug)]
81pub struct ClassSignature<'a> {
82 pub type_params: Vec<TypeParameter<'a>>,
83 pub super_class: ClassType<'a>,
84 pub super_ifaces: Vec<ClassType<'a>>,
85}
86
87/// A parsed method signature; encodes type information about a
88/// (possibly generic) method declaration. It describes any type
89/// parameters of the method; the (possibly parameterized) types of
90/// any formal parameters; the (possibly parameterized) return type,
91/// if any; and the types of any exceptions declared in the method's
92/// throws clause.
93///
94/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-510) for details.
95pub struct MethodSignature<'a> {
96 pub type_params: Vec<TypeParameter<'a>>,
97 pub parameters: Vec<JavaType<'a>>,
98 pub result: ResultType<'a>,
99 pub throws: Vec<ThrowsType<'a>>,
100}
101
102/// Represents a primitive java type.
103pub use internal::BaseType;
104
105/// Represents a primitive or reference type.
106#[derive(Debug)]
107pub enum JavaType<'a> {
108 Base(BaseType),
109 Reference(ReferenceType<'a>),
110}
111
112impl<'a> JavaType<'a> {
113 fn from_internal(s: &'a str, internal: internal::JavaType) -> Self {
114 match internal {
115 internal::JavaType::Base(b) => JavaType::Base(b),
116 internal::JavaType::Reference(r) => {
117 JavaType::Reference(ReferenceType::from_internal(s, r))
118 }
119 }
120 }
121}
122
123/// Represents type variables in argument position, e.g. as part of method parameters.
124#[derive(Debug)]
125pub enum TypeArgument<'a> {
126 /// *; `<?>`
127 Unbounded,
128 /// (empty); `<ReferenceType>`
129 Default(ReferenceType<'a>),
130 /// extends; `<? extends ReferenceType>`
131 Extends(ReferenceType<'a>),
132 /// super; `<? super ReferenceType>`
133 Super(ReferenceType<'a>),
134}
135
136/// Represents a simple (ie. not nested) and possibly type-parametrized class type.
137#[derive(Debug)]
138pub struct SimpleClassType<'a> {
139 pub name: &'a str,
140 pub type_args: Vec<TypeArgument<'a>>,
141}
142
143impl<'a> SimpleClassType<'a> {
144 fn from_internal(s: &'a str, internal: internal::SimpleClassType) -> Self {
145 Self {
146 name: internal.0.apply(s),
147 type_args: internal
148 .1
149 .into_iter()
150 .map(|ta| match ta {
151 internal::TypeArgument::Unbounded => TypeArgument::Unbounded,
152 internal::TypeArgument::Default(ty) => {
153 TypeArgument::Default(ReferenceType::from_internal(s, ty))
154 }
155 internal::TypeArgument::Extends(ty) => {
156 TypeArgument::Extends(ReferenceType::from_internal(s, ty))
157 }
158 internal::TypeArgument::Super(ty) => {
159 TypeArgument::Super(ReferenceType::from_internal(s, ty))
160 }
161 })
162 .collect(),
163 }
164 }
165}
166
167/// Represents (a possibly nested, and possible type-parameterized)
168/// class type.
169///
170/// `base + nesting` together denote the ultimate path of the
171/// described class. This is, the concatenation of `base` and
172/// `nesting` represents the fully qualified class name with generic
173/// type information interspresed where necessary. Put another way,
174/// the last element of this concatenation denotes the simple name of
175/// the described class.
176///
177/// `base` alone represents the top-level class, while `nesting`
178/// denotes the recursive nesting within it.
179#[derive(Debug)]
180pub struct ClassType<'a> {
181 pub base: SimpleClassType<'a>,
182 pub nested: Vec<SimpleClassType<'a>>,
183}
184
185impl<'a> ClassType<'a> {
186 fn from_internal(s: &'a str, internal: internal::ClassType) -> Self {
187 Self {
188 base: SimpleClassType::from_internal(s, internal.0),
189 nested: internal
190 .1
191 .into_iter()
192 .map(|ty| SimpleClassType::from_internal(s, ty))
193 .collect(),
194 }
195 }
196}
197
198/// Represents an array type.
199#[derive(Debug)]
200pub struct ArrayType<'a> {
201 /// The dimention of the array. Always greater zero, by definition.
202 pub dimension: usize,
203 /// The type of the elements in the array.
204 pub ty: JavaType<'a>,
205}
206
207/// Represents a reference type, ie. a class, an array, or a type variable.
208#[derive(Debug)]
209pub enum ReferenceType<'a> {
210 /// a class type
211 Class(ClassType<'a>),
212 /// a type variable
213 Variable(&'a str),
214 /// an array type
215 Array(Box<ArrayType<'a>>),
216}
217
218impl<'a> ReferenceType<'a> {
219 fn from_internal(s: &'a str, internal: internal::ReferenceType) -> Self {
220 match internal {
221 internal::ReferenceType::Class(ty) => {
222 ReferenceType::Class(ClassType::from_internal(s, ty))
223 }
224 internal::ReferenceType::Variable(r) => ReferenceType::Variable(r.apply(s)),
225 internal::ReferenceType::Array { dimension, ty } => {
226 ReferenceType::Array(Box::new(ArrayType {
227 dimension,
228 ty: JavaType::from_internal(s, *ty),
229 }))
230 }
231 }
232 }
233}
234
235/// Represents type variables in declaration position, e.g. as part of
236/// a class or method declaration which introduces variable types.
237///
238/// Examples of type parameters `X, Y, Z`:
239/// - `class Foo<X, Y, Z> {...}`
240/// - `<X, Y, Z> void foo(...) { ... }`
241#[derive(Debug)]
242pub struct TypeParameter<'a> {
243 pub name: &'a str,
244 pub class_bound: Option<ReferenceType<'a>>,
245 pub iface_bounds: Vec<ReferenceType<'a>>,
246}
247
248impl<'a> TypeParameter<'a> {
249 fn from_internal(s: &'a str, internal: internal::TypeParameter) -> Self {
250 Self {
251 name: internal.name.apply(s),
252 class_bound: internal
253 .class_bound
254 .map(|bound| ReferenceType::from_internal(s, bound)),
255 iface_bounds: internal
256 .iface_bounds
257 .into_iter()
258 .map(|bound| ReferenceType::from_internal(s, bound))
259 .collect(),
260 }
261 }
262}
263
264/// Represents the type in method return position.
265pub enum ResultType<'a> {
266 VoidType,
267 ValueType(JavaType<'a>),
268}
269
270impl<'a> ResultType<'a> {
271 fn from_internal(s: &'a str, internal: internal::ResultType) -> Self {
272 match internal {
273 internal::ResultType::VoidType => ResultType::VoidType,
274 internal::ResultType::ValueType(ty) => {
275 ResultType::ValueType(JavaType::from_internal(s, ty))
276 }
277 }
278 }
279}
280
281/// Represents (exception) types in method "throws" declaration position.
282pub enum ThrowsType<'a> {
283 ClassType(ClassType<'a>),
284 TypeVariable(&'a str),
285}
286
287impl<'a> ThrowsType<'a> {
288 fn from_internal(s: &'a str, internal: internal::ThrowsType) -> Self {
289 match internal {
290 internal::ThrowsType::ClassType(ty) => {
291 ThrowsType::ClassType(ClassType::from_internal(s, ty))
292 }
293 internal::ThrowsType::TypeVariable(name) => ThrowsType::TypeVariable(name.apply(s)),
294 }
295 }
296}
297
298// --------------------------------------------------------------------
299
300/// Error signaling a signature parse failure. The error references
301/// the originally parsed string providing convenience methods to
302/// inspect where the error occurred.
303#[derive(Debug)]
304pub struct ParseError<'a> {
305 signature: &'a str,
306 internal: internal::ParseError,
307}
308
309impl<'a> Display for ParseError<'a> {
310 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
311 write!(f, "{}", self.internal.error)
312 }
313}
314
315impl<'a> ParseError<'a> {
316 fn new(signature: &'a str, internal: internal::ParseError) -> Self {
317 Self {
318 signature,
319 internal,
320 }
321 }
322
323 /// Retrieves the error position in the parsed signature string.
324 pub fn position(&self) -> usize {
325 self.internal.position
326 }
327
328 /// Retrieves the parser error messages revealing what went wrong.
329 ///
330 /// The `Display` implementation of `ParseError` prints exactly
331 /// this (and only this) string.
332 pub fn message(&self) -> &str {
333 &self.internal.error
334 }
335
336 /// Retrieves the (parse) context in which the error
337 /// occurred. Useful only for debugging the parser.
338 pub fn context(&self) -> &str {
339 self.internal.context
340 }
341
342 /// Retrieves the original, full signature string which failed to
343 /// parsed and led to this error.
344 pub fn signature(&self) -> &'a str {
345 self.signature
346 }
347
348 /// Returns the unconsumed portion of the parsed signature string
349 /// at which the error occurred.
350 pub fn unconsumed(&self) -> &'a str {
351 &self.signature[self.internal.position..]
352 }
353
354 /// Retrieves the successfully consumed portion of the parsed
355 /// signature string.
356 pub fn consumed(&self) -> &'a str {
357 &self.signature[..self.internal.position]
358 }
359}
360
361/// An alias for `std::result::Result<T, ParseError>`
362pub type Result<'a, T> = std::result::Result<T, ParseError<'a>>;
363
364// --------------------------------------------------------------------
365
366/// Attempts to parse the given string as a [field signature.](FieldSignature)
367pub fn parse_field_signature(s: &str) -> Result<'_, FieldSignature<'_>> {
368 internal::parse(
369 "FieldSignature",
370 internal::consume_reference_type_signature,
371 s,
372 str::char_indices,
373 )
374 .map(|ty| FieldSignature(ReferenceType::from_internal(s, ty)))
375 .map_err(|e| ParseError::new(s, e))
376}
377
378/// Convenience method to parse the given string as a [field
379/// signature](FieldSignature) returning `true` upon success, `false`
380/// otherwise.
381pub fn is_field_signature(s: &str) -> bool {
382 parse_field_signature(s).is_ok()
383}
384
385// --------------------------------------------------------------------
386
387/// Attempts to parse the given string as a [class signature.](ClassSignature)
388pub fn parse_class_signature(s: &str) -> Result<ClassSignature<'_>> {
389 internal::parse(
390 "ClassSignature",
391 internal::consume_class_signature,
392 s,
393 str::char_indices,
394 )
395 .map(|parsed| ClassSignature {
396 type_params: parsed
397 .type_params
398 .into_iter()
399 .map(|p| TypeParameter::from_internal(s, p))
400 .collect(),
401 super_class: ClassType::from_internal(s, parsed.super_class),
402 super_ifaces: parsed
403 .super_ifaces
404 .into_iter()
405 .map(|ty| ClassType::from_internal(s, ty))
406 .collect(),
407 })
408 .map_err(|e| ParseError::new(s, e))
409}
410
411/// Convenience method to parse the given string as a [class
412/// signature](ClassSignature) returning `true` upon success, `false`
413/// otherwise.
414pub fn is_class_signature(s: &str) -> bool {
415 parse_class_signature(s).is_ok()
416}
417
418/// Attempts to parse the given string as a [method signature.](MethodSignature)
419pub fn parse_method_signature(s: &str) -> Result<MethodSignature<'_>> {
420 internal::parse(
421 "MethodSignature",
422 internal::consume_method_signature,
423 s,
424 str::char_indices,
425 )
426 .map(|parsed| MethodSignature {
427 type_params: parsed
428 .type_params
429 .into_iter()
430 .map(|p| TypeParameter::from_internal(s, p))
431 .collect(),
432 parameters: parsed
433 .parameters
434 .into_iter()
435 .map(|p| JavaType::from_internal(s, p))
436 .collect(),
437 result: ResultType::from_internal(s, parsed.result),
438 throws: parsed
439 .throws
440 .into_iter()
441 .map(|ty| ThrowsType::from_internal(s, ty))
442 .collect(),
443 })
444 .map_err(|e| ParseError::new(s, e))
445}
446
447/// Convenience method to parse the given string as a
448/// [method signature](MethodSignature) returning `true`
449/// upon success, `false` otherwise.
450pub fn is_method_signature(s: &str) -> bool {
451 parse_method_signature(s).is_ok()
452}
453
#[cfg(test)]
mod tests {
    use super::*;

    // ~ Each test below checks three things per valid signature: the
    //   signature with its last character removed is rejected, the
    //   signature with a trailing space is rejected, and the
    //   signature itself is accepted.

    #[test]
    fn test_is_field_signature() {
        // ~ inputs which are no signatures at all
        for bad in &["", " ", ";", "<TT;>"] {
            assert!(!is_field_signature(bad));
        }
        // ~ byte; base-type to be rejected; field signatures parse only reference-types
        assert!(!is_field_signature("B"));

        let valid = [
            "TT;",   // T
            "[[TT;", // T[][]
        ];
        for s in valid.iter().copied() {
            let truncated = &s[..s.len() - 1];
            assert!(!is_field_signature(truncated));

            let padded = format!("{} ", s);
            assert!(!is_field_signature(&padded));

            assert!(
                is_field_signature(s),
                "expected valid signature (but failed): {}",
                s
            );
        }
    }

    #[test]
    fn test_is_class_signature() {
        let valid = [
            // com.google.common.base.CaseFormat (abstract enum)
            "Ljava/lang/Enum<Lcom/google/common/base/CaseFormat;>;",
            // class Bar<T extends Serializable & Comparable<T>> {..}
            "<T::Ljava/io/Serializable;:Ljava/lang/Comparable<TT;>;>Ljava/lang/Object;",
            // com/sun/org/apache/xalan/internal/xsltc/compiler/util/MultiHashtable.class (openjdk21)
            "<K:Ljava/lang/Object;V:Ljava/lang/Object;>Ljava/lang/Object;",
            // jdk.httpserver/com/sun/net/httpserver/Headers class signature (openjdk16)
            "Ljava/lang/Object;Ljava/util/Map<Ljava/lang/String;Ljava/util/List<Ljava/lang/String;>;>;",
            // jdk.compiler/classes/com/sun/tools/javac/util/GraphUtils$DotVisitor.class (openjdk21)
            "<D:Ljava/lang/Object;N::Lcom/sun/tools/javac/util/GraphUtils$DottableNode<TD;TN;>;>Lcom/sun/tools/javac/util/GraphUtils$NodeVisitor<TD;TN;Ljava/lang/StringBuilder;>;",
            // jdk.incubator.vector/classes/jdk/incubator/vector/VectorOperators$ImplCache.class (openjdk21)
            "<OP::Ljdk/incubator/vector/VectorOperators$Operator;T:Ljava/lang/Object;>Ljava/lang/Object;",
            // java.base/classes/jdk/internal/loader/AbstractClassLoaderValue$Sub.class (openjdk21)
            "<K:Ljava/lang/Object;>Ljdk/internal/loader/AbstractClassLoaderValue<Ljdk/internal/loader/AbstractClassLoaderValue<TCLV;TV;>.Sub<TK;>;TV;>;",
            // java.base/classes/java/lang/invoke/BoundMethodHandle$Specializer$Factory.class (openjdk21)
            "Ljava/lang/invoke/ClassSpecializer<Ljava/lang/invoke/BoundMethodHandle;Ljava/lang/String;Ljava/lang/invoke/BoundMethodHandle$SpeciesData;>.Factory;",
        ];
        for s in valid.iter().copied() {
            let truncated = &s[..s.len() - 1];
            assert!(
                !is_class_signature(truncated),
                "failed to reject: `{}`",
                truncated
            );

            let padded = format!("{} ", s);
            assert!(
                !is_class_signature(&padded),
                "failed to reject: `{}`",
                padded
            );

            if let Err(e) = parse_class_signature(s) {
                panic!("failed to recognize `{s}` as class signature: {e}");
            }
        }
    }

    #[test]
    fn test_is_method_signature() {
        let valid = [
            // CopyOnWriteArrayList$COWSubListIterator#E next()
            "()TE;",
            // CopyOnWriteArrayList$COWSubListIterator#add(E);
            "(TE;)V",
            // CopyOnWriteArrayList$COWSubListIterator#forEachRemaining(java.util.function.Consumer<? super E>)
            "(Ljava/util/function/Consumer<-TE;>;)V",
            // <T> T[] toArray(T[]); e.g. ArrayList#toArray
            "<T:Ljava/lang/Object;>([TT;)[TT;",
        ];
        for s in valid.iter().copied() {
            let truncated = &s[..s.len() - 1];
            assert!(
                !is_method_signature(truncated),
                "failed to reject: `{}`",
                truncated
            );

            let padded = format!("{} ", s);
            assert!(
                !is_method_signature(&padded),
                "failed to reject: `{}`",
                padded
            );

            assert!(is_method_signature(s), "failed to recognize: `{}`", s);
        }
    }
}