java_signatures/lib.rs
1//! Validates/Parses Java Type Signatures according to the syntax
//! specified by the [JVM specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1).
3//!
//! This crate does _not_ attempt to parse entire java classfiles.
//! Instead, it focuses merely on parsing signature strings located as
//! attributes in classfiles. For parsing classfiles themselves there
//! already are a number of very nice crates on
//! [crates.io.](https://crates.io/search?q=%23classfile%20%23java)
//! Only some of them parse the signature strings and provide a
//! model over them. This crate is supposed to supplement those
//! classfile parsers which reveal the signatures as pure strings.
12//!
//! The signature parsers provided by this crate are strict in the
//! sense that the entire input string must be matched according to
//! the syntax rules. Leading (whitespace) or trailing characters are
//! not tolerated.
17//!
18//! Example:
19//! ```rust
20//! // ~ a signature corresponding to a `class Bar<T extends Serializable & Comparable<T>> {..}`
21//! // ~ to be obtained from a classfile using a corresponding parser, for example `cafebabe`
22//! let s = "<T::Ljava/io/Serializable;:Ljava/lang/Comparable<TT;>;>Ljava/lang/Object;";
23//! match parse_class_signature(s) {
24//! Ok(parsed) => {
25//! // ~ access to the individual parts of the signature
26//! assert_eq!(1, parsed.type_params.len());
27//! assert_eq!("T", parsed.type_params[0].name);
28//! assert!(parsed.type_params[0].class_bound.is_none());
29//! assert_eq!(2, parsed.type_params[0].iface_bounds.len());
30//! assert!(matches!(
31//! &parsed.type_params[0].iface_bounds[0],
32//! ReferenceType::Class(ClassType {
33//! base: SimpleClassType {
34//! name: "java/io/Serializable",
35//! ..
36//! },
37//! ..
38//! })
39//! ));
40//! // ...
41//!
42//! // ~ the `Display` implementation of the parsed
43//! // signature will produce the original signature
44//! // string again
45//! assert_eq!(s, format!("{parsed}"));
46//! }
47//! Err(e) => {
48//! eprintln!("invalid class signature:");
49//! eprintln!("> {}", e.signature());
50//! eprintln!("> {}^-- {e}", " ".repeat(e.position()));
51//! }
52//! }
53//! ```
54
55mod display;
56mod internal;
57
58// --------------------------------------------------------------------
59
60use std::fmt::Display;
61
62/// A parsed field signature; encodes the (possibly parameterized)
63/// type of a field, formal parameter, local variable, or record
64/// component declaration.
65///
66/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-610) for details.
67#[derive(Debug)]
68pub struct FieldSignature<'a>(pub ReferenceType<'a>);
69
70/// A parse class signature; encodes type information about a
71/// (possibly generic) class or interface declaration. It describes
72/// any type parameters of the class or interface, and lists its
73/// (possibly parameterized) direct superclass and direct
74/// superinterfaces, if any. A type parameter is described by its
75/// name, followed by any class bound and interface bounds.
76///
77/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-410) for details.
78#[derive(Debug)]
79pub struct ClassSignature<'a> {
80 pub type_params: Vec<TypeParameter<'a>>,
81 pub super_class: ClassType<'a>,
82 pub super_ifaces: Vec<ClassType<'a>>,
83}
84
85/// A parsed method signature; encodes type information about a
86/// (possibly generic) method declaration. It describes any type
87/// parameters of the method; the (possibly parameterized) types of
88/// any formal parameters; the (possibly parameterized) return type,
89/// if any; and the types of any exceptions declared in the method's
90/// throws clause.
91///
92/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-510) for details.
93pub struct MethodSignature<'a> {
94 pub type_params: Vec<TypeParameter<'a>>,
95 pub parameters: Vec<JavaType<'a>>,
96 pub result: ResultType<'a>,
97 pub throws: Vec<ThrowsType<'a>>,
98}
99
100/// Represents a primitive java type.
101pub use internal::BaseType;
102
103/// Represents a primitive or reference type.
104#[derive(Debug)]
105pub enum JavaType<'a> {
106 Base(BaseType),
107 Reference(ReferenceType<'a>),
108}
109
110impl<'a> JavaType<'a> {
111 fn from_internal(s: &'a str, internal: internal::JavaType) -> Self {
112 match internal {
113 internal::JavaType::Base(b) => JavaType::Base(b),
114 internal::JavaType::Reference(r) => {
115 JavaType::Reference(ReferenceType::from_internal(s, r))
116 }
117 }
118 }
119}
120
121/// Represents type variables in argument position, e.g. as part of method parameters.
122#[derive(Debug)]
123pub enum TypeArgument<'a> {
124 /// *; `<?>`
125 Unbounded,
126 /// (empty); `<ReferenceType>`
127 Default(ReferenceType<'a>),
128 /// extends; `<? extends ReferenceType>`
129 Extends(ReferenceType<'a>),
130 /// super; `<? super ReferenceType>`
131 Super(ReferenceType<'a>),
132}
133
134/// Represents a simple (ie. not nested) and possibly type-parametrized class type.
135#[derive(Debug)]
136pub struct SimpleClassType<'a> {
137 pub name: &'a str,
138 pub type_args: Vec<TypeArgument<'a>>,
139}
140
141impl<'a> SimpleClassType<'a> {
142 fn from_internal(s: &'a str, internal: internal::SimpleClassType) -> Self {
143 Self {
144 name: internal.0.apply(s),
145 type_args: internal
146 .1
147 .into_iter()
148 .map(|ta| match ta {
149 internal::TypeArgument::Unbounded => TypeArgument::Unbounded,
150 internal::TypeArgument::Default(ty) => {
151 TypeArgument::Default(ReferenceType::from_internal(s, ty))
152 }
153 internal::TypeArgument::Extends(ty) => {
154 TypeArgument::Extends(ReferenceType::from_internal(s, ty))
155 }
156 internal::TypeArgument::Super(ty) => {
157 TypeArgument::Super(ReferenceType::from_internal(s, ty))
158 }
159 })
160 .collect(),
161 }
162 }
163}
164
165/// Represents (a possibly nested, and possible type-parameterized)
166/// class type.
167///
168/// `base + nesting` together denote the ultimate path of the
169/// described class. This is, the concatenation of `base` and
170/// `nesting` represents the fully qualified class name with generic
171/// type information interspresed where necessary. Put another way,
172/// the last element of this concatenation denotes the simple name of
173/// the described class.
174///
175/// `base` alone represents the top-level class, while `nesting`
176/// denotes the recursive nesting within it.
177#[derive(Debug)]
178pub struct ClassType<'a> {
179 pub base: SimpleClassType<'a>,
180 pub nested: Vec<SimpleClassType<'a>>,
181}
182
183impl<'a> ClassType<'a> {
184 fn from_internal(s: &'a str, internal: internal::ClassType) -> Self {
185 Self {
186 base: SimpleClassType::from_internal(s, internal.0),
187 nested: internal
188 .1
189 .into_iter()
190 .map(|ty| SimpleClassType::from_internal(s, ty))
191 .collect(),
192 }
193 }
194}
195
196/// Represents an array type.
197#[derive(Debug)]
198pub struct ArrayType<'a> {
199 /// The dimention of the array. Always greater zero, by definition.
200 pub dimension: usize,
201 /// The type of the elements in the array.
202 pub ty: JavaType<'a>,
203}
204
205/// Represents a reference type, ie. a class, an array, or a type variable.
206#[derive(Debug)]
207pub enum ReferenceType<'a> {
208 /// a class type
209 Class(ClassType<'a>),
210 /// a type variable
211 Variable(&'a str),
212 /// an array type
213 Array(Box<ArrayType<'a>>),
214}
215
216impl<'a> ReferenceType<'a> {
217 fn from_internal(s: &'a str, internal: internal::ReferenceType) -> Self {
218 match internal {
219 internal::ReferenceType::Class(ty) => {
220 ReferenceType::Class(ClassType::from_internal(s, ty))
221 }
222 internal::ReferenceType::Variable(r) => ReferenceType::Variable(r.apply(s)),
223 internal::ReferenceType::Array { dimension, ty } => {
224 ReferenceType::Array(Box::new(ArrayType {
225 dimension,
226 ty: JavaType::from_internal(s, *ty),
227 }))
228 }
229 }
230 }
231}
232
233/// Represents type variables in declaration position, e.g. as part of
234/// a class or method declaration which introduces variable types.
235///
236/// Examples of type parameters `X, Y, Z`:
237/// - `class Foo<X, Y, Z> {...}`
238/// - `<X, Y, Z> void foo(...) { ... }`
239#[derive(Debug)]
240pub struct TypeParameter<'a> {
241 pub name: &'a str,
242 pub class_bound: Option<ReferenceType<'a>>,
243 pub iface_bounds: Vec<ReferenceType<'a>>,
244}
245
246impl<'a> TypeParameter<'a> {
247 fn from_internal(s: &'a str, internal: internal::TypeParameter) -> Self {
248 Self {
249 name: internal.name.apply(s),
250 class_bound: internal
251 .class_bound
252 .map(|bound| ReferenceType::from_internal(s, bound)),
253 iface_bounds: internal
254 .iface_bounds
255 .into_iter()
256 .map(|bound| ReferenceType::from_internal(s, bound))
257 .collect(),
258 }
259 }
260}
261
262/// Represents the type in method return position.
263pub enum ResultType<'a> {
264 VoidType,
265 ValueType(JavaType<'a>),
266}
267
268impl<'a> ResultType<'a> {
269 fn from_internal(s: &'a str, internal: internal::ResultType) -> Self {
270 match internal {
271 internal::ResultType::VoidType => ResultType::VoidType,
272 internal::ResultType::ValueType(ty) => {
273 ResultType::ValueType(JavaType::from_internal(s, ty))
274 }
275 }
276 }
277}
278
279/// Represents (exception) types in method "throws" declaration position.
280pub enum ThrowsType<'a> {
281 ClassType(ClassType<'a>),
282 TypeVariable(&'a str),
283}
284
285impl<'a> ThrowsType<'a> {
286 fn from_internal(s: &'a str, internal: internal::ThrowsType) -> Self {
287 match internal {
288 internal::ThrowsType::ClassType(ty) => {
289 ThrowsType::ClassType(ClassType::from_internal(s, ty))
290 }
291 internal::ThrowsType::TypeVariable(name) => ThrowsType::TypeVariable(name.apply(s)),
292 }
293 }
294}
295
296// --------------------------------------------------------------------
297
298/// Error signaling a signature parse failure. The error references
299/// the originally parsed string providing convenience methods to
300/// inspect where the error occurred.
301#[derive(Debug)]
302pub struct ParseError<'a> {
303 signature: &'a str,
304 internal: internal::ParseError,
305}
306
307impl<'a> Display for ParseError<'a> {
308 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
309 write!(f, "{}", self.internal.error)
310 }
311}
312
313impl<'a> ParseError<'a> {
314 fn new(signature: &'a str, internal: internal::ParseError) -> Self {
315 Self {
316 signature,
317 internal,
318 }
319 }
320
321 /// Retrieves the error position in the parsed signature string.
322 pub fn position(&self) -> usize {
323 self.internal.position
324 }
325
326 /// Retrieves the parser error messages revealing what went wrong.
327 ///
328 /// The `Display` implementation of `ParseError` prints exactly
329 /// this (and only this) string.
330 pub fn message(&self) -> &str {
331 &self.internal.error
332 }
333
334 /// Retrieves the (parse) context in which the error
335 /// occurred. Useful only for debugging the parser.
336 pub fn context(&self) -> &str {
337 self.internal.context
338 }
339
340 /// Retrieves the original, full signature string which failed to
341 /// parsed and led to this error.
342 pub fn signature(&self) -> &'a str {
343 self.signature
344 }
345
346 /// Returns the unconsumed portion of the parsed signature string
347 /// at which the error occurred.
348 pub fn unconsumed(&self) -> &'a str {
349 &self.signature[self.internal.position..]
350 }
351
352 /// Retrieves the successfully consumed portion of the parsed
353 /// signature string.
354 pub fn consumed(&self) -> &'a str {
355 &self.signature[..self.internal.position]
356 }
357}
358
359/// An alias for `std::result::Result<T, ParseError>`
360pub type Result<'a, T> = std::result::Result<T, ParseError<'a>>;
361
362// --------------------------------------------------------------------
363
364/// Attempts to parse the given string as a [field signature.](FieldSignature)
365pub fn parse_field_signature(s: &str) -> Result<'_, FieldSignature<'_>> {
366 internal::parse(
367 "FieldSignature",
368 internal::consume_reference_type_signature,
369 s,
370 str::char_indices,
371 )
372 .map(|ty| FieldSignature(ReferenceType::from_internal(s, ty)))
373 .map_err(|e| ParseError::new(s, e))
374}
375
376/// Convenience method to parse the given string as a [field
377/// signature](FieldSignature) returning `true` upon success, `false`
378/// otherwise.
379pub fn is_field_signature(s: &str) -> bool {
380 parse_field_signature(s).is_ok()
381}
382
383// --------------------------------------------------------------------
384
385/// Attempts to parse the given string as a [class signature.](ClassSignature)
386pub fn parse_class_signature(s: &str) -> Result<ClassSignature<'_>> {
387 internal::parse(
388 "ClassSignature",
389 internal::consume_class_signature,
390 s,
391 str::char_indices,
392 )
393 .map(|parsed| ClassSignature {
394 type_params: parsed
395 .type_params
396 .into_iter()
397 .map(|p| TypeParameter::from_internal(s, p))
398 .collect(),
399 super_class: ClassType::from_internal(s, parsed.super_class),
400 super_ifaces: parsed
401 .super_ifaces
402 .into_iter()
403 .map(|ty| ClassType::from_internal(s, ty))
404 .collect(),
405 })
406 .map_err(|e| ParseError::new(s, e))
407}
408
409/// Convenience method to parse the given string as a [class
410/// signature](ClassSignature) returning `true` upon success, `false`
411/// otherwise.
412pub fn is_class_signature(s: &str) -> bool {
413 parse_class_signature(s).is_ok()
414}
415
416/// Attempts to parse the given string as a [method signature.](MethodSignature)
417pub fn parse_method_signature(s: &str) -> Result<MethodSignature<'_>> {
418 internal::parse(
419 "MethodSignature",
420 internal::consume_method_signature,
421 s,
422 str::char_indices,
423 )
424 .map(|parsed| MethodSignature {
425 type_params: parsed
426 .type_params
427 .into_iter()
428 .map(|p| TypeParameter::from_internal(s, p))
429 .collect(),
430 parameters: parsed
431 .parameters
432 .into_iter()
433 .map(|p| JavaType::from_internal(s, p))
434 .collect(),
435 result: ResultType::from_internal(s, parsed.result),
436 throws: parsed
437 .throws
438 .into_iter()
439 .map(|ty| ThrowsType::from_internal(s, ty))
440 .collect(),
441 })
442 .map_err(|e| ParseError::new(s, e))
443}
444
445/// Convenience method to parse the given string as a
446/// [method signature](MethodSignature) returning `true`
447/// upon success, `false` otherwise.
448pub fn is_method_signature(s: &str) -> bool {
449 parse_method_signature(s).is_ok()
450}
451
#[cfg(test)]
mod tests {
    use super::*;

    // Each test below checks, for every valid sample signature, that
    //   - the signature with its last character removed is rejected,
    //   - the signature with a trailing blank is rejected, and
    //   - the signature itself is accepted.

    #[test]
    fn test_is_field_signature() {
        assert!(!is_field_signature(""));
        assert!(!is_field_signature(" "));
        assert!(!is_field_signature(";"));
        assert!(!is_field_signature("<TT;>"));
        // ~ byte; base-type to be rejected; field signatures parse only reference-types
        assert!(!is_field_signature("B"));
        for s in &[
            "TT;",   // T
            "[[TT;", // T[][]
        ] {
            assert!(!is_field_signature(&s[..s.len() - 1]));
            assert!(!is_field_signature(&format!("{} ", s)));
            assert!(
                is_field_signature(s),
                "expected valid signature (but failed): {}",
                s
            );
        }
    }

    #[test]
    fn test_is_class_signature() {
        for s in &[
            "Ljava/lang/Enum<Lcom/google/common/base/CaseFormat;>;", // com.google.common.base.CaseFormat (abstract enum)
            "<T::Ljava/io/Serializable;:Ljava/lang/Comparable<TT;>;>Ljava/lang/Object;", // class Bar<T extends Serializable & Comparable<T>> {..}
            "<K:Ljava/lang/Object;V:Ljava/lang/Object;>Ljava/lang/Object;", // com/sun/org/apache/xalan/internal/xsltc/compiler/util/MultiHashtable.class (openjdk21)
            "Ljava/lang/Object;Ljava/util/Map<Ljava/lang/String;Ljava/util/List<Ljava/lang/String;>;>;", // jdk.httpserver/com/sun/net/httpserver/Headers class signature (openjdk16)
            "<D:Ljava/lang/Object;N::Lcom/sun/tools/javac/util/GraphUtils$DottableNode<TD;TN;>;>Lcom/sun/tools/javac/util/GraphUtils$NodeVisitor<TD;TN;Ljava/lang/StringBuilder;>;", // jdk.compiler/classes/com/sun/tools/javac/util/GraphUtils$DotVisitor.class (openjdk21)
            "<OP::Ljdk/incubator/vector/VectorOperators$Operator;T:Ljava/lang/Object;>Ljava/lang/Object;", // jdk.incubator.vector/classes/jdk/incubator/vector/VectorOperators$ImplCache.class (openjdk21)
            "<K:Ljava/lang/Object;>Ljdk/internal/loader/AbstractClassLoaderValue<Ljdk/internal/loader/AbstractClassLoaderValue<TCLV;TV;>.Sub<TK;>;TV;>;", // java.base/classes/jdk/internal/loader/AbstractClassLoaderValue$Sub.class (openjdk21)
            "Ljava/lang/invoke/ClassSpecializer<Ljava/lang/invoke/BoundMethodHandle;Ljava/lang/String;Ljava/lang/invoke/BoundMethodHandle$SpeciesData;>.Factory;", // java.base/classes/java/lang/invoke/BoundMethodHandle$Specializer$Factory.class (openjdk21)
        ] {
            {
                let s = &s[..s.len() - 1];
                assert!(!is_class_signature(s), "failed to reject: `{}`", s);
            }
            {
                let s = &format!("{} ", s);
                assert!(!is_class_signature(s), "failed to reject: `{}`", s);
            }
            if let Err(e) = parse_class_signature(s) {
                panic!("failed to recognize `{s}` as class signature: {e}");
            }
        }
    }

    #[test]
    fn test_is_method_signature() {
        for s in &[
            "()TE;",  // CopyOnWriteArrayList$COWSubListIterator#E next()
            "(TE;)V", // CopyOnWriteArrayList$COWSubListIterator#add(E);
            "(Ljava/util/function/Consumer<-TE;>;)V", // CopyOnWriteArrayList$COWSubListIterator#forEachRemaining(java.util.function.Consumer<? super E>)
            "<T:Ljava/lang/Object;>([TT;)[TT;", // ArrayList#<T> T[] toArray(T[]);
        ] {
            {
                let s = &s[..s.len() - 1];
                assert!(!is_method_signature(s), "failed to reject: `{}`", s);
            }
            {
                let s = &format!("{} ", s);
                assert!(!is_method_signature(s), "failed to reject: `{}`", s);
            }
            assert!(is_method_signature(s), "failed to recognize: `{}`", s);
        }
    }
}