java_signatures/
lib.rs

//! Validates/Parses Java Type Signatures according to the syntax
//! specified by the [JVM specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1).
3//!
//! This crate does _not_ attempt to parse entire Java classfiles.
//! Instead, it focuses merely on parsing signature strings located as
//! attributes in classfiles.  For parsing classfiles themselves there
//! are already a number of very nice crates on
//! [crates.io.](https://crates.io/search?q=%23classfile%20%23java)
//! Only some of them parse the signature strings and provide a
//! model over them.  This crate is supposed to supplement those
//! classfile parsers which reveal the signatures as plain strings.
12//!
//! The signature parsers provided by this crate are strict in the
//! sense that the entire input string must be matched according to
//! the syntax rules. Leading or trailing characters (including
//! whitespace) are not tolerated.
17//!
18//! Example:
19//! ```rust
20//! // ~ a signature corresponding to a `class Bar<T extends Serializable & Comparable<T>> {..}`
21//! // ~ to be obtained from a classfile using a corresponding parser, for example `cafebabe`
22//! let s = "<T::Ljava/io/Serializable;:Ljava/lang/Comparable<TT;>;>Ljava/lang/Object;";
23//! match parse_class_signature(s) {
24//!     Ok(parsed) => {
25//!         // ~ access to the individual parts of the signature
26//!         assert_eq!(1, parsed.type_params.len());
27//!         assert_eq!("T", parsed.type_params[0].name);
28//!         assert!(parsed.type_params[0].class_bound.is_none());
29//!         assert_eq!(2, parsed.type_params[0].iface_bounds.len());
30//!         assert!(matches!(
31//!             &parsed.type_params[0].iface_bounds[0],
32//!             ReferenceType::Class(ClassType {
33//!                 base: SimpleClassType {
34//!                     name: "java/io/Serializable",
35//!                     ..
36//!                 },
37//!                 ..
38//!             })
39//!         ));
40//!         // ...
41//!
42//!         // ~ the `Display` implementation of the parsed
43//!         // signature will produce the original signature
44//!         // string again
45//!         assert_eq!(s, format!("{parsed}"));
46//!     }
47//!     Err(e) => {
48//!         eprintln!("invalid class signature:");
49//!         eprintln!("> {}", e.signature());
50//!         eprintln!("> {}^-- {e}", " ".repeat(e.position()));
51//!     }
52//! }
53//! ```
54
55mod display;
56mod internal;
57
58// --------------------------------------------------------------------
59
60use std::fmt::Display;
61
62/// A parsed field signature; encodes the (possibly parameterized)
63/// type of a field, formal parameter, local variable, or record
64/// component declaration.
65///
66/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-610) for details.
67#[derive(Debug)]
68pub struct FieldSignature<'a>(pub ReferenceType<'a>);
69
70/// A parse class signature; encodes type information about a
71/// (possibly generic) class or interface declaration. It describes
72/// any type parameters of the class or interface, and lists its
73/// (possibly parameterized) direct superclass and direct
74/// superinterfaces, if any. A type parameter is described by its
75/// name, followed by any class bound and interface bounds.
76///
77/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-410) for details.
78#[derive(Debug)]
79pub struct ClassSignature<'a> {
80    pub type_params: Vec<TypeParameter<'a>>,
81    pub super_class: ClassType<'a>,
82    pub super_ifaces: Vec<ClassType<'a>>,
83}
84
85/// A parsed method signature; encodes type information about a
86/// (possibly generic) method declaration. It describes any type
87/// parameters of the method; the (possibly parameterized) types of
88/// any formal parameters; the (possibly parameterized) return type,
89/// if any; and the types of any exceptions declared in the method's
90/// throws clause.
91///
92/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-510) for details.
93pub struct MethodSignature<'a> {
94    pub type_params: Vec<TypeParameter<'a>>,
95    pub parameters: Vec<JavaType<'a>>,
96    pub result: ResultType<'a>,
97    pub throws: Vec<ThrowsType<'a>>,
98}
99
100/// Represents a primitive java type.
101pub use internal::BaseType;
102
103/// Represents a primitive or reference type.
104#[derive(Debug)]
105pub enum JavaType<'a> {
106    Base(BaseType),
107    Reference(ReferenceType<'a>),
108}
109
110impl<'a> JavaType<'a> {
111    fn from_internal(s: &'a str, internal: internal::JavaType) -> Self {
112        match internal {
113            internal::JavaType::Base(b) => JavaType::Base(b),
114            internal::JavaType::Reference(r) => {
115                JavaType::Reference(ReferenceType::from_internal(s, r))
116            }
117        }
118    }
119}
120
121/// Represents type variables in argument position, e.g. as part of method parameters.
122#[derive(Debug)]
123pub enum TypeArgument<'a> {
124    /// *; `<?>`
125    Unbounded,
126    /// (empty); `<ReferenceType>`
127    Default(ReferenceType<'a>),
128    /// extends; `<? extends ReferenceType>`
129    Extends(ReferenceType<'a>),
130    /// super; `<? super ReferenceType>`
131    Super(ReferenceType<'a>),
132}
133
134/// Represents a simple (ie. not nested) and possibly type-parametrized class type.
135#[derive(Debug)]
136pub struct SimpleClassType<'a> {
137    pub name: &'a str,
138    pub type_args: Vec<TypeArgument<'a>>,
139}
140
141impl<'a> SimpleClassType<'a> {
142    fn from_internal(s: &'a str, internal: internal::SimpleClassType) -> Self {
143        Self {
144            name: internal.0.apply(s),
145            type_args: internal
146                .1
147                .into_iter()
148                .map(|ta| match ta {
149                    internal::TypeArgument::Unbounded => TypeArgument::Unbounded,
150                    internal::TypeArgument::Default(ty) => {
151                        TypeArgument::Default(ReferenceType::from_internal(s, ty))
152                    }
153                    internal::TypeArgument::Extends(ty) => {
154                        TypeArgument::Extends(ReferenceType::from_internal(s, ty))
155                    }
156                    internal::TypeArgument::Super(ty) => {
157                        TypeArgument::Super(ReferenceType::from_internal(s, ty))
158                    }
159                })
160                .collect(),
161        }
162    }
163}
164
165/// Represents (a possibly nested, and possible type-parameterized)
166/// class type.
167///
168/// `base + nesting` together denote the ultimate path of the
169/// described class.  This is, the concatenation of `base` and
170/// `nesting` represents the fully qualified class name with generic
171/// type information interspresed where necessary.  Put another way,
172/// the last element of this concatenation denotes the simple name of
173/// the described class.
174///
175/// `base` alone represents the top-level class, while `nesting`
176/// denotes the recursive nesting within it.
177#[derive(Debug)]
178pub struct ClassType<'a> {
179    pub base: SimpleClassType<'a>,
180    pub nested: Vec<SimpleClassType<'a>>,
181}
182
183impl<'a> ClassType<'a> {
184    fn from_internal(s: &'a str, internal: internal::ClassType) -> Self {
185        Self {
186            base: SimpleClassType::from_internal(s, internal.0),
187            nested: internal
188                .1
189                .into_iter()
190                .map(|ty| SimpleClassType::from_internal(s, ty))
191                .collect(),
192        }
193    }
194}
195
196/// Represents an array type.
197#[derive(Debug)]
198pub struct ArrayType<'a> {
199    /// The dimention of the array. Always greater zero, by definition.
200    pub dimension: usize,
201    /// The type of the elements in the array.
202    pub ty: JavaType<'a>,
203}
204
205/// Represents a reference type, ie. a class, an array, or a type variable.
206#[derive(Debug)]
207pub enum ReferenceType<'a> {
208    /// a class type
209    Class(ClassType<'a>),
210    /// a type variable
211    Variable(&'a str),
212    /// an array type
213    Array(Box<ArrayType<'a>>),
214}
215
216impl<'a> ReferenceType<'a> {
217    fn from_internal(s: &'a str, internal: internal::ReferenceType) -> Self {
218        match internal {
219            internal::ReferenceType::Class(ty) => {
220                ReferenceType::Class(ClassType::from_internal(s, ty))
221            }
222            internal::ReferenceType::Variable(r) => ReferenceType::Variable(r.apply(s)),
223            internal::ReferenceType::Array { dimension, ty } => {
224                ReferenceType::Array(Box::new(ArrayType {
225                    dimension,
226                    ty: JavaType::from_internal(s, *ty),
227                }))
228            }
229        }
230    }
231}
232
233/// Represents type variables in declaration position, e.g. as part of
234/// a class or method declaration which introduces variable types.
235///
236/// Examples of type parameters `X, Y, Z`:
237/// - `class Foo<X, Y, Z> {...}`
238/// - `<X, Y, Z> void foo(...) { ... }`
239#[derive(Debug)]
240pub struct TypeParameter<'a> {
241    pub name: &'a str,
242    pub class_bound: Option<ReferenceType<'a>>,
243    pub iface_bounds: Vec<ReferenceType<'a>>,
244}
245
246impl<'a> TypeParameter<'a> {
247    fn from_internal(s: &'a str, internal: internal::TypeParameter) -> Self {
248        Self {
249            name: internal.name.apply(s),
250            class_bound: internal
251                .class_bound
252                .map(|bound| ReferenceType::from_internal(s, bound)),
253            iface_bounds: internal
254                .iface_bounds
255                .into_iter()
256                .map(|bound| ReferenceType::from_internal(s, bound))
257                .collect(),
258        }
259    }
260}
261
262/// Represents the type in method return position.
263pub enum ResultType<'a> {
264    VoidType,
265    ValueType(JavaType<'a>),
266}
267
268impl<'a> ResultType<'a> {
269    fn from_internal(s: &'a str, internal: internal::ResultType) -> Self {
270        match internal {
271            internal::ResultType::VoidType => ResultType::VoidType,
272            internal::ResultType::ValueType(ty) => {
273                ResultType::ValueType(JavaType::from_internal(s, ty))
274            }
275        }
276    }
277}
278
279/// Represents (exception) types in method "throws" declaration position.
280pub enum ThrowsType<'a> {
281    ClassType(ClassType<'a>),
282    TypeVariable(&'a str),
283}
284
285impl<'a> ThrowsType<'a> {
286    fn from_internal(s: &'a str, internal: internal::ThrowsType) -> Self {
287        match internal {
288            internal::ThrowsType::ClassType(ty) => {
289                ThrowsType::ClassType(ClassType::from_internal(s, ty))
290            }
291            internal::ThrowsType::TypeVariable(name) => ThrowsType::TypeVariable(name.apply(s)),
292        }
293    }
294}
295
296// --------------------------------------------------------------------
297
298/// Error signaling a signature parse failure. The error references
299/// the originally parsed string providing convenience methods to
300/// inspect where the error occurred.
301#[derive(Debug)]
302pub struct ParseError<'a> {
303    signature: &'a str,
304    internal: internal::ParseError,
305}
306
307impl<'a> Display for ParseError<'a> {
308    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
309        write!(f, "{}", self.internal.error)
310    }
311}
312
313impl<'a> ParseError<'a> {
314    fn new(signature: &'a str, internal: internal::ParseError) -> Self {
315        Self {
316            signature,
317            internal,
318        }
319    }
320
321    /// Retrieves the error position in the parsed signature string.
322    pub fn position(&self) -> usize {
323        self.internal.position
324    }
325
326    /// Retrieves the parser error messages revealing what went wrong.
327    ///
328    /// The `Display` implementation of `ParseError` prints exactly
329    /// this (and only this) string.
330    pub fn message(&self) -> &str {
331        &self.internal.error
332    }
333
334    /// Retrieves the (parse) context in which the error
335    /// occurred. Useful only for debugging the parser.
336    pub fn context(&self) -> &str {
337        self.internal.context
338    }
339
340    /// Retrieves the original, full signature string which failed to
341    /// parsed and led to this error.
342    pub fn signature(&self) -> &'a str {
343        self.signature
344    }
345
346    /// Returns the unconsumed portion of the parsed signature string
347    /// at which the error occurred.
348    pub fn unconsumed(&self) -> &'a str {
349        &self.signature[self.internal.position..]
350    }
351
352    /// Retrieves the successfully consumed portion of the parsed
353    /// signature string.
354    pub fn consumed(&self) -> &'a str {
355        &self.signature[..self.internal.position]
356    }
357}
358
359/// An alias for `std::result::Result<T, ParseError>`
360pub type Result<'a, T> = std::result::Result<T, ParseError<'a>>;
361
362// --------------------------------------------------------------------
363
364/// Attempts to parse the given string as a [field signature.](FieldSignature)
365pub fn parse_field_signature(s: &str) -> Result<'_, FieldSignature<'_>> {
366    internal::parse(
367        "FieldSignature",
368        internal::consume_reference_type_signature,
369        s,
370        str::char_indices,
371    )
372    .map(|ty| FieldSignature(ReferenceType::from_internal(s, ty)))
373    .map_err(|e| ParseError::new(s, e))
374}
375
376/// Convenience method to parse the given string as a [field
377/// signature](FieldSignature) returning `true` upon success, `false`
378/// otherwise.
379pub fn is_field_signature(s: &str) -> bool {
380    parse_field_signature(s).is_ok()
381}
382
383// --------------------------------------------------------------------
384
385/// Attempts to parse the given string as a [class signature.](ClassSignature)
386pub fn parse_class_signature(s: &str) -> Result<ClassSignature<'_>> {
387    internal::parse(
388        "ClassSignature",
389        internal::consume_class_signature,
390        s,
391        str::char_indices,
392    )
393    .map(|parsed| ClassSignature {
394        type_params: parsed
395            .type_params
396            .into_iter()
397            .map(|p| TypeParameter::from_internal(s, p))
398            .collect(),
399        super_class: ClassType::from_internal(s, parsed.super_class),
400        super_ifaces: parsed
401            .super_ifaces
402            .into_iter()
403            .map(|ty| ClassType::from_internal(s, ty))
404            .collect(),
405    })
406    .map_err(|e| ParseError::new(s, e))
407}
408
409/// Convenience method to parse the given string as a [class
410/// signature](ClassSignature) returning `true` upon success, `false`
411/// otherwise.
412pub fn is_class_signature(s: &str) -> bool {
413    parse_class_signature(s).is_ok()
414}
415
416/// Attempts to parse the given string as a [method signature.](MethodSignature)
417pub fn parse_method_signature(s: &str) -> Result<MethodSignature<'_>> {
418    internal::parse(
419        "MethodSignature",
420        internal::consume_method_signature,
421        s,
422        str::char_indices,
423    )
424    .map(|parsed| MethodSignature {
425        type_params: parsed
426            .type_params
427            .into_iter()
428            .map(|p| TypeParameter::from_internal(s, p))
429            .collect(),
430        parameters: parsed
431            .parameters
432            .into_iter()
433            .map(|p| JavaType::from_internal(s, p))
434            .collect(),
435        result: ResultType::from_internal(s, parsed.result),
436        throws: parsed
437            .throws
438            .into_iter()
439            .map(|ty| ThrowsType::from_internal(s, ty))
440            .collect(),
441    })
442    .map_err(|e| ParseError::new(s, e))
443}
444
445/// Convenience method to parse the given string as a
446/// [method signature](MethodSignature) returning `true`
447/// upon success, `false` otherwise.
448pub fn is_method_signature(s: &str) -> bool {
449    parse_method_signature(s).is_ok()
450}
451
#[cfg(test)]
mod tests {
    use super::*;

    /// Yields `s` without its last byte; all samples below end in an
    /// ASCII character.
    fn without_last(s: &str) -> &str {
        &s[..s.len() - 1]
    }

    /// Yields `s` followed by a single space.
    fn with_trailing_space(s: &str) -> String {
        format!("{} ", s)
    }

    #[test]
    fn test_is_field_signature() {
        assert!(!is_field_signature(""));
        assert!(!is_field_signature(" "));
        assert!(!is_field_signature(";"));
        assert!(!is_field_signature("<TT;>"));
        // ~ byte; base-type to be rejected; field signatures parse only reference-types
        assert!(!is_field_signature("B"));

        const VALID: &[&str] = &[
            "TT;",   // T
            "[[TT;", // T[][]
        ];
        for s in VALID {
            // ~ neither a truncated nor a padded variant may be accepted
            assert!(!is_field_signature(without_last(s)));
            assert!(!is_field_signature(&with_trailing_space(s)));
            assert!(
                is_field_signature(s),
                "expected valid signature (but failed): {}",
                s
            );
        }
    }

    #[test]
    fn test_is_class_signature() {
        const VALID: &[&str] = &[
            // com.google.common.base.CaseFormat (abstract enum)
            "Ljava/lang/Enum<Lcom/google/common/base/CaseFormat;>;",
            // class Bar<T extends Serializable & Comparable<T>> {..}
            "<T::Ljava/io/Serializable;:Ljava/lang/Comparable<TT;>;>Ljava/lang/Object;",
            // com/sun/org/apache/xalan/internal/xsltc/compiler/util/MultiHashtable.class (openjdk21)
            "<K:Ljava/lang/Object;V:Ljava/lang/Object;>Ljava/lang/Object;",
            // jdk.httpserver/com/sun/net/httpserver/Headers class signature (openjdk16)
            "Ljava/lang/Object;Ljava/util/Map<Ljava/lang/String;Ljava/util/List<Ljava/lang/String;>;>;",
            // jdk.compiler/classes/com/sun/tools/javac/util/GraphUtils$DotVisitor.class (openjdk21)
            "<D:Ljava/lang/Object;N::Lcom/sun/tools/javac/util/GraphUtils$DottableNode<TD;TN;>;>Lcom/sun/tools/javac/util/GraphUtils$NodeVisitor<TD;TN;Ljava/lang/StringBuilder;>;",
            // jdk.incubator.vector/classes/jdk/incubator/vector/VectorOperators$ImplCache.class (openjdk21)
            "<OP::Ljdk/incubator/vector/VectorOperators$Operator;T:Ljava/lang/Object;>Ljava/lang/Object;",
            // java.base/classes/jdk/internal/loader/AbstractClassLoaderValue$Sub.class (openjdk21)
            "<K:Ljava/lang/Object;>Ljdk/internal/loader/AbstractClassLoaderValue<Ljdk/internal/loader/AbstractClassLoaderValue<TCLV;TV;>.Sub<TK;>;TV;>;",
            // java.base/classes/java/lang/invoke/BoundMethodHandle$Specializer$Factory.class (openjdk21)
            "Ljava/lang/invoke/ClassSpecializer<Ljava/lang/invoke/BoundMethodHandle;Ljava/lang/String;Ljava/lang/invoke/BoundMethodHandle$SpeciesData;>.Factory;",
        ];
        for s in VALID {
            let truncated = without_last(s);
            assert!(
                !is_class_signature(truncated),
                "failed to reject: `{}`",
                truncated
            );

            let padded = with_trailing_space(s);
            assert!(!is_class_signature(&padded), "failed to reject: `{}`", padded);

            if let Err(e) = parse_class_signature(s) {
                panic!("failed to recognize `{s}` as class signature: {e}");
            }
        }
    }

    #[test]
    fn test_is_method_signature() {
        const VALID: &[&str] = &[
            // CopyOnWriteArrayList$COWSubListIterator#E next()
            "()TE;",
            // CopyOnWriteArrayList$COWSubListIterator#add(E);
            "(TE;)V",
            // CopyOnWriteArrayList$COWSubListIterator#forEachRemaining(java.util.function.Consumer<? super E>)
            "(Ljava/util/function/Consumer<-TE;>;)V",
            // generic array-to-array method, i.e. `<T> T[] toArray(T[])`
            "<T:Ljava/lang/Object;>([TT;)[TT;",
        ];
        for s in VALID {
            let truncated = without_last(s);
            assert!(
                !is_method_signature(truncated),
                "failed to reject: `{}`",
                truncated
            );

            let padded = with_trailing_space(s);
            assert!(
                !is_method_signature(&padded),
                "failed to reject: `{}`",
                padded
            );

            assert!(is_method_signature(s), "failed to recognize: `{}`", s);
        }
    }
}