java_signatures/
lib.rs

//! Validates/parses Java type signatures according to the syntax
//! specified by the [JVM specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1).
3//!
//! This crate does _not_ attempt to parse entire Java classfiles.
//! Instead, it focuses merely on parsing signature strings located as
//! attributes in classfiles.  For parsing classfiles themselves there
//! already are a number of very nice crates on
//! [crates.io.](https://crates.io/search?q=%23classfile%20%23java)
//! Only some of them parse the signature strings and provide a
//! model over them.  This crate is supposed to supplement those
//! classfile parsers which reveal the signatures as plain strings.
12//!
//! The signature parsers provided by this crate are strict in the
//! sense that the entire input string must be matched according to
//! the syntax rules. Leading (whitespace) or trailing characters are
//! not tolerated.
17//!
18//! Example:
19//! ```rust
20//! use java_signatures::{parse_class_signature, ReferenceType, ClassType, SimpleClassType};
21//!
22//! // ~ a signature corresponding to a `class Bar<T extends Serializable & Comparable<T>> {..}`
23//! // ~ to be obtained from a classfile using a corresponding parser, for example `cafebabe`
24//! let s = "<T::Ljava/io/Serializable;:Ljava/lang/Comparable<TT;>;>Ljava/lang/Object;";
25//! match parse_class_signature(s) {
26//!     Ok(parsed) => {
27//!         // ~ access to the individual parts of the signature
28//!         assert_eq!(1, parsed.type_params.len());
29//!         assert_eq!("T", parsed.type_params[0].name);
30//!         assert!(parsed.type_params[0].class_bound.is_none());
31//!         assert_eq!(2, parsed.type_params[0].iface_bounds.len());
32//!         assert!(matches!(
33//!             &parsed.type_params[0].iface_bounds[0],
34//!             ReferenceType::Class(ClassType {
35//!                 base: SimpleClassType {
36//!                     name: "java/io/Serializable",
37//!                     ..
38//!                 },
39//!                 ..
40//!             })
41//!         ));
42//!         // ...
43//!
44//!         // ~ the `Display` implementation of the parsed
45//!         // signature will produce the original signature
46//!         // string again
47//!         assert_eq!(s, format!("{parsed}"));
48//!     }
49//!     Err(e) => {
50//!         eprintln!("invalid class signature:");
51//!         eprintln!("> {}", e.signature());
52//!         eprintln!("> {}^-- {e}", " ".repeat(e.position()));
53//!     }
54//! }
55//! ```
56
57mod display;
58mod internal;
59
60// --------------------------------------------------------------------
61
62use std::fmt::Display;
63
64/// A parsed field signature; encodes the (possibly parameterized)
65/// type of a field, formal parameter, local variable, or record
66/// component declaration.
67///
68/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-610) for details.
69#[derive(Debug)]
70pub struct FieldSignature<'a>(pub ReferenceType<'a>);
71
72/// A parse class signature; encodes type information about a
73/// (possibly generic) class or interface declaration. It describes
74/// any type parameters of the class or interface, and lists its
75/// (possibly parameterized) direct superclass and direct
76/// superinterfaces, if any. A type parameter is described by its
77/// name, followed by any class bound and interface bounds.
78///
79/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-410) for details.
80#[derive(Debug)]
81pub struct ClassSignature<'a> {
82    pub type_params: Vec<TypeParameter<'a>>,
83    pub super_class: ClassType<'a>,
84    pub super_ifaces: Vec<ClassType<'a>>,
85}
86
87/// A parsed method signature; encodes type information about a
88/// (possibly generic) method declaration. It describes any type
89/// parameters of the method; the (possibly parameterized) types of
90/// any formal parameters; the (possibly parameterized) return type,
91/// if any; and the types of any exceptions declared in the method's
92/// throws clause.
93///
94/// See the [specification](https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.7.9.1-510) for details.
95pub struct MethodSignature<'a> {
96    pub type_params: Vec<TypeParameter<'a>>,
97    pub parameters: Vec<JavaType<'a>>,
98    pub result: ResultType<'a>,
99    pub throws: Vec<ThrowsType<'a>>,
100}
101
102/// Represents a primitive java type.
103pub use internal::BaseType;
104
105/// Represents a primitive or reference type.
106#[derive(Debug)]
107pub enum JavaType<'a> {
108    Base(BaseType),
109    Reference(ReferenceType<'a>),
110}
111
112impl<'a> JavaType<'a> {
113    fn from_internal(s: &'a str, internal: internal::JavaType) -> Self {
114        match internal {
115            internal::JavaType::Base(b) => JavaType::Base(b),
116            internal::JavaType::Reference(r) => {
117                JavaType::Reference(ReferenceType::from_internal(s, r))
118            }
119        }
120    }
121}
122
123/// Represents type variables in argument position, e.g. as part of method parameters.
124#[derive(Debug)]
125pub enum TypeArgument<'a> {
126    /// *; `<?>`
127    Unbounded,
128    /// (empty); `<ReferenceType>`
129    Default(ReferenceType<'a>),
130    /// extends; `<? extends ReferenceType>`
131    Extends(ReferenceType<'a>),
132    /// super; `<? super ReferenceType>`
133    Super(ReferenceType<'a>),
134}
135
136/// Represents a simple (ie. not nested) and possibly type-parametrized class type.
137#[derive(Debug)]
138pub struct SimpleClassType<'a> {
139    pub name: &'a str,
140    pub type_args: Vec<TypeArgument<'a>>,
141}
142
143impl<'a> SimpleClassType<'a> {
144    fn from_internal(s: &'a str, internal: internal::SimpleClassType) -> Self {
145        Self {
146            name: internal.0.apply(s),
147            type_args: internal
148                .1
149                .into_iter()
150                .map(|ta| match ta {
151                    internal::TypeArgument::Unbounded => TypeArgument::Unbounded,
152                    internal::TypeArgument::Default(ty) => {
153                        TypeArgument::Default(ReferenceType::from_internal(s, ty))
154                    }
155                    internal::TypeArgument::Extends(ty) => {
156                        TypeArgument::Extends(ReferenceType::from_internal(s, ty))
157                    }
158                    internal::TypeArgument::Super(ty) => {
159                        TypeArgument::Super(ReferenceType::from_internal(s, ty))
160                    }
161                })
162                .collect(),
163        }
164    }
165}
166
167/// Represents (a possibly nested, and possible type-parameterized)
168/// class type.
169///
170/// `base + nesting` together denote the ultimate path of the
171/// described class.  This is, the concatenation of `base` and
172/// `nesting` represents the fully qualified class name with generic
173/// type information interspresed where necessary.  Put another way,
174/// the last element of this concatenation denotes the simple name of
175/// the described class.
176///
177/// `base` alone represents the top-level class, while `nesting`
178/// denotes the recursive nesting within it.
179#[derive(Debug)]
180pub struct ClassType<'a> {
181    pub base: SimpleClassType<'a>,
182    pub nested: Vec<SimpleClassType<'a>>,
183}
184
185impl<'a> ClassType<'a> {
186    fn from_internal(s: &'a str, internal: internal::ClassType) -> Self {
187        Self {
188            base: SimpleClassType::from_internal(s, internal.0),
189            nested: internal
190                .1
191                .into_iter()
192                .map(|ty| SimpleClassType::from_internal(s, ty))
193                .collect(),
194        }
195    }
196}
197
198/// Represents an array type.
199#[derive(Debug)]
200pub struct ArrayType<'a> {
201    /// The dimention of the array. Always greater zero, by definition.
202    pub dimension: usize,
203    /// The type of the elements in the array.
204    pub ty: JavaType<'a>,
205}
206
207/// Represents a reference type, ie. a class, an array, or a type variable.
208#[derive(Debug)]
209pub enum ReferenceType<'a> {
210    /// a class type
211    Class(ClassType<'a>),
212    /// a type variable
213    Variable(&'a str),
214    /// an array type
215    Array(Box<ArrayType<'a>>),
216}
217
218impl<'a> ReferenceType<'a> {
219    fn from_internal(s: &'a str, internal: internal::ReferenceType) -> Self {
220        match internal {
221            internal::ReferenceType::Class(ty) => {
222                ReferenceType::Class(ClassType::from_internal(s, ty))
223            }
224            internal::ReferenceType::Variable(r) => ReferenceType::Variable(r.apply(s)),
225            internal::ReferenceType::Array { dimension, ty } => {
226                ReferenceType::Array(Box::new(ArrayType {
227                    dimension,
228                    ty: JavaType::from_internal(s, *ty),
229                }))
230            }
231        }
232    }
233}
234
235/// Represents type variables in declaration position, e.g. as part of
236/// a class or method declaration which introduces variable types.
237///
238/// Examples of type parameters `X, Y, Z`:
239/// - `class Foo<X, Y, Z> {...}`
240/// - `<X, Y, Z> void foo(...) { ... }`
241#[derive(Debug)]
242pub struct TypeParameter<'a> {
243    pub name: &'a str,
244    pub class_bound: Option<ReferenceType<'a>>,
245    pub iface_bounds: Vec<ReferenceType<'a>>,
246}
247
248impl<'a> TypeParameter<'a> {
249    fn from_internal(s: &'a str, internal: internal::TypeParameter) -> Self {
250        Self {
251            name: internal.name.apply(s),
252            class_bound: internal
253                .class_bound
254                .map(|bound| ReferenceType::from_internal(s, bound)),
255            iface_bounds: internal
256                .iface_bounds
257                .into_iter()
258                .map(|bound| ReferenceType::from_internal(s, bound))
259                .collect(),
260        }
261    }
262}
263
264/// Represents the type in method return position.
265pub enum ResultType<'a> {
266    VoidType,
267    ValueType(JavaType<'a>),
268}
269
270impl<'a> ResultType<'a> {
271    fn from_internal(s: &'a str, internal: internal::ResultType) -> Self {
272        match internal {
273            internal::ResultType::VoidType => ResultType::VoidType,
274            internal::ResultType::ValueType(ty) => {
275                ResultType::ValueType(JavaType::from_internal(s, ty))
276            }
277        }
278    }
279}
280
281/// Represents (exception) types in method "throws" declaration position.
282pub enum ThrowsType<'a> {
283    ClassType(ClassType<'a>),
284    TypeVariable(&'a str),
285}
286
287impl<'a> ThrowsType<'a> {
288    fn from_internal(s: &'a str, internal: internal::ThrowsType) -> Self {
289        match internal {
290            internal::ThrowsType::ClassType(ty) => {
291                ThrowsType::ClassType(ClassType::from_internal(s, ty))
292            }
293            internal::ThrowsType::TypeVariable(name) => ThrowsType::TypeVariable(name.apply(s)),
294        }
295    }
296}
297
298// --------------------------------------------------------------------
299
300/// Error signaling a signature parse failure. The error references
301/// the originally parsed string providing convenience methods to
302/// inspect where the error occurred.
303#[derive(Debug)]
304pub struct ParseError<'a> {
305    signature: &'a str,
306    internal: internal::ParseError,
307}
308
309impl<'a> Display for ParseError<'a> {
310    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
311        write!(f, "{}", self.internal.error)
312    }
313}
314
315impl<'a> ParseError<'a> {
316    fn new(signature: &'a str, internal: internal::ParseError) -> Self {
317        Self {
318            signature,
319            internal,
320        }
321    }
322
323    /// Retrieves the error position in the parsed signature string.
324    pub fn position(&self) -> usize {
325        self.internal.position
326    }
327
328    /// Retrieves the parser error messages revealing what went wrong.
329    ///
330    /// The `Display` implementation of `ParseError` prints exactly
331    /// this (and only this) string.
332    pub fn message(&self) -> &str {
333        &self.internal.error
334    }
335
336    /// Retrieves the (parse) context in which the error
337    /// occurred. Useful only for debugging the parser.
338    pub fn context(&self) -> &str {
339        self.internal.context
340    }
341
342    /// Retrieves the original, full signature string which failed to
343    /// parsed and led to this error.
344    pub fn signature(&self) -> &'a str {
345        self.signature
346    }
347
348    /// Returns the unconsumed portion of the parsed signature string
349    /// at which the error occurred.
350    pub fn unconsumed(&self) -> &'a str {
351        &self.signature[self.internal.position..]
352    }
353
354    /// Retrieves the successfully consumed portion of the parsed
355    /// signature string.
356    pub fn consumed(&self) -> &'a str {
357        &self.signature[..self.internal.position]
358    }
359}
360
361/// An alias for `std::result::Result<T, ParseError>`
362pub type Result<'a, T> = std::result::Result<T, ParseError<'a>>;
363
364// --------------------------------------------------------------------
365
366/// Attempts to parse the given string as a [field signature.](FieldSignature)
367pub fn parse_field_signature(s: &str) -> Result<'_, FieldSignature<'_>> {
368    internal::parse(
369        "FieldSignature",
370        internal::consume_reference_type_signature,
371        s,
372        str::char_indices,
373    )
374    .map(|ty| FieldSignature(ReferenceType::from_internal(s, ty)))
375    .map_err(|e| ParseError::new(s, e))
376}
377
378/// Convenience method to parse the given string as a [field
379/// signature](FieldSignature) returning `true` upon success, `false`
380/// otherwise.
381pub fn is_field_signature(s: &str) -> bool {
382    parse_field_signature(s).is_ok()
383}
384
385// --------------------------------------------------------------------
386
387/// Attempts to parse the given string as a [class signature.](ClassSignature)
388pub fn parse_class_signature(s: &str) -> Result<ClassSignature<'_>> {
389    internal::parse(
390        "ClassSignature",
391        internal::consume_class_signature,
392        s,
393        str::char_indices,
394    )
395    .map(|parsed| ClassSignature {
396        type_params: parsed
397            .type_params
398            .into_iter()
399            .map(|p| TypeParameter::from_internal(s, p))
400            .collect(),
401        super_class: ClassType::from_internal(s, parsed.super_class),
402        super_ifaces: parsed
403            .super_ifaces
404            .into_iter()
405            .map(|ty| ClassType::from_internal(s, ty))
406            .collect(),
407    })
408    .map_err(|e| ParseError::new(s, e))
409}
410
411/// Convenience method to parse the given string as a [class
412/// signature](ClassSignature) returning `true` upon success, `false`
413/// otherwise.
414pub fn is_class_signature(s: &str) -> bool {
415    parse_class_signature(s).is_ok()
416}
417
418/// Attempts to parse the given string as a [method signature.](MethodSignature)
419pub fn parse_method_signature(s: &str) -> Result<MethodSignature<'_>> {
420    internal::parse(
421        "MethodSignature",
422        internal::consume_method_signature,
423        s,
424        str::char_indices,
425    )
426    .map(|parsed| MethodSignature {
427        type_params: parsed
428            .type_params
429            .into_iter()
430            .map(|p| TypeParameter::from_internal(s, p))
431            .collect(),
432        parameters: parsed
433            .parameters
434            .into_iter()
435            .map(|p| JavaType::from_internal(s, p))
436            .collect(),
437        result: ResultType::from_internal(s, parsed.result),
438        throws: parsed
439            .throws
440            .into_iter()
441            .map(|ty| ThrowsType::from_internal(s, ty))
442            .collect(),
443    })
444    .map_err(|e| ParseError::new(s, e))
445}
446
447/// Convenience method to parse the given string as a
448/// [method signature](MethodSignature) returning `true`
449/// upon success, `false` otherwise.
450pub fn is_method_signature(s: &str) -> bool {
451    parse_method_signature(s).is_ok()
452}
453
#[cfg(test)]
mod tests {
    use super::*;

    // Each valid signature is additionally checked in two mutated
    // forms which must be rejected: with its last character removed
    // and with a trailing blank appended.

    #[test]
    fn test_is_field_signature() {
        const REJECTED: &[&str] = &[
            "",
            " ",
            ";",
            "<TT;>",
            // ~ byte; base-type to be rejected; field signatures parse only reference-types
            "B",
        ];
        for &s in REJECTED {
            assert!(!is_field_signature(s));
        }

        const ACCEPTED: &[&str] = &[
            "TT;",   // T
            "[[TT;", // T[][]
        ];
        for &s in ACCEPTED {
            let truncated = &s[..s.len() - 1];
            let padded = format!("{} ", s);
            assert!(!is_field_signature(truncated));
            assert!(!is_field_signature(&padded));
            assert!(
                is_field_signature(s),
                "expected valid signature (but failed): {}",
                s
            );
        }
    }

    #[test]
    fn test_is_class_signature() {
        const ACCEPTED: &[&str] = &[
            // com.google.common.base.CaseFormat (abstract enum)
            "Ljava/lang/Enum<Lcom/google/common/base/CaseFormat;>;",
            // class Bar<T extends Serializable & Comparable<T>> {..}
            "<T::Ljava/io/Serializable;:Ljava/lang/Comparable<TT;>;>Ljava/lang/Object;",
            // com/sun/org/apache/xalan/internal/xsltc/compiler/util/MultiHashtable.class (openjdk21)
            "<K:Ljava/lang/Object;V:Ljava/lang/Object;>Ljava/lang/Object;",
            // jdk.httpserver/com/sun/net/httpserver/Headers class signature (openjdk16)
            "Ljava/lang/Object;Ljava/util/Map<Ljava/lang/String;Ljava/util/List<Ljava/lang/String;>;>;",
            // jdk.compiler/classes/com/sun/tools/javac/util/GraphUtils$DotVisitor.class (openjdk21)
            "<D:Ljava/lang/Object;N::Lcom/sun/tools/javac/util/GraphUtils$DottableNode<TD;TN;>;>Lcom/sun/tools/javac/util/GraphUtils$NodeVisitor<TD;TN;Ljava/lang/StringBuilder;>;",
            // jdk.incubator.vector/classes/jdk/incubator/vector/VectorOperators$ImplCache.class (openjdk21)
            "<OP::Ljdk/incubator/vector/VectorOperators$Operator;T:Ljava/lang/Object;>Ljava/lang/Object;",
            // java.base/classes/jdk/internal/loader/AbstractClassLoaderValue$Sub.class (openjdk21)
            "<K:Ljava/lang/Object;>Ljdk/internal/loader/AbstractClassLoaderValue<Ljdk/internal/loader/AbstractClassLoaderValue<TCLV;TV;>.Sub<TK;>;TV;>;",
            // java.base/classes/java/lang/invoke/BoundMethodHandle$Specializer$Factory.class (openjdk21)
            "Ljava/lang/invoke/ClassSpecializer<Ljava/lang/invoke/BoundMethodHandle;Ljava/lang/String;Ljava/lang/invoke/BoundMethodHandle$SpeciesData;>.Factory;",
        ];
        for &s in ACCEPTED {
            let truncated = &s[..s.len() - 1];
            assert!(
                !is_class_signature(truncated),
                "failed to reject: `{}`",
                truncated
            );

            let padded = format!("{} ", s);
            assert!(
                !is_class_signature(&padded),
                "failed to reject: `{}`",
                padded
            );

            if let Err(e) = parse_class_signature(s) {
                panic!("failed to recognize `{s}` as class signature: {e}");
            }
        }
    }

    #[test]
    fn test_is_method_signature() {
        const ACCEPTED: &[&str] = &[
            // CopyOnWriteArrayList$COWSubListIterator#E next()
            "()TE;",
            // CopyOnWriteArrayList$COWSubListIterator#add(E);
            "(TE;)V",
            // CopyOnWriteArrayList$COWSubListIterator#forEachRemaining(java.util.function.Consumer<? super E>)
            "(Ljava/util/function/Consumer<-TE;>;)V",
            // ArrayList#<T> T[] toArray(T[]);
            "<T:Ljava/lang/Object;>([TT;)[TT;",
        ];
        for &s in ACCEPTED {
            let truncated = &s[..s.len() - 1];
            assert!(
                !is_method_signature(truncated),
                "failed to reject: `{}`",
                truncated
            );

            let padded = format!("{} ", s);
            assert!(
                !is_method_signature(&padded),
                "failed to reject: `{}`",
                padded
            );

            assert!(is_method_signature(s), "failed to recognize: `{}`", s);
        }
    }
}