tokenizers/utils/
mod.rs

1pub(crate) mod cache;
2#[cfg(feature = "http")]
3pub(crate) mod from_pretrained;
4
5#[cfg(feature = "unstable_wasm")]
6mod fancy;
7#[cfg(feature = "unstable_wasm")]
8pub use fancy::SysRegex;
9#[cfg(not(feature = "unstable_wasm"))]
10mod onig;
11#[cfg(not(feature = "unstable_wasm"))]
12pub use crate::utils::onig::SysRegex;
13
14pub mod iter;
15pub mod padding;
16pub mod parallelism;
17pub(crate) mod progress;
18pub mod truncation;
19
20use serde::{Serialize, Serializer};
21use std::collections::{BTreeMap, HashMap};
22
23pub(crate) fn ordered_map<S, K, V>(
24    value: &HashMap<K, V>,
25    serializer: S,
26) -> std::result::Result<S::Ok, S::Error>
27where
28    S: Serializer,
29    K: Serialize + std::cmp::Ord,
30    V: Serialize,
31{
32    let ordered: BTreeMap<_, _> = value.iter().collect();
33    ordered.serialize(serializer)
34}
35
/// Generates `impl From<$from_ty> for $enum` whose conversion wraps the
/// incoming value in the tuple variant `$variant`.
///
/// Invoke as `impl_enum_from!(SourceType, EnumType, VariantName);`.
macro_rules! impl_enum_from {
    ($from_ty:ty, $enum:ty, $variant:ident) => {
        impl ::std::convert::From<$from_ty> for $enum {
            fn from(inner: $from_ty) -> Self {
                Self::$variant(inner)
            }
        }
    };
}
45
/// Implements `serde::Serialize` and `serde::Deserialize` for a struct as if it were annotated
/// with `#[serde(tag = "type")]`: the serialized form carries a `type` field holding the
/// struct's name.
///
/// Deserialization returns an error when the input lacks the `type` field or names a different
/// type. The `should_panic` examples below panic only because they `unwrap` that error.
///
/// Two shapes are accepted: a struct with named fields (the trailing comma after the last field
/// is optional) and a unit struct.
///
/// The expansion calls `paste::paste!` and, for structs with named fields, derives the
/// unqualified `Serialize` / `Deserialize`, so those names must resolve at the call site.
///
/// # Examples
///
/// ```
/// # #[macro_use] extern crate tokenizers;
/// use serde::{Serialize, Deserialize};
///
/// fn main() {
///    impl_serde_type!{
///        #[derive(Debug)]
///        struct Point {
///            x: i32,
///            #[serde(default = "default_y")]
///            y: i32,
///        }
///    }
///    fn default_y() -> i32 {
///        5
///    }
///
///    let point = Point { x: 1, y: 2 };
///    let serialized_s = r#"{"type":"Point","x":1,"y":2}"#;
///    assert_eq!(serde_json::to_string(&point).unwrap(), serialized_s);
/// }
/// ```
///
/// ```should_panic
/// # #[macro_use] extern crate tokenizers;
/// use serde::{Serialize, Deserialize};
///
/// fn main() {
///    impl_serde_type!{
///        #[derive(Debug)]
///        struct Point1D {
///            x: i32,
///        }
///    }
///
///    let serialized_s = r#"{"x":1}"#;
///    let deserialized: Point1D = serde_json::from_str(serialized_s).unwrap();
/// }
/// ```
///
/// # Examples (unit structs)
///
/// ```
/// # #[macro_use] extern crate tokenizers;
/// use serde::{Serialize, Deserialize};
///
/// fn main() {
///    impl_serde_type!{
///        struct Unit;
///    }
///
///    let unit = Unit;
///    let serialized_s = r#"{"type":"Unit"}"#;
///    assert_eq!(serde_json::to_string(&unit).unwrap(), serialized_s);
/// }
/// ```
///
/// ```should_panic
/// # #[macro_use] extern crate tokenizers;
/// use serde::{Serialize, Deserialize};
///
/// fn main() {
///    impl_serde_type!{
///        struct Unit;
///    }
///
///    let serialized_s = r#"{"some_field":1}"#;
///    let deserialized: Unit = serde_json::from_str(serialized_s).unwrap();
/// }
/// ```
#[macro_export]
macro_rules! impl_serde_type{
    // Struct with named fields. `$(,)?` makes the trailing comma optional.
    (
     $(#[$meta:meta])*
     $vis:vis struct $struct_name:ident {
        $(
        $(#[$field_meta:meta])*
        $field_vis:vis $field_name:ident : $field_type:ty
        ),*$(,)?
    }
    ) => {
        paste::paste!{
            // The user-facing struct. Serialization uses the derived, `type`-tagged form;
            // deserialization is routed through `<Name>Deserializer` via `from = "..."`.
            $(#[$meta])*
            #[derive(Serialize, Deserialize)]
            #[serde(tag = "type", from = $struct_name "Deserializer")]
            $vis struct $struct_name{
                $(
                    $(#[$field_meta])*
                    $field_vis $field_name : $field_type,
                )*
            }

            // Field-for-field mirror of the struct, used as a serde `remote` definition so the
            // fields can be deserialized without going back through the `from` conversion.
            #[doc(hidden)]
            $(#[$meta])*
            #[derive(Deserialize)]
            #[serde(tag = "type", remote = $struct_name "")]
            struct [<$struct_name Def>]{
                $(
                    $(#[$field_meta])*
                    $field_vis $field_name : $field_type,
                )*
            }

            // Single-variant enum: the only value accepted for `type` is the struct's name.
            #[doc(hidden)]
            #[derive(Deserialize)]
            enum [<$struct_name Type>] {
                $struct_name,
            }

            // Intermediate deserialization target: a mandatory `type` field plus the
            // flattened fields of the struct itself.
            #[doc(hidden)]
            #[derive(Deserialize)]
            struct [<$struct_name Deserializer>] {
                // Only read by the derived deserializer, to validate the tag.
                #[allow(dead_code)]
                r#type: [<$struct_name Type>],
                #[serde(flatten, with = $struct_name "Def")]
                r#struct: $struct_name,
            }

            // Conversion required by `#[serde(from = "...")]` on the struct above.
            #[doc(hidden)]
            impl std::convert::From<[<$struct_name Deserializer>]> for $struct_name {
                fn from(v: [<$struct_name Deserializer>]) -> Self {
                    v.r#struct
                }
            }
        }
    };
    // Unit struct: (de)serialized through a helper that holds nothing but the `type` tag.
    (
     $(#[$meta:meta])*
     $vis:vis struct $struct_name:ident;
    ) => {
        paste::paste!{
            $(#[$meta])*
            $vis struct $struct_name;

            impl serde::Serialize for $struct_name {
                fn serialize<S>(&self, serializer: S)  -> std::result::Result<S::Ok, S::Error> where
                    S: serde::ser::Serializer {
                    let helper = [<$struct_name Helper>]{r#type: [<$struct_name Type>]::$struct_name};
                    helper.serialize(serializer)
                }
            }

            impl<'de> serde::Deserialize<'de> for $struct_name {
                fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
                where
                    D: serde::Deserializer<'de>,
                {
                    // The helper is deserialized only to validate the `type` tag.
                    let _helper = [<$struct_name Helper>]::deserialize(deserializer)?;
                    Ok($struct_name)
                }
            }

            // Single-variant enum: the only value accepted for `type` is the struct's name.
            #[derive(serde::Serialize, serde::Deserialize)]
            enum [<$struct_name Type>] {
                $struct_name,
            }

            // On-the-wire shape of the unit struct: just the `type` tag.
            #[derive(serde::Serialize, serde::Deserialize)]
            struct [<$struct_name Helper>] {
                #[allow(dead_code)]
                r#type: [<$struct_name Type>],
            }
        }
    }
}
216
217// Re-export macro_rules_attribute
218pub use macro_rules_attribute::macro_rules_attribute;