rust_icu_umsg/lib.rs
1// Copyright 2020 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! # Locale-aware message formatting.
16//!
17//! Implementation of the text formatting code from the ICU4C
18//! [`umsg.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/umsg_8h.html) header.
19//! Skip to the section ["Example use"](#example-use) below if you want to see it in action.
20//!
21//! The library inherits all pattern and formatting specifics from the corresponding [ICU C++
22//! API](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1MessageFormat.html).
23//!
24//! This is the support for [MessageFormat](http://userguide.icu-project.org/formatparse/messages)
25//! message formatting. The `MessageFormat` uses ICU data to format text properly based on the
26//! locale selected at formatter initialization. This includes formatting dates, times,
27//! currencies, and other text.
28//!
29//! > **Note:** The `MessageFormat` library does not handle loading the format patterns in the
30//! > appropriate language. This task is left to the application author.
31//!
32//! # Example use
33//!
34//! The example below shows how to format values into an English text. For more detail about
35//! formatting specifics see [message_format!].
36//!
37//! ```ignore
38//! use rust_icu_sys as sys;
39//! use rust_icu_common as common;
40//! use rust_icu_ustring as ustring;
41//! use rust_icu_uloc as uloc;
42//! use rust_icu_umsg::{self as umsg, message_format};
43//! # use rust_icu_ucal as ucal;
44//! # use std::convert::TryFrom;
45//! #
46//! # struct TzSave(String);
47//! # impl Drop for TzSave {
48//! # fn drop(&mut self) {
49//! # ucal::set_default_time_zone(&self.0);
50//! # }
51//! # }
52//!
53//! fn testfn() -> Result<(), common::Error> {
//! # let _tz_guard = TzSave(ucal::get_default_time_zone()?);
55//! # ucal::set_default_time_zone("Europe/Amsterdam")?;
56//! let loc = uloc::ULoc::try_from("en-US-u-tz-uslax")?;
57//! let msg = ustring::UChar::try_from(
58//! r"Formatted double: {0,number,##.#},
59//! Formatted integer: {1,number,integer},
60//! Formatted string: {2},
61//! Date: {3,date,full}",
62//! )?;
63//!
64//! let fmt = umsg::UMessageFormat::try_from(&msg, &loc)?;
65//! let hello = ustring::UChar::try_from("Hello! Добар дан!")?;
66//! let result = umsg::message_format!(
67//! fmt,
68//! { 43.4 => Double },
69//! { 31337 => Integer },
70//! { hello => String },
71//! { 0.0 => Date },
72//! )?;
73//!
74//! assert_eq!(
75//! r"Formatted double: 43.4,
76//! Formatted integer: 31,337,
77//! Formatted string: Hello! Добар дан!,
78//! Date: Thursday, January 1, 1970",
79//! result
80//! );
81//! Ok(())
82//! }
83//! # fn main() -> Result<(), common::Error> {
84//! # testfn()
85//! # }
86//! ```
87
88use {
89 rust_icu_common as common, rust_icu_sys as sys, rust_icu_sys::*, rust_icu_uloc as uloc,
90 rust_icu_ustring as ustring, std::convert::TryFrom,
91};
92
93use sealed::Sealed;
94
95#[doc(hidden)]
96pub use {rust_icu_sys as __sys, rust_icu_ustring as __ustring, std as __std};
97
98/// The implementation of the ICU `UMessageFormat*`.
99///
100/// Use the [UMessageFormat::try_from] to create a message formatter for a given message pattern in
101/// the [Messageformat](http://userguide.icu-project.org/formatparse/messages) and a specified
102/// locale. Use the macro [message_format!] to actually format the arguments.
103///
104/// [UMessageFormat] supports very few methods when compared to the wealth of functions that one
105/// can see in
106/// [`umsg.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/umsg_8h.html). It is
107/// not clear that other functions available there offer significantly more functionality than is
108/// given here.
109///
110/// If, however, you find that the set of methods implemented at the moment are not adequate, feel
111/// free to provide a [pull request](https://github.com/google/rust_icu/pulls) implementing what
112/// you need.
113///
114/// Implements `UMessageFormat`.
115#[derive(Debug)]
116pub struct UMessageFormat {
117 rep: std::rc::Rc<Rep>,
118}
119
120// An internal representation of the message formatter, used to allow cloning.
121#[derive(Debug)]
122struct Rep {
123 rep: *mut sys::UMessageFormat,
124}
125
126impl Drop for Rep {
127 /// Drops the content of [sys::UMessageFormat] and releases its memory.
128 ///
129 /// Implements `umsg_close`.
130 fn drop(&mut self) {
131 unsafe {
132 versioned_function!(umsg_close)(self.rep);
133 }
134 }
135}
136
137impl Clone for UMessageFormat {
138 /// Implements `umsg_clone`.
139 fn clone(&self) -> Self {
140 // Note this is not OK if UMessageFormat ever grows mutable methods.
141 UMessageFormat {
142 rep: self.rep.clone(),
143 }
144 }
145}
146
147impl UMessageFormat {
148 /// Creates a new message formatter.
149 ///
150 /// A single message formatter is created per each pattern-locale combination. Mutable methods
151 /// from [`umsg`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/umsg_8h.html)
152 /// are not implemented, and for now requires that all formatting be separate.
153 ///
154 /// Implements `umsg_open`.
155 pub fn try_from(
156 pattern: &ustring::UChar,
157 locale: &uloc::ULoc,
158 ) -> Result<UMessageFormat, common::Error> {
159 let pstr = pattern.as_c_ptr();
160 let loc = locale.as_c_str();
161 let mut status = common::Error::OK_CODE;
162 let mut parse_status = common::NO_PARSE_ERROR;
163
164 let rep = unsafe {
165 assert!(common::Error::is_ok(status));
166 versioned_function!(umsg_open)(
167 pstr,
168 pattern.len() as i32,
169 loc.as_ptr(),
170 &mut parse_status,
171 &mut status,
172 )
173 };
174 common::Error::ok_or_warning(status)?;
175 common::parse_ok(parse_status)?;
176 Ok(UMessageFormat {
177 rep: std::rc::Rc::new(Rep { rep }),
178 })
179 }
180}
181
182/// Given a formatter, formats the passed arguments into the formatter's message.
183///
184/// The general usage pattern for the formatter is as follows, assuming that `formatter`
185/// is an appropriately initialized [UMessageFormat]:
186///
187/// ``` ignore
188/// use rust_icu_umsg as umsg;
189/// // let result = umsg::message_format!(
190/// // formatter, [{ value => <type_assertion> }, ...]);
191/// let result = umsg::message_format!(formatter, { 31337 => Double });
192/// ```
193///
194/// Each fragment `{ value => <type_assertion> }` represents a single positional parameter binding
195/// for the pattern in `formatter`. The first fragment corresponds to the positional parameter `0`
196/// (which, if an integer, would be referred to as `{0,number,integer}` in a MessageFormat
197/// pattern). Since the original C API that this rust library is generated for uses variadic
198/// functions for parameter passing, it is very important that the programmer matches the actual
199/// parameter types to the types that are expected in the pattern.
200///
201/// > **Note:** If the types of parameter bindings do not match the expectations in the pattern,
202/// > memory corruption may occur, so tread lightly here.
203///
/// In general this is very brittle, and an API in a more modern language, or a contemporary C++
205/// flavor would probably take a different route were the library to be written today. The rust
206/// binding tries to make the API use a bit more palatable by requiring that the programmer
207/// explicitly specifies a type for each of the parameters to be passed into the formatter.
208///
209/// The supported types are not those of a full rust system, but rather a very restricted subset
210/// of types that MessageFormat supports:
211///
212/// | Type | Rust Type | Notes |
213/// | ---- | --------- | ----------- |
214/// | Double | `f64` | Any numeric parameter not specifically designated as different type, is always a double. See section below on Doubles. |
215/// | String | [rust_icu_ustring::UChar] | |
216/// | Integer | `i32` | |
217/// | Date | [rust_icu_sys::UDate] (alias for `f64`) | Is used to format dates. Depending on the date format requested in the pattern used in [UMessageFormat], the end result of date formatting could be one of a wide variety of [date formats](http://userguide.icu-project.org/formatparse/datetime).|
218///
219/// ## Double as numeric parameter
220///
221/// According to the [ICU documentation for
222/// `umsg_format`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/umsg_8h.html#a90a4b5fe778754e5da52f7c2e5fd6048):
223///
224/// > for all numeric arguments double is assumed unless the type is explicitly
225/// > integer (long). All choice format arguments must be of type double.
226///
227/// ## Strings
228///
229/// We determined by code inspection that the string format must be `rust_icu_ustring::UChar`.
230///
231/// # Example use
232///
233/// ```
234/// use rust_icu_sys as sys;
235/// use rust_icu_common as common;
236/// use rust_icu_ustring as ustring;
237/// use rust_icu_uloc as uloc;
238/// use rust_icu_umsg::{self as umsg, message_format};
239/// # use rust_icu_ucal as ucal;
240/// # use std::convert::TryFrom;
241/// #
242/// # struct TzSave(String);
243/// # impl Drop for TzSave {
244/// # // Restore the system time zone upon exit.
245/// # fn drop(&mut self) {
246/// # ucal::set_default_time_zone(&self.0);
247/// # }
248/// # }
249///
250/// fn testfn() -> Result<(), common::Error> {
/// # let _tz_guard = TzSave(ucal::get_default_time_zone()?);
252/// # ucal::set_default_time_zone("Europe/Amsterdam")?;
253/// let loc = uloc::ULoc::try_from("en-US")?;
254/// let msg = ustring::UChar::try_from(
255/// r"Formatted double: {0,number,##.#},
256/// Formatted integer: {1,number,integer},
257/// Formatted string: {2},
258/// Date: {3,date,full}",
259/// )?;
260///
261/// let fmt = umsg::UMessageFormat::try_from(&msg, &loc)?;
262/// let hello = ustring::UChar::try_from("Hello! Добар дан!")?;
263/// let result = umsg::message_format!(
264/// fmt,
265/// { 43.4 => Double },
266/// { 31337 => Integer },
267/// { hello => String },
268/// { 0.0 => Date },
269/// )?;
270///
271/// assert_eq!(
272/// r"Formatted double: 43.4,
273/// Formatted integer: 31,337,
274/// Formatted string: Hello! Добар дан!,
275/// Date: Thursday, January 1, 1970",
276/// result
277/// );
278/// Ok(())
279/// }
280/// # fn main() -> Result<(), common::Error> {
281/// # testfn()
282/// # }
283/// ```
284///
285/// Implements `umsg_format`.
286/// Implements `umsg_vformat`.
#[macro_export]
macro_rules! message_format {
    // A formatter invoked without any parameter binding is rejected at compile time.
    ($dest:expr $(,)?) => {
        $crate::__std::compile_error!("you should not format a message without parameters")
    };
    // One or more `{ value => Type }` bindings, with an optional trailing comma.
    ($dest:expr, $( {$arg:expr => $t:ident} ),+ $(,)?) => {{
        // The caller's expressions are evaluated here, outside of the `unsafe` block below, so
        // that an unsafe operation hidden in `$dest` or `$arg` is still rejected by the compiler
        // unless the caller wraps it in `unsafe` explicitly.
        let formatter: &$crate::UMessageFormat = &$dest;
        let args = ($($crate::checkarg!($arg, $t),)*);
        // Only the variadic formatting call itself needs `unsafe`; `checkarg!` has already pinned
        // every argument to the Rust type named in its binding.
        unsafe { $crate::format_args(formatter, args) }
    }};
}
298
#[doc(hidden)]
#[macro_export]
macro_rules! checkarg {
    // Internal rule: evaluates `$e` exactly once and forces it to have the type `$ty`, so that a
    // binding with a mismatched type is a compile error at the call site.
    (@typed $e:expr, $ty:ty) => {{
        let typed: $ty = $e;
        typed
    }};
    // Each type assertion accepted by `message_format!` maps to one concrete Rust type.
    ($e:expr, Double) => {
        $crate::checkarg!(@typed $e, $crate::__std::primitive::f64)
    };
    ($e:expr, String) => {
        $crate::checkarg!(@typed $e, $crate::__ustring::UChar)
    };
    ($e:expr, Integer) => {
        $crate::checkarg!(@typed $e, $crate::__std::primitive::i32)
    };
    ($e:expr, Long) => {
        $crate::checkarg!(@typed $e, $crate::__std::primitive::i64)
    };
    ($e:expr, Date) => {
        $crate::checkarg!(@typed $e, $crate::__sys::UDate)
    };
}
323
324#[doc(hidden)]
325pub unsafe fn format_args(
326 fmt: &UMessageFormat,
327 args: impl FormatArgs,
328) -> Result<String, common::Error> {
329 const CAP: usize = 1024;
330 let mut status = common::Error::OK_CODE;
331 let mut result = ustring::UChar::new_with_capacity(CAP);
332
333 let total_size =
334 args.format(fmt.rep.rep, result.as_mut_c_ptr(), CAP as i32, &mut status) as usize;
335 common::Error::ok_or_warning(status)?;
336
337 result.resize(total_size);
338
339 if total_size > CAP {
340 args.format(
341 fmt.rep.rep,
342 result.as_mut_c_ptr(),
343 total_size as i32,
344 &mut status,
345 );
346 common::Error::ok_or_warning(status)?;
347 }
348 String::try_from(&result)
349}
350
// Private home of the `Sealed` marker trait.  The traits below use it as a supertrait, and since
// this module is not public, code outside of this crate cannot name `Sealed` to implement it.
mod sealed {
    /// Marker implemented only for the types this crate allows as formatting arguments.
    pub trait Sealed {}
}
354
355/// Traits for types that can be passed to the umsg_format variadic function.
356#[doc(hidden)]
357pub trait FormatArg: Sealed {
358 type Raw;
359 fn to_raw(&self) -> Self::Raw;
360}
361
362impl Sealed for f64 {}
363impl FormatArg for f64 {
364 type Raw = f64;
365 fn to_raw(&self) -> Self::Raw {
366 *self
367 }
368}
369
370impl Sealed for ustring::UChar {}
371impl FormatArg for ustring::UChar {
372 type Raw = *const UChar;
373 fn to_raw(&self) -> Self::Raw {
374 self.as_c_ptr()
375 }
376}
377
378impl Sealed for i32 {}
379impl FormatArg for i32 {
380 type Raw = i32;
381 fn to_raw(&self) -> Self::Raw {
382 *self
383 }
384}
385
386impl Sealed for i64 {}
387impl FormatArg for i64 {
388 type Raw = i64;
389 fn to_raw(&self) -> Self::Raw {
390 *self
391 }
392}
393
394/// Trait for tuples of elements implementing `FormatArg`.
395#[doc(hidden)]
396pub trait FormatArgs: Sealed {
397 #[doc(hidden)]
398 unsafe fn format(
399 &self,
400 fmt: *const sys::UMessageFormat,
401 result: *mut UChar,
402 result_length: i32,
403 status: *mut UErrorCode,
404 ) -> i32;
405}
406
407macro_rules! impl_format_args_for_tuples {
408 ($(($($param:ident),*),)*) => {
409 $(
410 impl<$($param: FormatArg,)*> Sealed for ($($param,)*) {}
411 impl<$($param: FormatArg,)*> FormatArgs for ($($param,)*) {
412 unsafe fn format(
413 &self,
414 fmt: *const sys::UMessageFormat,
415 result: *mut UChar,
416 result_length: i32,
417 status: *mut UErrorCode,
418 ) -> i32 {
419 #[allow(non_snake_case)]
420 let ($($param,)*) = self;
421 $(
422 #[allow(non_snake_case)]
423 let $param = $crate::FormatArg::to_raw($param);
424 )*
425
426 versioned_function!(umsg_format)(
427 fmt,
428 result,
429 result_length,
430 status,
431 $($param,)*
432 )
433 }
434 }
435 )*
436 }
437}
438
439impl_format_args_for_tuples! {
440 (A),
441 (A, B),
442 (A, B, C),
443 (A, B, C, D),
444 (A, B, C, D, E),
445 (A, B, C, D, E, F),
446 (A, B, C, D, E, F, G),
447 (A, B, C, D, E, F, G, H),
448 (A, B, C, D, E, F, G, H, I),
449 (A, B, C, D, E, F, G, H, I, J),
450 (A, B, C, D, E, F, G, H, I, J, K),
451 (A, B, C, D, E, F, G, H, I, J, K, L),
452 (A, B, C, D, E, F, G, H, I, J, K, L, M),
453 (A, B, C, D, E, F, G, H, I, J, K, L, M, N),
454 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O),
455 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P),
456 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q),
457 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R),
458 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S),
459 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T),
460 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U),
461 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V),
462 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W),
463 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X),
464 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y),
465 (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z),
466}
467
#[cfg(test)]
mod tests {
    use super::*;
    use rust_icu_ucal as ucal;
    use std::sync::{Mutex, MutexGuard};

    // The ICU default time zone is global to the process, while the test harness runs the tests
    // on several threads at once.  Every test that changes the time zone holds this lock, so
    // that one test can not switch the zone under another test's feet.
    static TZ_LOCK: Mutex<()> = Mutex::new(());

    /// Switches the default time zone for as long as the value is alive, and restores the
    /// previous time zone when the value is dropped.
    ///
    /// The value must be bound to a *named* variable (e.g. `_tz`).  A plain `let _ = ...` drops
    /// it immediately, which restores the time zone before the test body even runs.
    struct TzSave {
        saved: String,
        // Released only after `drop` has restored the time zone.
        _lock: MutexGuard<'static, ()>,
    }

    impl TzSave {
        /// Remembers the current default time zone and then makes `zone` the default.
        fn switch_to(zone: &str) -> Result<Self, common::Error> {
            // A poisoned lock only means that another test failed while holding it; the guarded
            // `()` carries no state that could have been left inconsistent.
            let lock = TZ_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let saved = ucal::get_default_time_zone()?;
            ucal::set_default_time_zone(zone)?;
            Ok(TzSave { saved, _lock: lock })
        }
    }

    impl Drop for TzSave {
        // Restore the system time zone upon exit.
        fn drop(&mut self) {
            ucal::set_default_time_zone(&self.saved).expect("timezone set success");
        }
    }

    #[test]
    fn tzsave() -> Result<(), common::Error> {
        let _tz = TzSave::switch_to("Europe/Amsterdam")?;
        Ok(())
    }

    #[test]
    fn basic() -> Result<(), common::Error> {
        let _tz = TzSave::switch_to("Europe/Amsterdam")?;

        let loc = uloc::ULoc::try_from("en-US")?;
        // The line breaks and the leading whitespace of the continuation lines are part of the
        // pattern, and must match the expected text below exactly.
        let msg = ustring::UChar::try_from(
            r"Formatted double: {0,number,##.#},
 Formatted integer: {1,number,integer},
 Formatted string: {2},
 Date: {3,date,full}",
        )?;

        let fmt = crate::UMessageFormat::try_from(&msg, &loc)?;
        let hello = ustring::UChar::try_from("Hello! Добар дан!")?;
        let value: i32 = 31337;
        let result = message_format!(
            fmt,
            { 43.4 => Double },
            { value => Integer },
            { hello => String },
            { 0.0 => Date }
        )?;

        assert_eq!(
            r"Formatted double: 43.4,
 Formatted integer: 31,337,
 Formatted string: Hello! Добар дан!,
 Date: Thursday, January 1, 1970",
            result
        );
        Ok(())
    }

    #[test]
    fn clone() -> Result<(), common::Error> {
        let loc = uloc::ULoc::try_from("en-US-u-tz-uslax")?;
        let msg = ustring::UChar::try_from(r"Formatted double: {0,number,##.#}")?;

        let fmt = crate::UMessageFormat::try_from(&msg, &loc)?;
        // The clone is the point of this test, even though clippy considers it redundant.
        #[allow(clippy::redundant_clone)]
        let result = message_format!(fmt.clone(), { 43.43 => Double })?;
        assert_eq!(r"Formatted double: 43.4", result);
        Ok(())
    }
}