Skip to main content

rust_icu_umsg/
lib.rs

1// Copyright 2020 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! # Locale-aware message formatting.
16//!
17//! Implementation of the text formatting code from the ICU4C
18//! [`umsg.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/umsg_8h.html) header.
19//! Skip to the section ["Example use"](#example-use) below if you want to see it in action.
20//!
21//! The library inherits all pattern and formatting specifics from the corresponding [ICU C++
22//! API](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1MessageFormat.html).
23//!
24//! This is the support for [MessageFormat](http://userguide.icu-project.org/formatparse/messages)
25//! message formatting.  The `MessageFormat` uses ICU data to format text properly based on the
26//! locale selected at formatter initialization.  This includes formatting dates, times,
27//! currencies, and other text.
28//!
29//! > **Note:** The `MessageFormat` library does not handle loading the format patterns in the
30//! > appropriate language.  This task is left to the application author.
31//!
32//! # Example use
33//!
34//! The example below shows how to format values into an English text.  For more detail about
35//! formatting specifics see [message_format!].
36//!
37//! ```ignore
38//! use rust_icu_sys as sys;
39//! use rust_icu_common as common;
40//! use rust_icu_ustring as ustring;
41//! use rust_icu_uloc as uloc;
42//! use rust_icu_umsg::{self as umsg, message_format};
43//! # use rust_icu_ucal as ucal;
44//! # use std::convert::TryFrom;
45//! #
46//! # struct TzSave(String);
47//! # impl Drop for TzSave {
48//! #    fn drop(&mut self) {
49//! #        ucal::set_default_time_zone(&self.0);
50//! #    }
51//! # }
52//!
53//! fn testfn() -> Result<(), common::Error> {
//! #   let _tz_guard = TzSave(ucal::get_default_time_zone()?);
55//! #   ucal::set_default_time_zone("Europe/Amsterdam")?;
56//!     let loc = uloc::ULoc::try_from("en-US-u-tz-uslax")?;
57//!     let msg = ustring::UChar::try_from(
58//!       r"Formatted double: {0,number,##.#},
59//!         Formatted integer: {1,number,integer},
60//!         Formatted string: {2},
61//!         Date: {3,date,full}",
62//!     )?;
63//!
64//!     let fmt = umsg::UMessageFormat::try_from(&msg, &loc)?;
65//!     let hello = ustring::UChar::try_from("Hello! Добар дан!")?;
66//!     let result = umsg::message_format!(
67//!       fmt,
68//!       { 43.4 => Double },
69//!       { 31337 => Integer },
70//!       { hello => String },
71//!       { 0.0 => Date },
72//!     )?;
73//!
74//!     assert_eq!(
75//!       r"Formatted double: 43.4,
76//!         Formatted integer: 31,337,
77//!         Formatted string: Hello! Добар дан!,
78//!         Date: Thursday, January 1, 1970",
79//!       result
80//!     );
81//!     Ok(())
82//! }
83//! # fn main() -> Result<(), common::Error> {
84//! #   testfn()
85//! # }
86//! ```
87
88use {
89    rust_icu_common as common, rust_icu_sys as sys, rust_icu_sys::*, rust_icu_uloc as uloc,
90    rust_icu_ustring as ustring, std::convert::TryFrom,
91};
92
93use sealed::Sealed;
94
95#[doc(hidden)]
96pub use {rust_icu_sys as __sys, rust_icu_ustring as __ustring, std as __std};
97
98/// The implementation of the ICU `UMessageFormat*`.
99///
100/// Use the [UMessageFormat::try_from] to create a message formatter for a given message pattern in
101/// the [Messageformat](http://userguide.icu-project.org/formatparse/messages) and a specified
102/// locale.  Use the macro [message_format!] to actually format the arguments.
103///
104/// [UMessageFormat] supports very few methods when compared to the wealth of functions that one
105/// can see in
106/// [`umsg.h`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/umsg_8h.html).  It is
107/// not clear that other functions available there offer significantly more functionality than is
108/// given here.
109///
110/// If, however, you find that the set of methods implemented at the moment are not adequate, feel
111/// free to provide a [pull request](https://github.com/google/rust_icu/pulls) implementing what
112/// you need.
113///
114/// Implements `UMessageFormat`.
115#[derive(Debug)]
116pub struct UMessageFormat {
117    rep: std::rc::Rc<Rep>,
118}
119
120// An internal representation of the message formatter, used to allow cloning.
121#[derive(Debug)]
122struct Rep {
123    rep: *mut sys::UMessageFormat,
124}
125
126impl Drop for Rep {
127    /// Drops the content of [sys::UMessageFormat] and releases its memory.
128    ///
129    /// Implements `umsg_close`.
130    fn drop(&mut self) {
131        unsafe {
132            versioned_function!(umsg_close)(self.rep);
133        }
134    }
135}
136
137impl Clone for UMessageFormat {
138    /// Implements `umsg_clone`.
139    fn clone(&self) -> Self {
140        // Note this is not OK if UMessageFormat ever grows mutable methods.
141        UMessageFormat {
142            rep: self.rep.clone(),
143        }
144    }
145}
146
147impl UMessageFormat {
148    /// Creates a new message formatter.
149    ///
150    /// A single message formatter is created per each pattern-locale combination. Mutable methods
151    /// from [`umsg`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/umsg_8h.html)
152    /// are not implemented, and for now requires that all formatting be separate.
153    ///
154    /// Implements `umsg_open`.
155    pub fn try_from(
156        pattern: &ustring::UChar,
157        locale: &uloc::ULoc,
158    ) -> Result<UMessageFormat, common::Error> {
159        let pstr = pattern.as_c_ptr();
160        let loc = locale.as_c_str();
161        let mut status = common::Error::OK_CODE;
162        let mut parse_status = common::NO_PARSE_ERROR;
163
164        let rep = unsafe {
165            assert!(common::Error::is_ok(status));
166            versioned_function!(umsg_open)(
167                pstr,
168                pattern.len() as i32,
169                loc.as_ptr(),
170                &mut parse_status,
171                &mut status,
172            )
173        };
174        common::Error::ok_or_warning(status)?;
175        common::parse_ok(parse_status)?;
176        Ok(UMessageFormat {
177            rep: std::rc::Rc::new(Rep { rep }),
178        })
179    }
180}
181
182/// Given a formatter, formats the passed arguments into the formatter's message.
183///
184/// The general usage pattern for the formatter is as follows, assuming that `formatter`
185/// is an appropriately initialized [UMessageFormat]:
186///
187/// ``` ignore
188/// use rust_icu_umsg as umsg;
189/// // let result = umsg::message_format!(
190/// //     formatter, [{ value => <type_assertion> }, ...]);
191/// let result = umsg::message_format!(formatter, { 31337 => Double });
192/// ```
193///
194/// Each fragment `{ value => <type_assertion> }` represents a single positional parameter binding
195/// for the pattern in `formatter`.  The first fragment corresponds to the positional parameter `0`
196/// (which, if an integer, would be referred to as `{0,number,integer}` in a MessageFormat
197/// pattern).  Since the original C API that this rust library is generated for uses variadic
198/// functions for parameter passing, it is very important that the programmer matches the actual
199/// parameter types to the types that are expected in the pattern.
200///
201/// > **Note:** If the types of parameter bindings do not match the expectations in the pattern,
202/// > memory corruption may occur, so tread lightly here.
203///
/// In general this is very brittle, and an API in a more modern language, or a contemporary C++
205/// flavor would probably take a different route were the library to be written today.  The rust
206/// binding tries to make the API use a bit more palatable by requiring that the programmer
207/// explicitly specifies a type for each of the parameters to be passed into the formatter.
208///
209/// The supported types are not those of a full rust system, but rather a very restricted subset
210/// of types that MessageFormat supports:
211///
212/// | Type | Rust Type | Notes |
213/// | ---- | --------- | ----------- |
214/// | Double | `f64` | Any numeric parameter not specifically designated as different type, is always a double. See section below on Doubles. |
215/// | String | [rust_icu_ustring::UChar] | |
216/// | Integer | `i32` | |
217/// | Date | [rust_icu_sys::UDate] (alias for `f64`) | Is used to format dates.  Depending on the date format requested in the pattern used in [UMessageFormat], the end result of date formatting could be one of a wide variety of [date formats](http://userguide.icu-project.org/formatparse/datetime).|
218///
219/// ## Double as numeric parameter
220///
221/// According to the [ICU documentation for
222/// `umsg_format`](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/umsg_8h.html#a90a4b5fe778754e5da52f7c2e5fd6048):
223///
224/// > for all numeric arguments double is assumed unless the type is explicitly
225/// > integer (long).  All choice format arguments must be of type double.
226///
227/// ## Strings
228///
229/// We determined by code inspection that the string format must be `rust_icu_ustring::UChar`.
230///
231/// # Example use
232///
233/// ```
234/// use rust_icu_sys as sys;
235/// use rust_icu_common as common;
236/// use rust_icu_ustring as ustring;
237/// use rust_icu_uloc as uloc;
238/// use rust_icu_umsg::{self as umsg, message_format};
239/// # use rust_icu_ucal as ucal;
240/// # use std::convert::TryFrom;
241/// #
242/// # struct TzSave(String);
243/// # impl Drop for TzSave {
244/// #    // Restore the system time zone upon exit.
245/// #    fn drop(&mut self) {
246/// #        ucal::set_default_time_zone(&self.0);
247/// #    }
248/// # }
249///
250/// fn testfn() -> Result<(), common::Error> {
/// # let _tz_guard = TzSave(ucal::get_default_time_zone()?);
252/// # ucal::set_default_time_zone("Europe/Amsterdam")?;
253///   let loc = uloc::ULoc::try_from("en-US")?;
254///   let msg = ustring::UChar::try_from(
255///     r"Formatted double: {0,number,##.#},
256///       Formatted integer: {1,number,integer},
257///       Formatted string: {2},
258///       Date: {3,date,full}",
259///   )?;
260///
261///   let fmt = umsg::UMessageFormat::try_from(&msg, &loc)?;
262///   let hello = ustring::UChar::try_from("Hello! Добар дан!")?;
263///   let result = umsg::message_format!(
264///     fmt,
265///     { 43.4 => Double },
266///     { 31337 => Integer },
267///     { hello => String },
268///     { 0.0 => Date },
269///   )?;
270///
271///   assert_eq!(
272///     r"Formatted double: 43.4,
273///       Formatted integer: 31,337,
274///       Formatted string: Hello! Добар дан!,
275///       Date: Thursday, January 1, 1970",
276///     result
277///   );
278/// Ok(())
279/// }
280/// # fn main() -> Result<(), common::Error> {
281/// #   testfn()
282/// # }
283/// ```
284///
285/// Implements `umsg_format`.
286/// Implements `umsg_vformat`.
#[macro_export]
macro_rules! message_format {
    // A formatter without any parameter bindings is rejected at compile time.
    ($formatter:expr $(,)?) => {
        $crate::__std::compile_error!("you should not format a message without parameters")
    };
    // One or more `{ value => Type }` bindings, with an optional trailing comma.  Each value
    // is type-checked by `checkarg!` and the results are passed on as a tuple.
    ($formatter:expr, $( {$value:expr => $kind:ident} ),+ $(,)?) => {
        unsafe {
            $crate::format_args(&$formatter, ($($crate::checkarg!($value, $kind),)+))
        }
    };
}
298
#[doc(hidden)]
#[macro_export]
macro_rules! checkarg {
    // Every rule binds the value to a local with an explicit type, so that a value which does
    // not match the declared type assertion fails to compile.
    ($value:expr, Double) => {{
        let typed: $crate::__std::primitive::f64 = $value;
        typed
    }};
    ($value:expr, String) => {{
        let typed: $crate::__ustring::UChar = $value;
        typed
    }};
    ($value:expr, Integer) => {{
        let typed: $crate::__std::primitive::i32 = $value;
        typed
    }};
    ($value:expr, Long) => {{
        let typed: $crate::__std::primitive::i64 = $value;
        typed
    }};
    ($value:expr, Date) => {{
        let typed: $crate::__sys::UDate = $value;
        typed
    }};
}
323
324#[doc(hidden)]
325pub unsafe fn format_args(
326    fmt: &UMessageFormat,
327    args: impl FormatArgs,
328) -> Result<String, common::Error> {
329    const CAP: usize = 1024;
330    let mut status = common::Error::OK_CODE;
331    let mut result = ustring::UChar::new_with_capacity(CAP);
332
333    let total_size =
334        args.format(fmt.rep.rep, result.as_mut_c_ptr(), CAP as i32, &mut status) as usize;
335    common::Error::ok_or_warning(status)?;
336
337    result.resize(total_size);
338
339    if total_size > CAP {
340        args.format(
341            fmt.rep.rep,
342            result.as_mut_c_ptr(),
343            total_size as i32,
344            &mut status,
345        );
346        common::Error::ok_or_warning(status)?;
347    }
348    String::try_from(&result)
349}
350
// Private module holding the marker supertrait of `FormatArg` and `FormatArgs`.  The module is
// not exported, so code outside of this crate can not name `Sealed` and therefore can not
// implement those traits.
mod sealed {
    pub trait Sealed {}
}
354
355/// Traits for types that can be passed to the umsg_format variadic function.
356#[doc(hidden)]
357pub trait FormatArg: Sealed {
358    type Raw;
359    fn to_raw(&self) -> Self::Raw;
360}
361
362impl Sealed for f64 {}
363impl FormatArg for f64 {
364    type Raw = f64;
365    fn to_raw(&self) -> Self::Raw {
366        *self
367    }
368}
369
370impl Sealed for ustring::UChar {}
371impl FormatArg for ustring::UChar {
372    type Raw = *const UChar;
373    fn to_raw(&self) -> Self::Raw {
374        self.as_c_ptr()
375    }
376}
377
378impl Sealed for i32 {}
379impl FormatArg for i32 {
380    type Raw = i32;
381    fn to_raw(&self) -> Self::Raw {
382        *self
383    }
384}
385
386impl Sealed for i64 {}
387impl FormatArg for i64 {
388    type Raw = i64;
389    fn to_raw(&self) -> Self::Raw {
390        *self
391    }
392}
393
394/// Trait for tuples of elements implementing `FormatArg`.
395#[doc(hidden)]
396pub trait FormatArgs: Sealed {
397    #[doc(hidden)]
398    unsafe fn format(
399        &self,
400        fmt: *const sys::UMessageFormat,
401        result: *mut UChar,
402        result_length: i32,
403        status: *mut UErrorCode,
404    ) -> i32;
405}
406
// Implements `Sealed` and `FormatArgs` for every tuple shape listed in the invocation.  Each
// shape is a parenthesized list of identifiers, which serve both as the generic type
// parameters of the tuple and as the names of the local bindings for its elements.
macro_rules! impl_format_args_for_tuples {
    ($(($($elem:ident),*),)*) => {
        $(
            impl<$($elem: FormatArg,)*> Sealed for ($($elem,)*) {}
            impl<$($elem: FormatArg,)*> FormatArgs for ($($elem,)*) {
                unsafe fn format(
                    &self,
                    fmt: *const sys::UMessageFormat,
                    result: *mut UChar,
                    result_length: i32,
                    status: *mut UErrorCode,
                ) -> i32 {
                    // Take the tuple apart; every element is bound to a local that is named
                    // like its type parameter, hence the upper case names.
                    #[allow(non_snake_case)]
                    let ($($elem,)*) = self;
                    // Shadow each element with the raw value that goes into the variadic call.
                    $(
                        #[allow(non_snake_case)]
                        let $elem = $crate::FormatArg::to_raw($elem);
                    )*

                    versioned_function!(umsg_format)(
                        fmt,
                        result,
                        result_length,
                        status,
                        $($elem,)*
                    )
                }
            }
        )*
    }
}
438
439impl_format_args_for_tuples! {
440    (A),
441    (A, B),
442    (A, B, C),
443    (A, B, C, D),
444    (A, B, C, D, E),
445    (A, B, C, D, E, F),
446    (A, B, C, D, E, F, G),
447    (A, B, C, D, E, F, G, H),
448    (A, B, C, D, E, F, G, H, I),
449    (A, B, C, D, E, F, G, H, I, J),
450    (A, B, C, D, E, F, G, H, I, J, K),
451    (A, B, C, D, E, F, G, H, I, J, K, L),
452    (A, B, C, D, E, F, G, H, I, J, K, L, M),
453    (A, B, C, D, E, F, G, H, I, J, K, L, M, N),
454    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O),
455    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P),
456    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q),
457    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R),
458    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S),
459    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T),
460    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U),
461    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V),
462    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W),
463    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X),
464    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y),
465    (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z),
466}
467
#[cfg(test)]
mod tests {
    use super::*;
    use rust_icu_ucal as ucal;

    /// Guard that holds a time zone ID and makes it the default time zone again when dropped.
    struct TzSave(String);

    impl Drop for TzSave {
        // Restore the system time zone upon exit.
        fn drop(&mut self) {
            ucal::set_default_time_zone(&self.0).expect("timezone set success");
        }
    }

    #[test]
    fn tzsave() -> Result<(), common::Error> {
        // The guard must be bound to a named variable.  With `let _ = ...` it would be dropped
        // right away, and the time zone would never be restored at the end of the test.
        let _tz_guard = TzSave(ucal::get_default_time_zone()?);
        ucal::set_default_time_zone("Europe/Amsterdam")?;
        Ok(())
    }

    #[test]
    fn basic() -> Result<(), common::Error> {
        // Named binding, so that the guard lives until the end of the test.
        let _tz_guard = TzSave(ucal::get_default_time_zone()?);
        ucal::set_default_time_zone("Europe/Amsterdam")?;

        let loc = uloc::ULoc::try_from("en-US")?;
        let msg = ustring::UChar::try_from(
            r"Formatted double: {0,number,##.#},
              Formatted integer: {1,number,integer},
              Formatted string: {2},
              Date: {3,date,full}",
        )?;

        let fmt = crate::UMessageFormat::try_from(&msg, &loc)?;
        let hello = ustring::UChar::try_from("Hello! Добар дан!")?;
        let value: i32 = 31337;
        let result = message_format!(
            fmt,
            { 43.4 => Double },
            { value => Integer },
            { hello => String },
            { 0.0 => Date }
        )?;

        assert_eq!(
            r"Formatted double: 43.4,
              Formatted integer: 31,337,
              Formatted string: Hello! Добар дан!,
              Date: Thursday, January 1, 1970",
            result
        );
        Ok(())
    }

    #[test]
    fn clone() -> Result<(), common::Error> {
        let loc = uloc::ULoc::try_from("en-US-u-tz-uslax")?;
        let msg = ustring::UChar::try_from(r"Formatted double: {0,number,##.#}")?;

        let fmt = crate::UMessageFormat::try_from(&msg, &loc)?;
        // The clone is the very thing under test here, so the lint is silenced on purpose.
        #[allow(clippy::redundant_clone)]
        let result = message_format!(fmt.clone(), { 43.43 => Double })?;
        assert_eq!(r"Formatted double: 43.4", result);
        Ok(())
    }
}