leetcode_rust/problems/p000_0xx/
p000_008.rs

1//! # Description
2//!
3//! Implement the `myAtoi(string s)` function, which converts a string to a
4//! 32-bit signed integer (similar to C/C++'s `atoi` function).
5//!
6//! The algorithm for `myAtoi(string s)` is as follows:
7//!
8//! Read in and ignore any leading whitespace.
9//!
10//! Check if the next character (if not already at the end of the string) is
11//! '`-`' or '`+`'. Read this character in if it is either. This determines if the
12//! final result is negative or positive respectively. Assume the result is
13//! positive if neither is present.
14//!
15//! Read in next the characters until the next non-digit character or the end
16//! of the input is reached. The rest of the string is ignored.
17//! 
18//! Convert these digits into an integer (i.e. "123" -> 123, "0032" -> 32). If
19//! no digits were read, then the integer is `0`. Change the sign as necessary
20//! (from step 2).
21//!
22//! If the integer is out of the 32-bit signed integer range 
23//! $[-2^{31}, 2^{31} - 1]$, then clamp the integer so that it remains in the 
24//! range. Specifically, integers less than $-2^{31}$ should be clamped to 
25//! $-2^{31}$, and integers greater than $2^{31} - 1$ should be clamped to 
26//! $2^{31} - 1$.
27//! 
28//! Return the integer as the final result.
29//! 
30//! Note:
31//!
32//! Only the space character '` `' is considered a whitespace character.
33//! Do not ignore any characters other than the leading whitespace or the rest
34//! of the string after the digits.
35//!  
36//!
37//! Example 1:
38//! ```plain
39//! Input: s = "42"
40//! Output: 42
41//! Explanation: The underlined characters are what is read in, the caret is the
42//! current reader position.
43//! Step 1: "42" (no characters read because there is no leading whitespace)
44//!          ^
45//! Step 2: "42" (no characters read because there is neither a '-' nor '+')
46//!          ^
47//! Step 3: "42" ("42" is read in)
48//!            ^
49//! The parsed integer is 42.
50//! Since 42 is in the range $[-2^{31}, 2^{31} - 1]$, the final result is 42.
51//! ```
52//!
53//! Example 2:
54//! ```plain
55//! Input: s = "   -42"
56//! Output: -42
57//! Explanation:
58//! Step 1: "   -42" (leading whitespace is read and ignored)
59//!             ^
60//! Step 2: "   -42" ('-' is read, so the result should be negative)
61//!              ^
62//! Step 3: "   -42" ("42" is read in)
63//!                ^
64//! The parsed integer is -42.
65//! Since -42 is in the range $[-2^{31}, 2^{31} - 1]$, the final result is -42.
66//! ```
67//!
68//! Example 3:
69//! ```plain
70//! Input: s = "4193 with words"
71//! Output: 4193
72//! Explanation:
73//! Step 1: "4193 with words" (no characters read because there is no leading whitespace)
74//!          ^
75//! Step 2: "4193 with words" (no characters read because there is neither a '-' nor '+')
76//!          ^
77//! Step 3: "4193 with words" ("4193" is read in; reading stops because the next character is a non-digit)
78//!              ^
79//! The parsed integer is 4193.
80//! Since 4193 is in the range $[-2^{31}, 2^{31} - 1]$, the final result is 4193.
81//! ```
82//!
83//! Constraints:
84//!
85//! - `0 $\leqslant$ s.length $\leqslant$ 200`
86//! - `s` consists of English letters (lower-case and upper-case), digits 
87//! (`0-9`), '` `', '`+`', '`-`', and '`.`'.
88//! 
89//! Source: <https://leetcode.com/problems/string-to-integer-atoi/>
90
91////////////////////////////////////////////////////////////////////////////////
92
93/// Convert string to 32-bit integer
94///
95/// # Arguments
96/// * `s` - input string
97/// 
98/// # Examples
99/// ```
100/// use leetcode_rust::problems::p000_0xx::p000_008::my_atoi;
101/// 
102/// assert!(my_atoi(String::from("-12.5")) == -12);
103/// ```
104pub fn my_atoi(s: String) -> i32 {
105    // Upper bound for possitive i32. The last element should be increased
106    // if given string indicates a negative i32 (no sign symbol included).
107    let mut threshold_val: [u8; 10] = [50, 49, 52, 55, 52, 56, 51, 54, 52, 55];
108
109    // Indicates whether the given string starts with a negative integer.
110    let mut is_negative = false;
111
112    // Indicates existence of digits in given string.
113    let mut has_digits = false;
114
115    // Current index position during scanning of input string.
116    let mut curr_idx: usize = 0;
117
118    // Indicates whether scanned part should be treated as an integer.
119    let mut is_started = false;
120
121    // Temp value during looping and return value after looping complete.
122    let mut val: i32 = 0;
123
124    // Bytes form of input string. For faster comparison and computation.
125    let s_bytes = s.as_bytes();
126
127    // Indicates whether the scanned part correspond to an overflowed interger.
128    let mut is_overflow = false;
129
130    // In some cases, we should just ignore overflow detection because the
131    // parsed digits are simply less than same digit in possitive / negative
132    // overflow threshold value respectively.
133    let mut is_overflow_ignored = false;
134
135    // Used to check if given string starts with exactly same digits comparing
136    // with threshold.
137    let mut is_full_match = true;
138
139    // A vector containing all parsed and valid digits.
140    let mut val_vec: Vec<u8> = vec![];
141
142    // Loop forever.
143    // Looping through all characters in sequence or escape during looping are
144    // controlled by additional flags.
145    loop {
146        // Guard condition, exit looping when no more characters to check.
147        if curr_idx == s_bytes.len() {
148            break;
149        }
150
151        if s_bytes[curr_idx] == 32 {
152            // This is a whitespace character, check its position.
153            if !is_started {
154                // Leading whitespace, ignore it
155                curr_idx += 1;
156                continue;
157            } else {
158                // None leading whitespace, end of reading because we already
159                // found some significant digits.
160                break;
161            }
162        }
163
164        if [43u8, 45u8].contains(&s_bytes[curr_idx]) {
165            // Check positive and negative signs.
166            if has_digits {
167                // Signs after digits (even after 0 is not allowed)
168                // e.g. `12-222`, `0+123`
169                break;
170            }
171            if !is_started {
172                // Should only adjust sign when this is the very first valid
173                // symbol in the given string.
174                if s_bytes[curr_idx] == 45 {
175                    // Adjust flag and set new overflow threshold.
176                    is_negative = true;
177                    threshold_val = [50, 49, 52, 55, 52, 56, 51, 54, 52, 56];
178                }
179                // Setup flag to avoid `-+` sequences.
180                is_started = true;
181                curr_idx += 1;
182                continue;
183            }
184        }
185
186        // Now parse digit and detect overflow.
187        if 48 <= s_bytes[curr_idx] && s_bytes[curr_idx] <= 57 {
188            // Once a new digit found, update related flags to avoid malformed
189            // sequences like: `0 123` and `123-6`.
190            has_digits = true;
191            is_started = true;
192
193            if val == 0 && s_bytes[curr_idx] == 48 {
194                // Skip leading zeros.
195                curr_idx += 1;
196                continue;
197            }
198
199            // Digits
200            if val_vec.len() >= threshold_val.len() - 1 {
201                // Only check overflow when parsed digits are the same or 1
202                // digit shorter than threshold. Otherwise it could waste
203                // execution time.
204
205                // The following traversal checks:
206                // 1. if parsed part has exact same digits as threshold;
207                // 2. if parsed part should be ignored in future checking.
208                for idx in 0..val_vec.len() {
209                    if !is_overflow {
210                        // Save time
211                        if !is_overflow_ignored && val_vec[idx] > threshold_val[idx] {
212                            // One digit is greater than threshold, no need to
213                            // check later ones.
214                            is_overflow = true;
215                            is_full_match = false;
216                            break;
217                        } else if val_vec[idx] < threshold_val[idx] {
218                            // One digit is smaller than threshold, means as
219                            // long as its shorter than threshold, it cannot
220                            // overflow.
221                            // But it still needs testing for its length
222                            // in a later step.
223                            is_full_match = false;
224                            is_overflow_ignored = true;
225                        }
226                    }
227                }
228
229                if !is_overflow {
230                    // Test for current digit:
231                    // 1. if it extends length of parsed number that causes
232                    // overflow.
233                    // 2. if it increases parsed number that causes overflow.
234                    if val_vec.len() == threshold_val.len() - 1 {
235                        // Check if adding this digit causes overflow
236                        if is_full_match
237                            && s_bytes[curr_idx] > threshold_val[threshold_val.len() - 1]
238                        {
239                            is_overflow = true;
240                            break;
241                        }
242                    } else if val_vec.len() == threshold_val.len() {
243                        // Check if extending parsed part causes overflow.
244                        is_overflow = true;
245                        break;
246                    }
247                }
248            }
249            if is_overflow {
250                // Do NOT prepend current digit to parsed number.
251                break;
252            }
253            val = val * 10 + (s_bytes[curr_idx] - 48) as i32;
254            val_vec.push(s_bytes[curr_idx]);
255            curr_idx += 1;
256            continue;
257        }
258
259        break;
260    }
261    // The following steps simply determines what value to return by
262    // overflowing flag and integer sign.
263    if is_overflow {
264        if is_negative {
265            -2147483648
266        } else {
267            2147483647
268        }
269    } else {
270        if is_negative {
271            -val
272        } else {
273            val
274        }
275    }
276}