Skip to main content

urldecoder/
lib.rs

1pub mod error;
2pub mod log;
3
4#[cfg(feature = "verbose-log")]
5use std::sync::atomic::{AtomicUsize, Ordering};
6use std::{
7    fs,
8    io::{self, BufWriter, Write},
9    path::Path,
10};
11
12pub use error::*;
13use memchr::{memchr, memmem::Finder};
14use snafu::ResultExt;
15use tempfile::NamedTempFile;
16
17#[cfg(feature = "verbose-log")]
18use crate::log::VerboseLogger;
19use crate::log::{DecodeLogger, NoOpLogger};
20
/// Size limit in bytes: files strictly smaller than this are read fully into memory,
/// larger ones are memory-mapped.
const SMALL_FILE_THRESHOLD: u64 = 256 * 1024;
/// Capacity in bytes of the `BufWriter` placed in front of the temporary output file.
const IO_BUF_SIZE: usize = 64 * 1024;
/// 256-bit membership set of the bytes accepted inside a URL: ASCII letters and digits
/// plus the listed symbols.
const URL_CHAR_BITMAP: [u32; 8] = gen_url_bitmap(b"-+&@#/%?=~_|!:,.;");
/// 256-bit membership set of the bytes a URL may end with; trailing bytes outside this
/// set are trimmed off a match and treated as plain text.
const URL_END_CHAR_BITMAP: [u32; 8] = gen_url_bitmap(b"-+&@#/%=~_|");
/// Maps a byte to the value of the ASCII hex digit it represents, or to `HEX_INVALID`.
const HEX_MAP: [u8; 256] = gen_hex_map();
/// Marker stored in `HEX_MAP` for bytes that are not hex digits.
const HEX_INVALID: u8 = 0xFF;
27
/// Builds a 256-bit membership table (eight `u32` words) at compile time.
///
/// The bit for byte `b` lives in word `b >> 5` at position `b & 31`. The table always
/// contains the ASCII digits and letters, plus every byte listed in `symbols`.
const fn gen_url_bitmap(symbols: &[u8]) -> [u32; 8] {
    /// Returns `table` with the bit for `byte` set.
    const fn mark(mut table: [u32; 8], byte: u8) -> [u32; 8] {
        table[(byte >> 5) as usize] |= 1u32 << ((byte & 31) as u32);
        table
    }

    /// Returns `table` with the bits for every byte in `first..=last` set.
    const fn mark_range(mut table: [u32; 8], first: u8, last: u8) -> [u32; 8] {
        let mut byte = first;
        while byte <= last {
            table = mark(table, byte);
            byte += 1;
        }
        table
    }

    let mut table = [0u32; 8];
    table = mark_range(table, b'0', b'9');
    table = mark_range(table, b'A', b'Z');
    table = mark_range(table, b'a', b'z');

    // `for` loops are not available in `const fn`, hence the manual index walk.
    let mut n = 0;
    while n < symbols.len() {
        table = mark(table, symbols[n]);
        n += 1;
    }
    table
}
56
57const fn gen_hex_map() -> [u8; 256] {
58    let mut map = [HEX_INVALID; 256];
59    let mut i = 0;
60    while i < 10 {
61        map[(b'0' + i) as usize] = i;
62        i += 1;
63    }
64    let mut i = 0;
65    while i < 6 {
66        map[(b'a' + i) as usize] = 10 + i;
67        map[(b'A' + i) as usize] = 10 + i;
68        i += 1;
69    }
70    map
71}
72
/// Empty function marked `#[cold]`. `likely` calls it on the path it expects to be
/// rare, presumably so the optimizer lays that branch out as the unlikely one.
#[inline]
#[cold]
fn cold() {}
76
77#[inline]
78fn likely(b: bool) -> bool {
79    if !b {
80        cold()
81    }
82    b
83}
84
85#[inline(always)]
86fn decode_hex_pair(h1: u8, h2: u8) -> Option<u8> {
87    let v1 = unsafe { *HEX_MAP.get_unchecked(h1 as usize) };
88    let v2 = unsafe { *HEX_MAP.get_unchecked(h2 as usize) };
89    if likely((v1 | v2) != HEX_INVALID) {
90        Some((v1 << 4) | v2)
91    } else {
92        None
93    }
94}
95
96#[inline(always)]
97fn is_url_char(byte: u8) -> bool {
98    let idx = byte as usize;
99    unsafe { (URL_CHAR_BITMAP.get_unchecked(idx >> 5) >> (idx & 31)) & 1 == 1 }
100}
101
102#[inline(always)]
103fn is_url_end_char(byte: u8) -> bool {
104    let idx = byte as usize;
105    unsafe { (URL_END_CHAR_BITMAP.get_unchecked(idx >> 5) >> (idx & 31)) & 1 == 1 }
106}
107
108#[inline(always)]
109fn trim_url_end(slice: &[u8]) -> (&[u8], &[u8]) {
110    let mut end = slice.len();
111    while end > 0 {
112        if is_url_end_char(unsafe { *slice.get_unchecked(end - 1) }) {
113            break;
114        }
115        end -= 1;
116    }
117    unsafe { (slice.get_unchecked(..end), slice.get_unchecked(end..)) }
118}
119
120// ============================================================================
121// Core Logic
122// ============================================================================
123
/// Calls `$func($args...)` (used with `decode_slice_to_writer` and `decode_in_place`),
/// supplying the logger argument those functions take when `verbose-log` is enabled.
///
/// - With the `verbose-log` feature: `&mut logger` is appended as the last argument;
///   the logger is a fresh `VerboseLogger` when `$verbose` is true and a `NoOpLogger`
///   otherwise.
/// - Without the feature: the call is made exactly as written in both branches, and
///   `$verbose` only serves as the `if` condition.
macro_rules! decode {
    ($func:ident($($args:expr),*), $verbose:expr) => {{
        if $verbose {
            #[cfg(feature = "verbose-log")]
            {
                let mut logger = VerboseLogger::new();
                $func($($args),*, &mut logger)
            }
            #[cfg(not(feature = "verbose-log"))]
            {
                $func($($args),*)
            }
        } else {
            #[cfg(feature = "verbose-log")]
            {
                let mut logger = NoOpLogger;
                $func($($args),*, &mut logger)
            }
            #[cfg(not(feature = "verbose-log"))]
            {
                $func($($args),*)
            }
        }
    }};
}
150
151// region: in-place
152
153/// Decode URL in-place using read and write pointers.
154/// Returns the new length of the data.
155pub fn decode_in_place(
156    data: &mut [u8],
157    escape_space: bool,
158    #[cfg(feature = "verbose-log")] logger: &mut impl DecodeLogger,
159) -> usize {
160    if escape_space {
161        decode_in_place_inner::<true>(
162            data,
163            #[cfg(feature = "verbose-log")]
164            logger,
165        )
166    } else {
167        decode_in_place_inner::<false>(
168            data,
169            #[cfg(feature = "verbose-log")]
170            logger,
171        )
172    }
173}
174
175#[inline(always)]
176fn decode_in_place_inner<const ESCAPE_SPACE: bool>(
177    data: &mut [u8],
178    #[cfg(feature = "verbose-log")] logger: &mut impl DecodeLogger,
179) -> usize {
180    let mut r = 0;
181    let mut w = 0;
182    let len = data.len();
183    let finder = Finder::new(b"http");
184
185    while r < len {
186        if let Some(match_idx) = finder.find(&data[r..]) {
187            let start = r + match_idx;
188
189            let is_http = data[start..].starts_with(b"http://");
190            let is_https = data[start..].starts_with(b"https://");
191
192            if is_http || is_https {
193                // Copy plain text before URL
194                if start > r {
195                    let copy_len = start - r;
196                    if w != r {
197                        data.copy_within(r..start, w);
198                    }
199                    w += copy_len;
200                }
201
202                // Find URL end
203                let prefix_len = if is_https { 8 } else { 7 };
204                let mut end = start + prefix_len;
205                while end < len && is_url_char(data[end]) {
206                    end += 1;
207                }
208
209                let mut valid_end = end;
210                while valid_end > start {
211                    if is_url_end_char(unsafe { *data.get_unchecked(valid_end - 1) }) {
212                        break;
213                    }
214                    valid_end -= 1;
215                }
216
217                // Decode URL in-place
218                w = decode_url_in_place_indices::<ESCAPE_SPACE>(
219                    data,
220                    start,
221                    valid_end,
222                    w,
223                    #[cfg(feature = "verbose-log")]
224                    logger,
225                );
226
227                // Copy suffix after trimmed punctuation
228                let suffix_len = end - valid_end;
229                if suffix_len > 0 {
230                    if w != valid_end {
231                        data.copy_within(valid_end..end, w);
232                    }
233                    w += suffix_len;
234                }
235
236                r = end;
237            } else {
238                // Found `http` but not a url
239                let copy_len = start + 4 - r;
240                if w != r {
241                    data.copy_within(r..start + 4, w);
242                }
243                w += copy_len;
244                r = start + 4;
245            }
246        } else {
247            // Copy remaining
248            if r < len {
249                let copy_len = len - r;
250                if w != r {
251                    data.copy_within(r..len, w);
252                }
253                w += copy_len;
254            }
255            break;
256        }
257    }
258    w
259}
260
261#[inline(always)]
262fn decode_url_in_place_indices<const ESCAPE_SPACE: bool>(
263    data: &mut [u8],
264    src_start: usize,
265    src_end: usize,
266    mut dst: usize,
267    #[cfg(feature = "verbose-log")] logger: &mut impl DecodeLogger,
268) -> usize {
269    #[cfg(not(feature = "verbose-log"))]
270    let mut logger = NoOpLogger;
271    logger.clear();
272
273    let mut i = src_start;
274    let first_pct = match memchr(b'%', &data[i..src_end]) {
275        Some(idx) => idx,
276        None => {
277            let len = src_end - i;
278            logger.log_orig_slice(&data[i..src_end]);
279            logger.log_res_slice(&data[i..src_end]);
280            if dst != i {
281                data.copy_within(i..src_end, dst);
282            }
283            return dst + len;
284        }
285    };
286
287    if first_pct > 0 {
288        logger.log_orig_slice(&data[i..i + first_pct]);
289        logger.log_res_slice(&data[i..i + first_pct]);
290        if dst != i {
291            data.copy_within(i..i + first_pct, dst);
292        }
293        dst += first_pct;
294        i += first_pct;
295    }
296
297    let mut literal_start = i;
298    let mut changed = false;
299
300    while i < src_end {
301        if data[i] == b'%' && i + 2 < src_end {
302            let h1 = data[i + 1];
303            let h2 = data[i + 2];
304            if let Some(decoded) = decode_hex_pair(h1, h2) {
305                if ESCAPE_SPACE && decoded == b' ' {
306                    i += 3;
307                    continue;
308                }
309
310                changed = true;
311                if i > literal_start {
312                    let len = i - literal_start;
313                    logger.log_orig_slice(&data[literal_start..i]);
314                    logger.log_res_slice(&data[literal_start..i]);
315                    if dst != literal_start {
316                        data.copy_within(literal_start..i, dst);
317                    }
318                    dst += len;
319                }
320
321                logger.log_orig(b'%');
322                logger.log_orig(h1);
323                logger.log_orig(h2);
324                logger.log_res(decoded);
325
326                data[dst] = decoded;
327                dst += 1;
328                i += 3;
329                literal_start = i;
330                continue;
331            } else {
332                i += 1;
333                continue;
334            }
335        }
336        if data[i] == b'%' {
337            i += 1;
338        } else {
339            match memchr(b'%', &data[i..src_end]) {
340                Some(offset) => i += offset,
341                None => i = src_end,
342            }
343        }
344    }
345
346    if literal_start < src_end {
347        let len = src_end - literal_start;
348        logger.log_orig_slice(&data[literal_start..src_end]);
349        logger.log_res_slice(&data[literal_start..src_end]);
350        if dst != literal_start {
351            data.copy_within(literal_start..src_end, dst);
352        }
353        dst += len;
354    }
355
356    logger.print_if_changed(changed);
357    dst
358}
359
360#[cfg(not(feature = "safe"))]
361fn decode_file_in_place(
362    path: &Path,
363    escape_space: bool,
364    #[allow(unused)] verbose: bool,
365    #[cfg(feature = "verbose-log")] p_counter: &AtomicUsize,
366    #[cfg(feature = "verbose-log")] c_counter: &AtomicUsize,
367) -> Result<()> {
368    use std::fs::{self, OpenOptions};
369
370    let metadata = fs::metadata(path).context(ReadInputSnafu)?;
371    let file_len = metadata.len();
372
373    if file_len == 0 {
374        #[cfg(feature = "verbose-log")]
375        p_counter.fetch_add(1, Ordering::Relaxed);
376        return Ok(());
377    }
378
379    #[allow(unused)]
380    let changed = if file_len < SMALL_FILE_THRESHOLD {
381        let mut buf = fs::read(path).context(ReadInputSnafu)?;
382        let new_len = decode!(decode_in_place(&mut buf, escape_space), verbose);
383        let is_changed = new_len < file_len as usize;
384
385        if is_changed {
386            fs::write(path, &buf[..new_len]).context(WriteOutputSnafu)?;
387        }
388        is_changed
389    } else {
390        let file = OpenOptions::new()
391            .read(true)
392            .write(true)
393            .open(path)
394            .context(OpenInputSnafu { path })?;
395
396        let mut mmap = unsafe {
397            memmap2::MmapOptions::new()
398                .map_mut(&file)
399                .context(ReadInputSnafu)?
400        };
401
402        #[cfg(unix)]
403        mmap.advise(memmap2::Advice::Sequential);
404
405        let new_len = decode!(decode_in_place(&mut mmap, escape_space), verbose);
406        let is_changed = new_len < file_len as usize;
407
408        if is_changed {
409            mmap.flush().context(WriteOutputSnafu)?;
410            drop(mmap);
411            file.set_len(new_len as u64).context(WriteOutputSnafu)?;
412        }
413        is_changed
414    };
415
416    #[cfg(feature = "verbose-log")]
417    {
418        p_counter.fetch_add(1, Ordering::Relaxed);
419        if changed {
420            c_counter.fetch_add(1, Ordering::Relaxed);
421            if verbose {
422                println!("Processed File: {:?}", path);
423            }
424        }
425    }
426
427    Ok(())
428}
429
430// region: to writer
431
432pub fn decode_slice_to_writer<W: Write>(
433    input: &[u8],
434    writer: &mut W,
435    escape_space: bool,
436    #[cfg(feature = "verbose-log")] logger: &mut impl DecodeLogger,
437) -> io::Result<bool> {
438    let mut pos = 0;
439    let len = input.len();
440    let mut changed = false;
441    let finder = Finder::new(b"http");
442
443    while pos < len {
444        if let Some(match_idx) = finder.find(&input[pos..]) {
445            let start = pos + match_idx;
446
447            let is_http = input[start..].starts_with(b"http://");
448            let is_https = input[start..].starts_with(b"https://");
449
450            if is_http || is_https {
451                // Write plain text before URL
452                if start > pos {
453                    writer.write_all(&input[pos..start])?;
454                }
455
456                // Find URL end
457                let prefix_len = if is_https { 8 } else { 7 };
458                let mut end = start + prefix_len;
459                while end < len && is_url_char(input[end]) {
460                    end += 1;
461                }
462
463                let raw_url = &input[start..end];
464                let (valid_url, suffix) = trim_url_end(raw_url);
465
466                // Decode URL and write directly
467                #[cfg(feature = "verbose-log")]
468                let url_changed = decode_url_to_writer(valid_url, writer, escape_space, logger)?;
469                #[cfg(not(feature = "verbose-log"))]
470                let url_changed = decode_url_to_writer(valid_url, writer, escape_space)?;
471                if url_changed {
472                    changed = true;
473                }
474
475                // Write suffix after trimmed punctuation
476                if !suffix.is_empty() {
477                    writer.write_all(suffix)?;
478                }
479
480                pos = end;
481            } else {
482                // find `http` but not a url
483                writer.write_all(&input[pos..start + 4])?;
484                pos = start + 4;
485            }
486        } else {
487            // write all
488            if pos < len {
489                writer.write_all(&input[pos..])?;
490            }
491            break;
492        }
493    }
494    Ok(changed)
495}
496
497#[inline(always)]
498pub fn decode_url_to_writer<W: Write>(
499    url: &[u8],
500    writer: &mut W,
501    escape_space: bool,
502    #[cfg(feature = "verbose-log")] logger: &mut impl DecodeLogger,
503) -> io::Result<bool> {
504    // static dispatch: completely remove `escape_space` branch at compile time
505    if escape_space {
506        decode_inner::<true, W>(
507            url,
508            writer,
509            #[cfg(feature = "verbose-log")]
510            logger,
511        )
512    } else {
513        decode_inner::<false, W>(
514            url,
515            writer,
516            #[cfg(feature = "verbose-log")]
517            logger,
518        )
519    }
520}
521
522#[inline(always)]
523fn decode_inner<const ESCAPE_SPACE: bool, W: Write>(
524    url: &[u8],
525    writer: &mut W,
526    #[cfg(feature = "verbose-log")] logger: &mut impl DecodeLogger,
527) -> io::Result<bool> {
528    #[cfg(not(feature = "verbose-log"))]
529    let mut logger = NoOpLogger;
530    logger.clear();
531
532    let first_pct = match memchr(b'%', url) {
533        Some(idx) => idx,
534        None => {
535            writer.write_all(url)?;
536            logger.log_orig_slice(url);
537            logger.log_res_slice(url);
538            return Ok(false);
539        }
540    };
541
542    if first_pct > 0 {
543        writer.write_all(&url[..first_pct])?;
544        logger.log_orig_slice(&url[..first_pct]);
545        logger.log_res_slice(&url[..first_pct]);
546    }
547
548    let mut i = first_pct;
549    let len = url.len();
550    let mut changed = false;
551    let mut literal_start = i; // for batch write
552
553    while i < len {
554        if url[i] == b'%' && i + 2 < len {
555            let h1 = url[i + 1];
556            let h2 = url[i + 2];
557            if let Some(decoded) = decode_hex_pair(h1, h2) {
558                if ESCAPE_SPACE && decoded == b' ' {
559                    i += 3;
560                    continue;
561                }
562
563                changed = true;
564                if i > literal_start {
565                    writer.write_all(&url[literal_start..i])?;
566                    logger.log_orig_slice(&url[literal_start..i]);
567                    logger.log_res_slice(&url[literal_start..i]);
568                }
569                writer.write_all(&[decoded])?;
570                logger.log_orig(b'%');
571                logger.log_orig(h1);
572                logger.log_orig(h2);
573                logger.log_res(decoded);
574
575                i += 3;
576                literal_start = i;
577                continue;
578            } else {
579                i += 1;
580                continue;
581            }
582        }
583        if url[i] == b'%' {
584            i += 1;
585        } else {
586            match memchr(b'%', &url[i..]) {
587                Some(offset) => i += offset,
588                None => i = len,
589            }
590        }
591    }
592    if literal_start < len {
593        writer.write_all(&url[literal_start..len])?;
594        logger.log_orig_slice(&url[literal_start..len]);
595        logger.log_res_slice(&url[literal_start..len]);
596    }
597
598    logger.print_if_changed(changed);
599    Ok(changed)
600}
601
602/// Decode String
603pub fn decode_str(
604    input: &str,
605    escape_space: bool,
606    #[cfg(feature = "verbose-log")] verbose: bool,
607) -> Result<(String, bool)> {
608    #[cfg(not(feature = "verbose-log"))]
609    let verbose = false;
610    let mut buf = Vec::with_capacity(input.len());
611
612    let changed = decode!(
613        decode_slice_to_writer(input.as_bytes(), &mut buf, escape_space),
614        verbose
615    )
616    .context(WriteOutputSnafu)?;
617
618    Ok((
619        simdutf8::basic::from_utf8(&buf)
620            .context(InvalidUtf8Snafu)?
621            .to_owned(),
622        changed,
623    ))
624}
625
626/// Decode file and overwrite.
627pub fn decode_file(
628    path: impl AsRef<Path>,
629    escape_space: bool,
630    dry_run: bool,
631    #[cfg(feature = "verbose-log")] verbose: bool,
632    #[cfg(feature = "verbose-log")] p_counter: &AtomicUsize,
633    #[cfg(feature = "verbose-log")] c_counter: &AtomicUsize,
634) -> Result<()> {
635    #[cfg(not(feature = "verbose-log"))]
636    let verbose = false;
637
638    let path = path.as_ref();
639
640    #[cfg(not(feature = "safe"))]
641    {
642        if !dry_run {
643            return decode_file_in_place(
644                path,
645                escape_space,
646                verbose,
647                #[cfg(feature = "verbose-log")]
648                p_counter,
649                #[cfg(feature = "verbose-log")]
650                c_counter,
651            );
652        }
653    }
654
655    // Safe fallback / dry_run logic
656    let metadata = fs::metadata(path).context(ReadInputSnafu)?;
657    let file_len = metadata.len();
658
659    if file_len == 0 {
660        #[cfg(feature = "verbose-log")]
661        p_counter.fetch_add(1, Ordering::Relaxed);
662        return Ok(());
663    }
664
665    #[allow(unused)]
666    let mut changed = false;
667
668    #[allow(unused)]
669    if file_len < SMALL_FILE_THRESHOLD {
670        let mut buf = fs::read(path).context(ReadInputSnafu)?;
671        let new_len = decode!(decode_in_place(&mut buf, escape_space), verbose);
672        changed = new_len < buf.len();
673
674        if changed && !dry_run {
675            buf.truncate(new_len);
676            let parent = path.parent().unwrap_or_else(|| Path::new("."));
677
678            let mut temp_file =
679                NamedTempFile::new_in(parent).context(CreateTempSnafu { dir: parent })?;
680
681            temp_file.write_all(&buf).context(WriteOutputSnafu)?;
682            temp_file.flush().context(WriteOutputSnafu)?;
683
684            // Set permissions
685            let _ = temp_file.as_file().set_permissions(metadata.permissions());
686            temp_file.persist(path).context(PersistTempSnafu { path })?;
687        }
688    } else {
689        // mmap
690        let file = fs::File::open(path).context(OpenInputSnafu { path })?;
691        let mmap = unsafe {
692            memmap2::MmapOptions::new()
693                .map(&file)
694                .context(ReadInputSnafu)?
695        };
696
697        #[cfg(unix)]
698        mmap.advise(memmap2::Advice::Sequential);
699
700        if dry_run {
701            let mut sink = io::sink();
702            changed = decode!(
703                decode_slice_to_writer(&mmap, &mut sink, escape_space),
704                verbose
705            )
706            .context(WriteOutputSnafu)?;
707        } else {
708            let parent = path.parent().unwrap_or_else(|| Path::new("."));
709
710            let mut temp_file =
711                NamedTempFile::new_in(parent).context(CreateTempSnafu { dir: parent })?;
712
713            {
714                let mut buf_writer = BufWriter::with_capacity(IO_BUF_SIZE, &mut temp_file);
715                changed = decode!(
716                    decode_slice_to_writer(&mmap, &mut buf_writer, escape_space),
717                    verbose
718                )
719                .context(WriteOutputSnafu)?;
720                buf_writer.flush().context(WriteOutputSnafu)?;
721            }
722
723            drop(mmap);
724            drop(file);
725
726            if changed {
727                // Set permissions AFTER writing to avoid PermissionDenied if original is
728                // read-only
729                let _ = temp_file.as_file().set_permissions(metadata.permissions());
730                temp_file.persist(path).context(PersistTempSnafu { path })?;
731            }
732        }
733    }
734
735    #[cfg(feature = "verbose-log")]
736    {
737        p_counter.fetch_add(1, Ordering::Relaxed);
738        if changed {
739            c_counter.fetch_add(1, Ordering::Relaxed);
740            if verbose {
741                println!("Processed File: {:?}", path);
742            }
743        }
744    }
745
746    Ok(())
747}
748
#[cfg(test)]
mod tests {

    use tempfile::NamedTempFile;

    use super::*;

    /// Percent-encoded sample URL and its decoded form, shared by the tests below.
    const ENCODED: &str = "https://www.baidu.com/s?ie=UTF-8&wd=%E5%A4%A9%E6%B0%94";
    const DECODED: &str = "https://www.baidu.com/s?ie=UTF-8&wd=天气";

    /// Calls `decode_str`, forwarding `verbose` only when the `verbose-log` feature
    /// is enabled, and unwraps the result.
    fn run(input: &str, escape_space: bool, #[allow(unused)] verbose: bool) -> (String, bool) {
        decode_str(
            input,
            escape_space,
            #[cfg(feature = "verbose-log")]
            verbose,
        )
        .unwrap()
    }

    #[test]
    fn test_basic() {
        // A bare URL is decoded.
        assert_eq!(run(ENCODED, false, false), (DECODED.to_string(), true));

        // Surrounding brackets are not part of the URL and stay untouched.
        assert_eq!(
            run(
                "(https://www.baidu.com/s?ie=UTF-8&wd=%E5%A4%A9%E6%B0%94)",
                false,
                false
            ),
            (
                "(https://www.baidu.com/s?ie=UTF-8&wd=天气)".to_string(),
                true
            )
        );

        // With `escape_space`, `%20` stays encoded while other escapes are decoded.
        assert_eq!(
            run(
                "https://osu.ppy.sh/beatmapsets?q=malody%204k%20extra%20dan%20v3%E4%B8%AD",
                true,
                true
            ),
            (
                "https://osu.ppy.sh/beatmapsets?q=malody%204k%20extra%20dan%20v3中".to_string(),
                true
            )
        );

        // A URL without escapes is reported as unchanged.
        assert_eq!(
            run("https://osu.ppy.sh", true, false),
            ("https://osu.ppy.sh".to_string(), false)
        );
    }

    #[test]
    fn test_long_url() {
        // 10000 encoded spaces appended to the sample URL.
        let url = format!("{}{}", ENCODED, "%20".repeat(10000));
        let expected = format!("{}{}", DECODED, " ".repeat(10000));
        assert_eq!(run(&url, false, false), (expected.clone(), true));

        // The same URL behind a long plain-text prefix.
        let base = "a".repeat(60000);
        assert_eq!(
            run(&format!("{}{}", base, url), false, false),
            (format!("{}{}", base, expected), true)
        );
    }

    #[test]
    fn test_decode_file() {
        let t1 = NamedTempFile::new().unwrap().into_temp_path();
        fs::write(
            &t1,
            "xxxxhttps://www.baidu.com/s?ie=UTF-8&wd=%E5%A4%A9%E6%B0%94xxxx",
        )
        .unwrap();

        let outcome = decode_file(
            &t1,
            false,
            false,
            #[cfg(feature = "verbose-log")]
            false,
            #[cfg(feature = "verbose-log")]
            &AtomicUsize::new(0),
            #[cfg(feature = "verbose-log")]
            &AtomicUsize::new(0),
        );
        outcome.unwrap();

        assert_eq!(
            fs::read_to_string(t1).unwrap(),
            "xxxxhttps://www.baidu.com/s?ie=UTF-8&wd=天气xxxx"
        );
    }
}