stripper_lib/
strip.rs

1// Copyright 2015 Gomez Guillaume
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//   http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fs::File;
16use std::io::{self, Read, Write};
17use std::ops::Deref;
18use std::path::Path;
19use std::process::exit;
20use types::{EventInfo, EventType, ParseResult, Type, TypeStruct};
21use utils::{join, write_comment, write_file, write_file_comment};
22
/// Characters treated as delimiters of an identifier: `get_before` cuts a word at the first
/// one it finds and `remove_stop_chars` deletes all of them from a word.
const STOP_CHARACTERS: &[char] = &['\t', '\n', '\r', '<', '{', ':', ';', '!', '(', ','];
/// Openers of regular (non-doc) comments, as passed to `find_one_of`.
const COMMENT_ID: &[&str] = &["//", "/*"];
/// Openers of doc comments, both line (`///`, `//!`) and block (`/**`, `/*!`) flavours.
pub(crate) const DOC_COMMENT_ID: &[&str] = &["///", "/*!", "//!", "/**"];
/// Marker line that makes `check_if_should_be_ignored` report the doc comment below it as
/// ignored.
pub(crate) const IGNORE_NEXT_COMMENT: &str = "// rustdoc-stripper-ignore-next";
/// Marker line that makes `check_if_should_be_ignored` report the doc comment below it as not
/// ignored; `remove_comments` also looks for it when deleting comment lines.
pub(crate) const IGNORE_NEXT_COMMENT_STOP: &str = "// rustdoc-stripper-ignore-next-stop";
28
/// Advances `*it` to the first word containing `limit`, counting the lines that are skipped.
///
/// The current word is inspected first, ignoring its `start_remove` prefix: if what remains
/// already contains `limit`, nothing moves. Otherwise the cursor steps forward until a word
/// containing `limit` is found (or the words run out), and `*line` is incremented for every
/// `"\n"` word passed over, plus once more when the cursor stops on a `"\n"` word.
fn move_to(words: &[&str], it: &mut usize, limit: &str, line: &mut usize, start_remove: &str) {
    let opener_tail = &words[*it][start_remove.len()..];
    if opener_tail.contains(limit) {
        return;
    }
    *it += 1;

    let rest = &words[*it..];
    let skipped = rest
        .iter()
        .position(|w| w.contains(limit))
        .unwrap_or(rest.len());
    *line += rest[..skipped].iter().filter(|w| **w == "\n").count();
    *it += skipped;

    // The stopping word itself is a line break when `limit` is "\n".
    if words.get(*it) == Some(&"\n") {
        *line += 1;
    }
}
47
/// Advances `*it` until the current word ends with `limit`, `limit;` or `limit` followed by a
/// newline, or until the words are exhausted.
///
/// Every newline character found in a skipped word is added to `*line`; the word the cursor
/// stops on is not counted.
fn move_until(words: &[&str], it: &mut usize, limit: &str, line: &mut usize) {
    let with_semicolon = format!("{};", limit);
    let with_newline = format!("{}\n", limit);
    let is_terminator = |w: &str| {
        w.ends_with(limit) || w.ends_with(&with_semicolon) || w.ends_with(&with_newline)
    };

    while let Some(word) = words.get(*it) {
        if is_terminator(*word) {
            break;
        }
        *line += word.matches('\n').count();
        *it += 1;
    }
}
60
/// Returns the part of `word` that precedes the first character listed in `limits`, or the
/// whole word when none of them occurs.
fn get_before<'a>(word: &'a str, limits: &[char]) -> &'a str {
    // `split` always yields at least one piece: the text before the first delimiter.
    word.split(limits).next().unwrap_or(word)
}
64
/// Collects the words that follow the current one up to (not including) the next `{` or `;`.
///
/// `*it` ends on the last collected word (or stays put when nothing was collected), and
/// `*line` is incremented each time the cursor sits on a `"\n"` word while a following word
/// still exists.
fn get_impl(words: &[&str], it: &mut usize, line: &mut usize) -> Vec<String> {
    let mut parts = Vec::new();

    while let Some(&next) = words.get(*it + 1) {
        if words[*it] == "\n" {
            *line += 1;
        }
        if matches!(next, "{" | ";") {
            break;
        }
        *it += 1;
        parts.push(next.to_owned());
    }
    parts
}
80
81pub fn add_to_type_scope(
82    current: &Option<TypeStruct>,
83    e: &Option<TypeStruct>,
84) -> Option<TypeStruct> {
85    match *current {
86        Some(ref c) => match *e {
87            Some(ref t) => {
88                let mut tmp = t.clone();
89                tmp.parent = Some(Box::new(c.clone()));
90                Some(tmp)
91            }
92            _ => {
93                let mut tmp = TypeStruct::empty();
94                tmp.parent = Some(Box::new(c.clone()));
95                Some(tmp)
96            }
97        },
98        None => e.as_ref().cloned(),
99    }
100}
101
102pub fn type_out_scope(current: &Option<TypeStruct>) -> Option<TypeStruct> {
103    current
104        .as_ref()
105        .and_then(|c| c.parent.as_ref().map(|p| p.deref().clone()))
106}
107
108fn get_mod(current: &Option<TypeStruct>) -> bool {
109    match *current {
110        Some(ref t) => {
111            if t.ty != Type::Mod {
112                println!("Mod/File comments cannot be put here!");
113                false
114            } else {
115                true
116            }
117        }
118        None => true,
119    }
120}
121
/// Outcome of searching a piece of source text for its first comment (see `find_one_of`).
///
/// The tuple carried by the comment variants is the value returned by `get_three_parts`:
/// `(code before the comment, comment text, text remaining after the comment)`.
enum BlockKind<'a> {
    /// A regular comment (opener taken from the `comments` list given to `find_one_of`).
    Comment((String, String, &'a str)),
    /// A doc comment (opener taken from the `doc_comments` list given to `find_one_of`).
    DocComment((String, String, &'a str)),
    /// No comment was found; holds the whole text that was searched.
    Other(&'a str),
}
127
/// Splits the text following a comment opener into the comment itself and what comes after.
///
/// * `before` - code preceding the comment; returned as an owned copy.
/// * `comment_sign` - marker put in front of the returned comment text, followed by a space.
/// * `after` - text in which the end of the comment is searched.
/// * `stop` - terminator of the comment (`"\n"` or `"*/"` for the callers in this file).
///
/// Returns `(before, comment, rest)`. `rest` starts right after `stop`, except for a `"\n"`
/// terminator, which is kept at the start of `rest`. When `stop` never occurs the comment
/// runs to the end of the input and `rest` is empty.
fn get_three_parts<'a>(
    before: &'a str,
    comment_sign: &str,
    after: &'a str,
    stop: &str,
) -> (String, String, &'a str) {
    match after.find(stop) {
        Some(pos) => {
            // The newline is left in `rest` so it is still seen as a line break later on.
            let extra = if stop != "\n" { stop.len() } else { 0 };
            (
                before.to_owned(),
                format!("{} {}", comment_sign, &after[..pos]),
                &after[pos + extra..],
            )
        }
        // Unterminated comment: everything belongs to it. Slicing at `len()` is always valid
        // (empty input, multi-byte last character) and never hands a piece of the comment
        // back to the caller as if it were code.
        None => (
            before.to_owned(),
            format!("{} {}", comment_sign, after),
            &after[after.len()..],
        ),
    }
}
149
150fn check_if_should_be_ignored(text: &str) -> bool {
151    let mut ignore_until_multi_end = false;
152    for line in text.split('\n').rev() {
153        let line = line.trim();
154        if !ignore_until_multi_end && !line.starts_with("//") {
155            if line.trim().ends_with("*/") {
156                ignore_until_multi_end = !line.starts_with("/**") && !line.starts_with("/*!");
157                continue;
158            }
159        } else if line.starts_with("/*") {
160            ignore_until_multi_end = false;
161            continue;
162        }
163        if !ignore_until_multi_end {
164            if line == IGNORE_NEXT_COMMENT_STOP {
165                return false;
166            } else if line == IGNORE_NEXT_COMMENT {
167                return true;
168            }
169            if !line.starts_with("///") && !line.starts_with("//!") {
170                break;
171            }
172        }
173    }
174    false
175}
176
177fn find_one_of<'a>(comments: &[&str], doc_comments: &[&str], text: &'a str) -> BlockKind<'a> {
178    let mut last_pos = 0;
179
180    let mut tmp_text = &text[last_pos..];
181    while let Some(pos) = tmp_text.find('/') {
182        tmp_text = &tmp_text[pos..];
183        last_pos += pos;
184        for com in doc_comments {
185            if let Some(after) = tmp_text.strip_prefix(com) {
186                if &com[1..2] == "*" {
187                    return BlockKind::DocComment(get_three_parts(
188                        &text[..last_pos],
189                        com,
190                        after,
191                        "*/",
192                    ));
193                } else {
194                    return BlockKind::DocComment(get_three_parts(
195                        &text[..last_pos],
196                        com,
197                        after,
198                        "\n",
199                    ));
200                }
201            }
202        }
203        for com in comments {
204            if tmp_text.starts_with(com) {
205                if &com[1..2] == "*" {
206                    return BlockKind::Comment(get_three_parts(
207                        &text[0..last_pos],
208                        "",
209                        tmp_text,
210                        "*/",
211                    ));
212                } else {
213                    return BlockKind::Comment(get_three_parts(
214                        &text[0..last_pos],
215                        "",
216                        tmp_text,
217                        "\n",
218                    ));
219                }
220            }
221        }
222        if !tmp_text.is_empty() {
223            tmp_text = &tmp_text[1..];
224            last_pos += 1;
225        } else {
226            break;
227        }
228    }
229    BlockKind::Other(text)
230}
231
/// Inserts spaces around the tokens the parser cares about so that a later split on spaces
/// yields them as separate words.
///
/// Macro invocations are tagged on the way: `!(` becomes ` !! (` and `!  {` becomes ` !? {`.
fn transform_code(code: &str) -> String {
    // Applied strictly in this order: later rules rely on the spacing produced by earlier
    // ones (e.g. `::` is restored right after `:` has been spaced out).
    const REWRITES: [(&str, &str); 11] = [
        ("{", " { "),
        ("}", " } "),
        (":", " : "),
        (" :  : ", "::"),
        ("*/", " */"),
        ("\n", " \n "),
        ("!(", " !! ("),
        ("!  {", " !? {"),
        (",", ", "),
        ("(", " ("),
        ("\"", " \""),
    ];

    REWRITES
        .iter()
        .fold(code.to_owned(), |text, (from, to)| text.replace(*from, to))
}
245
246// Replaces lines that should be removed (doc comments mostly) with empty lines to keep a working
247// line match.
248fn clean_input(s: &str) -> String {
249    let mut ret = String::new();
250    let mut text = s;
251    loop {
252        text = match find_one_of(COMMENT_ID, DOC_COMMENT_ID, text) {
253            BlockKind::Other(content) => {
254                ret.push_str(&transform_code(content));
255                break;
256            }
257            BlockKind::DocComment((before, doc_comment, after))
258                if !check_if_should_be_ignored(&s[..s.len() - after.len()]) =>
259            {
260                ret.push_str(&transform_code(&before));
261                ret.push_str(&doc_comment);
262                after
263            }
264            BlockKind::DocComment((before, doc_comment, after)) => {
265                ret.push_str(&transform_code(&before));
266                for _ in 0..doc_comment.split('\n').count() - 1 {
267                    ret.push_str(" \n ");
268                }
269                after
270            }
271            BlockKind::Comment((before, comment, after)) => {
272                ret.push_str(&transform_code(&before));
273                for _ in 0..comment.split('\n').count() - 1 {
274                    ret.push_str(" \n ");
275                }
276                after
277            }
278        };
279    }
280    ret
281}
282
283fn clear_events(mut events: Vec<EventInfo>) -> Vec<EventInfo> {
284    let mut current: Option<TypeStruct> = None;
285    let mut waiting_type: Option<TypeStruct> = None;
286    let mut it = 0;
287
288    while it < events.len() {
289        if match events[it].event {
290            EventType::Type(ref t) => {
291                if t.ty != Type::Unknown {
292                    waiting_type = Some(t.clone());
293                    false
294                } else if let Some(ref parent) = current {
295                    !matches!(parent.ty, Type::Struct | Type::Enum)
296                } else {
297                    true
298                }
299            }
300            EventType::InScope => {
301                current = add_to_type_scope(&current, &waiting_type);
302                waiting_type = None;
303                false
304            }
305            EventType::OutScope => {
306                current = type_out_scope(&current);
307                waiting_type = None;
308                false
309            }
310            _ => false,
311        } {
312            events.remove(it);
313            continue;
314        }
315        it += 1;
316    }
317    events
318}
319
320fn remove_stop_chars(s: &str) -> String {
321    let mut s = s.to_owned();
322    for c in STOP_CHARACTERS {
323        if s.contains(*c) {
324            s = s.replace(&c.to_string(), "");
325        }
326    }
327    s
328}
329
330#[allow(clippy::useless_let_if_seq)]
331fn build_event_inner(
332    it: &mut usize,
333    line: &mut usize,
334    words: &[&str],
335    event_list: &mut Vec<EventInfo>,
336    comment_lines: &mut Vec<usize>,
337    b_content: &[String],
338    mut par_count: Option<isize>,
339) {
340    let mut waiting_for_macro = false;
341    while *it < words.len() {
342        match words[*it] {
343            c if c.starts_with('"') => move_to(words, it, "\"", line, "\""),
344            c if c.starts_with("b\"") => move_to(words, it, "\"", line, "b\""),
345            // c if c.starts_with("'") => move_to(&words, it, "'", line),
346            c if c.starts_with("r#") => {
347                let end = c.split("#\"").next().unwrap().replace(['"', 'r'], "");
348                move_to(words, it, &format!("\"{}", end), line, "r#");
349            }
350            "///" | "///\n" => {
351                comment_lines.push(*line);
352                event_list.push(EventInfo::new(
353                    *line,
354                    EventType::Comment(b_content[*line].to_owned()),
355                ));
356                move_to(words, it, "\n", line, "");
357            }
358            "//!" | "//!\n" => {
359                comment_lines.push(*line);
360                event_list.push(EventInfo::new(
361                    *line,
362                    EventType::FileComment(b_content[*line].to_owned()),
363                ));
364                if *line + 1 < b_content.len() && b_content[*line + 1].is_empty() {
365                    comment_lines.push(*line + 1);
366                }
367                move_to(words, it, "\n", line, "");
368            }
369            "/*!" | "/*!\n" => {
370                let mark = *line;
371                move_until(words, it, "*/", line);
372                for (pos, s) in b_content.iter().enumerate().take(*line).skip(mark) {
373                    comment_lines.push(pos);
374                    event_list.push(EventInfo::new(*line, EventType::FileComment(s.to_owned())));
375                }
376                comment_lines.push(*line);
377                let mut removed = false;
378                if *line + 1 < b_content.len() && b_content[*line + 1].is_empty() {
379                    comment_lines.push(*line + 1);
380                    removed = true;
381                }
382                event_list.push(EventInfo::new(
383                    mark,
384                    EventType::FileComment("*/".to_owned()),
385                ));
386                if removed {
387                    event_list.push(EventInfo::new(*line, EventType::FileComment("".to_owned())));
388                }
389            }
390            "/**" | "/**\n" => {
391                let mark = *line;
392                move_until(words, it, "*/", line);
393                for (pos, s) in b_content.iter().enumerate().take(*line).skip(mark) {
394                    comment_lines.push(pos);
395                    event_list.push(EventInfo::new(*line, EventType::Comment(s.to_owned())));
396                }
397                comment_lines.push(*line);
398                let mut removed = false;
399                if *line + 1 < b_content.len() && b_content[*line + 1].is_empty() {
400                    comment_lines.push(*line + 1);
401                    removed = true;
402                }
403                event_list.push(EventInfo::new(
404                    mark,
405                    EventType::FileComment("*/".to_owned()),
406                ));
407                if removed {
408                    event_list.push(EventInfo::new(*line, EventType::Comment("".to_owned())));
409                }
410            }
411            "use" | "mod" => {
412                let mut name = words[*it + 1].to_owned();
413                let ty = words[*it];
414
415                if *line + 1 < b_content.len() && b_content[*line].ends_with("::{") {
416                    move_to(words, it, "\n", line, "");
417                    name.push_str(b_content[*line + 1].trim());
418                }
419                event_list.push(EventInfo::new(
420                    *line,
421                    EventType::Type(TypeStruct::new(Type::from(ty), &name)),
422                ));
423            }
424            "struct" | "enum" | "const" | "static" | "type" | "trait" | "macro_rules!"
425            | "flags" => {
426                if *it + 1 >= words.len() {
427                    break;
428                }
429                event_list.push(EventInfo::new(
430                    *line,
431                    EventType::Type(TypeStruct::new(
432                        Type::from(words[*it]),
433                        get_before(words[*it + 1], STOP_CHARACTERS),
434                    )),
435                ));
436                waiting_for_macro = words[*it] == "macro_rules!";
437                *it += 1;
438            }
439            "fn" => {
440                if *it + 1 >= words.len() {
441                    break;
442                }
443                let name = get_before(words[*it + 1], STOP_CHARACTERS);
444                event_list.push(EventInfo::new(
445                    *line,
446                    EventType::Type(TypeStruct::new(Type::from(words[*it]), name)),
447                ));
448                *it += 1;
449                if !name.is_empty() {
450                    while let Some(&word) = words.get(*it) {
451                        if word.ends_with(';') {
452                            break;
453                        }
454                        if word.starts_with('{') {
455                            *it -= 1;
456                            break;
457                        }
458                        if word == "\n" {
459                            *line += 1;
460                        }
461                        *it += 1;
462                    }
463                }
464            }
465            "!!" => {
466                event_list.push(EventInfo::new(
467                    *line,
468                    EventType::Type(TypeStruct::new(
469                        Type::from("macro"),
470                        &format!("{}!{}", words[*it - 1], words[*it + 1]),
471                    )),
472                ));
473                *it += 1;
474            }
475            "!?" => {
476                event_list.push(EventInfo::new(
477                    *line,
478                    EventType::Type(TypeStruct::new(
479                        Type::from("macro"),
480                        &format!("{}!", words[*it - 1]),
481                    )),
482                ));
483            }
484            "impl" => {
485                event_list.push(EventInfo::new(
486                    *line,
487                    EventType::Type(TypeStruct::new(
488                        Type::Impl,
489                        &join(&get_impl(words, it, line), " "),
490                    )),
491                ));
492            }
493            c if c.starts_with("impl<") => {
494                event_list.push(EventInfo::new(
495                    *line,
496                    EventType::Type(TypeStruct::new(
497                        Type::Impl,
498                        &join(&get_impl(words, it, line), " "),
499                    )),
500                ));
501            }
502            x if x == "{" || x == "{\n" => {
503                if let Some(ref mut par_count) = par_count {
504                    *par_count += 1;
505                }
506                event_list.push(EventInfo::new(*line, EventType::InScope));
507                if waiting_for_macro {
508                    build_event_inner(
509                        it,
510                        line,
511                        words,
512                        &mut vec![],
513                        &mut vec![],
514                        b_content,
515                        Some(1),
516                    );
517                    waiting_for_macro = false;
518                }
519                if x == "}\n" {
520                    *line += 1;
521                }
522            }
523            x if x == "}" || x == "}\n" => {
524                if let Some(ref mut par_count) = par_count {
525                    *par_count -= 1;
526                    if *par_count <= 0 {
527                        return;
528                    }
529                }
530                event_list.push(EventInfo::new(*line, EventType::OutScope));
531                if x == "}\n" {
532                    *line += 1;
533                }
534            }
535            "\n" => {
536                *line += 1;
537            }
538            s if s.starts_with("#[") || s.starts_with("#![") => {
539                while *it < words.len() {
540                    *line += words[*it].split('\n').count() - 1;
541                    if words[*it].contains(']') {
542                        break;
543                    }
544                    *it += 1;
545                }
546                *line += s.chars().filter(|c| *c == '\n').count();
547            }
548            x => {
549                event_list.push(EventInfo::new(
550                    *line,
551                    EventType::Type(TypeStruct::new(
552                        Type::Unknown,
553                        &remove_stop_chars(words[*it]),
554                    )),
555                ));
556                *line += x.chars().filter(|c| *c == '\n').count();
557            }
558        }
559        *it += 1;
560    }
561}
562
563pub fn build_event_list(path: &Path) -> io::Result<ParseResult> {
564    let mut f = File::open(path)?;
565    let mut b_content = String::new();
566    f.read_to_string(&mut b_content).unwrap();
567    let content = clean_input(&b_content);
568    let b_content: Vec<String> = b_content.split('\n').map(|s| s.to_owned()).collect();
569    let words: Vec<&str> = content.split(' ').filter(|s| !s.is_empty()).collect();
570    let mut it = 0;
571    let mut line = 0;
572    let mut event_list = vec![];
573    let mut comment_lines = vec![];
574
575    build_event_inner(
576        &mut it,
577        &mut line,
578        &words,
579        &mut event_list,
580        &mut comment_lines,
581        &b_content,
582        None,
583    );
584    let clear = clear_events(event_list);
585    Ok(ParseResult {
586        event_list: clear,
587        comment_lines,
588        original_content: b_content,
589    })
590}
591
592fn unformat_comment(c: &str) -> String {
593    fn remove_prepend(s: &str) -> String {
594        let mut s = s.to_owned();
595
596        for to_remove in DOC_COMMENT_ID {
597            s = s.replace(to_remove, "");
598        }
599        /*for to_remove in COMMENT_ID {
600            s = s.replace(to_remove, "");
601        }*/
602        if s.starts_with(' ') {
603            (&s)[1..].to_owned()
604        } else {
605            s
606        }
607    }
608
609    c.replace("*/", "")
610        .split('\n')
611        .map(|s| remove_prepend(s.trim_start()))
612        .collect::<Vec<String>>()
613        .join("\n")
614}
615
/// Extracts the doc comments of the file `path` (relative to `work_dir`) into `out_file`,
/// then removes them from the file itself.
///
/// * `work_dir` - directory `path` is joined to in order to locate the file.
/// * `path` - file path, also passed to `write_file` for the header written to `out_file`.
/// * `out_file` - destination of the extracted comments.
/// * `ignore_macros` - forwarded as-is to `write_comment` / `write_file_comment`.
///
/// Nothing is written and the file is left untouched when it holds no doc comment. A file
/// that cannot be parsed by `build_event_list` is reported on stdout.
///
/// The process exits with status 1 when a file-level comment is found outside of a module
/// scope (see `get_mod`).
///
/// # Panics
///
/// Panics when writing to `out_file` fails, when an item comment is followed by something
/// that is neither a comment nor a type event, and when the scope computed for a commented
/// item is `None`.
pub fn strip_comments<F: Write>(
    work_dir: &Path,
    path: &str,
    out_file: &mut F,
    ignore_macros: bool,
) {
    let full_path = work_dir.join(path);
    match build_event_list(&full_path) {
        Ok(parse_result) => {
            if parse_result.comment_lines.is_empty() {
                return;
            }
            writeln!(out_file, "{}", &write_file(path)).unwrap();
            // Scope the cursor is currently in, and the last named item that may open one.
            let mut current: Option<TypeStruct> = None;
            let mut waiting_type: Option<TypeStruct> = None;
            let mut it = 0;

            while it < parse_result.event_list.len() {
                match parse_result.event_list[it].event {
                    EventType::Type(ref t) => {
                        if t.ty != Type::Unknown {
                            waiting_type = Some(t.clone());
                        }
                    }
                    EventType::InScope => {
                        current = add_to_type_scope(&current, &waiting_type);
                        waiting_type = None;
                    }
                    EventType::OutScope => {
                        current = type_out_scope(&current);
                        waiting_type = None;
                    }
                    EventType::FileComment(ref c) => {
                        // first, we need to find if it belongs to a mod
                        if !get_mod(&current) {
                            exit(1);
                        }
                        it += 1;
                        let mut comments = format!(
                            "{}\n",
                            &write_file_comment(&unformat_comment(c), &current, ignore_macros)
                        );
                        // Append every file comment that immediately follows; the closure
                        // both tests the event and accumulates its text.
                        while parse_result
                            .event_list
                            .get(it)
                            .map(|x| match x.event {
                                EventType::FileComment(ref c) => {
                                    use std::fmt::Write;
                                    writeln!(comments, "{}", unformat_comment(c)).unwrap();
                                    true
                                }
                                _ => false,
                            })
                            .unwrap_or(false)
                        {
                            it += 1;
                        }
                        write!(out_file, "{}", comments).unwrap();
                        // `it` already points at the first event after the comment run.
                        continue;
                    }
                    EventType::Comment(ref c) => {
                        let mut comments = format!("{}\n", c);

                        it += 1;
                        // Gather the rest of the comment run; it must be followed by a type
                        // event, anything else is a misplaced doc comment.
                        while it < parse_result.event_list.len()
                            && match parse_result.event_list[it].event {
                                EventType::Comment(ref c) => {
                                    use std::fmt::Write;
                                    writeln!(comments, "{}", c).unwrap();
                                    true
                                }
                                EventType::Type(_) => false,
                                _ => panic!("[{}:{}]: Doc comments cannot be written everywhere:\n---> {:#?}", full_path.display(), parse_result.event_list[it].line, parse_result.event_list),
                            }
                        {
                            it += 1;
                        }
                        // Find the item the comment documents. The closure returns `true` to
                        // keep looking (a `pub` word was skipped) and `false` once the
                        // comment has been written out or no candidate remains.
                        while parse_result
                            .event_list
                            .get(it)
                            .map(|x| match x.event {
                                EventType::Type(ref t) => match t.ty {
                                    Type::Unknown => match current {
                                        Some(ref cur) => {
                                            // Inside an enum, struct or use scope an unknown
                                            // word is recorded as a `Type::Variant`.
                                            if cur.ty == Type::Enum
                                                || cur.ty == Type::Struct
                                                || cur.ty == Type::Use
                                            {
                                                if t.name == "pub" {
                                                    true
                                                } else {
                                                    let mut copy = t.clone();
                                                    copy.ty = Type::Variant;
                                                    let tmp =
                                                        add_to_type_scope(&current, &Some(copy));
                                                    write!(
                                                        out_file,
                                                        "{}",
                                                        write_comment(
                                                            &tmp.unwrap(),
                                                            &unformat_comment(&comments),
                                                            ignore_macros
                                                        )
                                                    )
                                                    .unwrap();
                                                    false
                                                }
                                            } else {
                                                t.name == "pub"
                                            }
                                        }
                                        None => t.name == "pub",
                                    },
                                    // A named item: the comment is attached to it.
                                    _ => {
                                        let tmp = add_to_type_scope(&current, &Some(t.clone()));
                                        write!(
                                            out_file,
                                            "{}",
                                            write_comment(
                                                &tmp.unwrap(),
                                                &unformat_comment(&comments),
                                                ignore_macros
                                            )
                                        )
                                        .unwrap();
                                        false
                                    }
                                },
                                _ => panic!("An item was expected for this comment: {}", comments),
                            })
                            .unwrap_or(false)
                        {
                            it += 1;
                        }
                        // The event `it` stopped on is processed by the next iteration.
                        continue;
                    }
                }
                it += 1;
            }
            // we now remove doc comments from original file
            remove_comments(
                &full_path,
                &parse_result.comment_lines,
                parse_result.original_content,
            );
        }
        Err(e) => {
            println!("Unable to open \"{}\": {}", path, e);
        }
    }
}
767
768fn remove_comments(path: &Path, to_remove: &[usize], mut o_content: Vec<String>) {
769    let mut decal = 0;
770    match File::create(path) {
771        Ok(mut f) => {
772            for line in to_remove.iter() {
773                if line - decal > 0
774                    && line - decal + 1 < o_content.len()
775                    && o_content[line - decal - 1].trim() == IGNORE_NEXT_COMMENT_STOP
776                {
777                    let l = o_content[line - decal + 1].trim();
778                    if DOC_COMMENT_ID.iter().any(|d| l.starts_with(d)) {
779                        o_content.remove(line - decal - 1);
780                        decal += 1;
781                    }
782                }
783                o_content.remove(line - decal);
784                decal += 1;
785            }
786            write!(f, "{}", o_content.join("\n")).unwrap();
787        }
788        Err(e) => {
789            println!("Cannot open '{}': {}", path.display(), e);
790        }
791    }
792}