1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
use anyhow::Context;
use ir::Ir;
use location::Visitor;
use syn::visit::Visit;

mod location;

mod emit;
mod ir;
mod lex;

/// Table of no-op statement sequences indexed by their own length.
///
/// `JUNK[n]` is exactly `n` characters long for every `n` in `0..=80`; each
/// entry is built from side-effect-free statements such as `();`,
/// `let _=();` or `if true{};`. All entries are ASCII, so byte length and
/// character count coincide.
// NOTE(review): presumably used by the `emit` module as filler to pad a line
// out to the requested width — confirm against the caller.
const JUNK: [&str; 81] = [
    "",
    ";",
    "3;",
    "();",
    "{;};",
    "({});",
    "{();};",
    "*&*&();",
    "((),());",
    "let _=();",
    "if true{};",
    "let _=||();",
    "loop{break};",
    "loop{break;};",
    "if let _=(){};",
    "*&*&();((),());",
    "((),());((),());",
    "((),());let _=();",
    "let _=();let _=();",
    "let _=();if true{};",
    "if true{};if true{};",
    "if true{};let _=||();",
    "let _=||();let _=||();",
    "let _=||();loop{break};",
    "loop{break};loop{break};",
    "loop{break};loop{break;};",
    "loop{break;};loop{break;};",
    "loop{break;};if let _=(){};",
    "if let _=(){};if let _=(){};",
    "if let _=(){};*&*&();((),());",
    "*&*&();((),());*&*&();((),());",
    "*&*&();((),());((),());((),());",
    "((),());((),());((),());((),());",
    "((),());((),());((),());let _=();",
    "((),());let _=();((),());let _=();",
    "((),());let _=();let _=();let _=();",
    "let _=();let _=();let _=();let _=();",
    "let _=();let _=();let _=();if true{};",
    "let _=();if true{};let _=();if true{};",
    "let _=();if true{};if true{};if true{};",
    "if true{};if true{};if true{};if true{};",
    "if true{};if true{};if true{};let _=||();",
    "if true{};let _=||();if true{};let _=||();",
    "if true{};let _=||();let _=||();let _=||();",
    "let _=||();let _=||();let _=||();let _=||();",
    "let _=||();let _=||();let _=||();loop{break};",
    "let _=||();loop{break};let _=||();loop{break};",
    "let _=||();loop{break};loop{break};loop{break};",
    "loop{break};loop{break};loop{break};loop{break};",
    "loop{break};loop{break};loop{break};loop{break;};",
    "loop{break};loop{break;};loop{break};loop{break;};",
    "loop{break};loop{break;};loop{break;};loop{break;};",
    "loop{break;};loop{break;};loop{break;};loop{break;};",
    "loop{break;};loop{break;};loop{break;};if let _=(){};",
    "loop{break;};if let _=(){};loop{break;};if let _=(){};",
    "loop{break;};if let _=(){};if let _=(){};if let _=(){};",
    "if let _=(){};if let _=(){};if let _=(){};if let _=(){};",
    "if let _=(){};if let _=(){};if let _=(){};*&*&();((),());",
    "if let _=(){};*&*&();((),());if let _=(){};*&*&();((),());",
    "if let _=(){};*&*&();((),());*&*&();((),());*&*&();((),());",
    "*&*&();((),());*&*&();((),());*&*&();((),());*&*&();((),());",
    "*&*&();((),());*&*&();((),());*&*&();((),());((),());((),());",
    "*&*&();((),());((),());((),());*&*&();((),());((),());((),());",
    "*&*&();((),());((),());((),());((),());((),());((),());((),());",
    "((),());((),());((),());((),());((),());((),());((),());((),());",
    "((),());((),());((),());((),());((),());((),());((),());let _=();",
    "((),());((),());((),());let _=();((),());((),());((),());let _=();",
    "((),());((),());((),());let _=();((),());let _=();((),());let _=();",
    "((),());let _=();((),());let _=();((),());let _=();((),());let _=();",
    "((),());let _=();((),());let _=();((),());let _=();let _=();let _=();",
    "((),());let _=();let _=();let _=();((),());let _=();let _=();let _=();",
    "((),());let _=();let _=();let _=();let _=();let _=();let _=();let _=();",
    "let _=();let _=();let _=();let _=();let _=();let _=();let _=();let _=();",
    "let _=();let _=();let _=();let _=();let _=();let _=();let _=();if true{};",
    "let _=();let _=();let _=();if true{};let _=();let _=();let _=();if true{};",
    "let _=();let _=();let _=();if true{};let _=();if true{};let _=();if true{};",
    "let _=();if true{};let _=();if true{};let _=();if true{};let _=();if true{};",
    "let _=();if true{};let _=();if true{};let _=();if true{};if true{};if true{};",
    "let _=();if true{};if true{};if true{};let _=();if true{};if true{};if true{};",
    "let _=();if true{};if true{};if true{};if true{};if true{};if true{};if true{};",
    "if true{};if true{};if true{};if true{};if true{};if true{};if true{};if true{};",
];

/// Unformat a source file into lines of length `width`.
///
/// ## Details
/// This process strips comments, inserts no-op statements, and wraps expressions
/// in extra parentheses to achieve the desired line length.
///
/// Lines whose first non-whitespace characters are `///` are removed before
/// any lexing or parsing takes place.
///
/// ## Errors
/// Returns an error if the source file is not valid Rust, i.e. if either the
/// lexer or `syn` rejects it. Parse failures are reported as errors rather
/// than panics.
///
/// This function returns a spurious error if source has documentation comments
/// not at the start of a line, for example
/// ```text
/// let x = blah; /// bad!
/// ```
/// This is because we use syn under the hood, which does not understand doc comments.
pub fn unformat(src: &str, width: usize) -> anyhow::Result<Vec<u8>> {
    let src = remove_doc_comments(src);

    let tokens = lex::lex_file(&src).context("source was not valid")?;

    let mut stmts = Visitor::new();
    // Propagate a `syn` parse failure to the caller instead of unwrapping:
    // the input is user-supplied, so a malformed file must not panic.
    let syntax_tree = syn::parse_file(&src).context("source was not valid")?;
    stmts.visit_file(&syntax_tree);

    let ir = Ir::new(tokens.into_iter());
    let ir = ir.populate_events(stmts.events());

    let mut unformatted = vec![];
    crate::emit::block(&mut unformatted, &ir, width);

    Ok(unformatted)
}

/// Heuristically strip doc comments from `src`.
///
/// Every line whose first non-whitespace characters are `///` is dropped;
/// the surviving lines are re-joined with `\n`. Because the input is split
/// with [`str::lines`], line terminators are normalised to `\n` and a
/// trailing newline is not preserved. A `///` that follows other text on the
/// same line is left untouched.
///
/// Necessary because syn doesn't understand them as comments, and treats
/// them as expressions.
fn remove_doc_comments(src: &str) -> String {
    src.lines()
        .filter(|candidate| !candidate.trim_start().starts_with("///"))
        .collect::<Vec<_>>()
        .join("\n")
}

/// Length measured in characters rather than bytes.
trait SafeLen {
    /// Returns the displayed length of a string.
    ///
    /// The value is the number of `char`s (Unicode scalar values), so a
    /// multi-byte character counts once.
    fn safe_len(&self) -> usize;
}

impl SafeLen for &str {
    fn safe_len(&self) -> usize {
        // `self` is `&&str` here; coerce it down to a plain `&str` first.
        let text: &str = self;
        text.chars().count()
    }
}