webspec-index 0.8.0

Query WHATWG/W3C/TC39 web specifications from the command line
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
// Algorithm rendering: convert <ol>/<li> to numbered markdown text
use htmd::HtmlToMarkdown;
use scraper::{ElementRef, Node};

/// Render the steps of an algorithm `<ol>` as a numbered markdown list.
///
/// Direct `<li>` children become top-level steps numbered from 1; nested lists
/// are indented by [`render_li_recursive`]. Any other element that is a direct
/// child of the `<ol>` is converted to markdown with `converter` and emitted as
/// its own paragraph without consuming a step number. If that conversion fails
/// or yields only whitespace, the element is silently skipped.
///
/// Trailing whitespace is removed from the returned text.
pub fn render_algorithm_ol(ol_element: &ElementRef, converter: &HtmlToMarkdown) -> String {
    let mut out = String::new();
    let mut next_step = 1;

    // Only element children matter here; text and comment nodes directly
    // under the <ol> are ignored.
    for element in ol_element.children().filter_map(ElementRef::wrap) {
        if element.value().name() != "li" {
            // Non-step element sitting between list items: render it as a
            // standalone block separated from the preceding step.
            let block_md = converter.convert(&element.html()).unwrap_or_default();
            let block_md = block_md.trim();
            if !block_md.is_empty() {
                out.push_str("\n\n");
                out.push_str(block_md);
                out.push('\n');
            }
            continue;
        }

        out.push_str(&render_li_recursive(&element, &[next_step], 0, converter));
        next_step += 1;
    }

    out.trim_end().to_string()
}

/// Recursively render a `<li>` element as one numbered markdown step.
///
/// The step is prefixed with `indent` levels of 4-space indentation followed by
/// the last entry of `numbering` (falling back to `1` when `numbering` is empty).
/// Only that last entry is printed; the rest of the path is carried along for
/// nested steps but never rendered.
///
/// Children are processed in document order. Inline and block content (text,
/// `<var>`, `<emu-xref>`, `<p>`, ...) is accumulated as HTML and converted to
/// markdown in one pass per chunk, so inline elements are not broken across
/// lines. A nested `<ol>` or `<ul>` flushes the pending chunk and is then
/// rendered one indentation level deeper. Inside a nested `<ol>` only `<li>`
/// children are rendered; other children are skipped.
///
/// The returned string always ends with a newline.
fn render_li_recursive(
    li: &ElementRef,
    numbering: &[usize],
    indent: usize,
    converter: &HtmlToMarkdown,
) -> String {
    // Step prefix: 4 spaces per nesting level, then this step's own number.
    let step_num = numbering.last().copied().unwrap_or(1);
    let mut result = format!("{}{}. ", "    ".repeat(indent), step_num);

    // HTML collected since the last flush, and whether the next flushed chunk
    // is the first one (which goes inline right after the step number).
    let mut content_html = String::new();
    let mut first_chunk = true;

    for child in li.children() {
        if let Some(child_element) = ElementRef::wrap(child) {
            match child_element.value().name() {
                "ol" => {
                    // Emit the text that precedes the nested list first.
                    flush_content_html(
                        &mut result,
                        &mut content_html,
                        &mut first_chunk,
                        indent,
                        converter,
                    );

                    // Nested numbered list, restarting at 1 one level deeper.
                    result.push_str("\n\n");
                    let sub_items = child_element
                        .children()
                        .filter_map(ElementRef::wrap)
                        .filter(|el| el.value().name() == "li");
                    for (idx, sub_li) in sub_items.enumerate() {
                        let mut new_numbering = numbering.to_vec();
                        new_numbering.push(idx + 1);
                        result.push_str(&render_li_recursive(
                            &sub_li,
                            &new_numbering,
                            indent + 1,
                            converter,
                        ));
                    }
                }
                "ul" => {
                    flush_content_html(
                        &mut result,
                        &mut content_html,
                        &mut first_chunk,
                        indent,
                        converter,
                    );

                    result.push_str("\n\n");
                    result.push_str(&render_ul(&child_element, indent + 1, converter));
                }
                // Any other element (p, var, emu-xref, div, span, ...) is
                // serialized back to HTML and converted with its neighbours.
                _ => content_html.push_str(&child_element.html()),
            }
        } else if let Node::Text(text) = child.value() {
            // Text nodes hold *decoded* characters. The buffer is re-parsed as
            // HTML, so markup-significant characters must be escaped again;
            // otherwise text like "a <b> c" would turn into a tag.
            push_escaped_text(&mut content_html, text);
        }
    }

    // Flush whatever content followed the last nested list (or all of it, if
    // the step had no nested list).
    flush_content_html(
        &mut result,
        &mut content_html,
        &mut first_chunk,
        indent,
        converter,
    );

    // Ensure the step ends with a newline
    if !result.ends_with('\n') {
        result.push('\n');
    }

    result
}

/// Append `text` to `buf`, escaping `&`, `<` and `>` so the result is safe to
/// parse as HTML text content.
fn push_escaped_text(buf: &mut String, text: &str) {
    for ch in text.chars() {
        match ch {
            '&' => buf.push_str("&amp;"),
            '<' => buf.push_str("&lt;"),
            '>' => buf.push_str("&gt;"),
            _ => buf.push(ch),
        }
    }
}

/// Convert the buffered HTML to markdown, append it to `result`, and empty the buffer.
///
/// `content_html` is cleared on every path. Nothing is appended when the buffer
/// is blank, or when the conversion fails or produces only whitespace; in those
/// cases `first_chunk` is left unchanged.
///
/// The first non-empty chunk is appended as-is (it continues the line holding
/// the step number) and flips `first_chunk` to `false`. Every later chunk is
/// preceded by a blank line and indented one level deeper than `indent`, so it
/// reads as continuation content of the same list item.
fn flush_content_html(
    result: &mut String,
    content_html: &mut String,
    first_chunk: &mut bool,
    indent: usize,
    converter: &HtmlToMarkdown,
) {
    // Skip the converter entirely for a blank buffer.
    let rendered = if content_html.trim().is_empty() {
        String::new()
    } else {
        converter.convert(content_html).unwrap_or_default()
    };
    content_html.clear();

    let md = rendered.trim();
    if md.is_empty() {
        return;
    }

    if *first_chunk {
        *first_chunk = false;
        result.push_str(md);
        return;
    }

    // Continuation content after a nested list needs indentation
    result.push_str("\n\n");
    result.push_str(&indent_lines(md, indent + 1));
}

/// Prefix each non-blank line of `text` with `indent` levels of 4-space indentation.
///
/// Lines that are empty or whitespace-only are passed through unchanged. Lines
/// are rejoined with `\n`, so a trailing newline in the input is not preserved.
fn indent_lines(text: &str, indent: usize) -> String {
    let pad = "    ".repeat(indent);
    let mut out = String::with_capacity(text.len());

    for (idx, line) in text.lines().enumerate() {
        if idx > 0 {
            out.push('\n');
        }
        if !line.trim().is_empty() {
            out.push_str(&pad);
        }
        out.push_str(line);
    }

    out
}

/// Render the `<li>` children of a `<ul>` as markdown bullets.
///
/// Each item is emitted on its own line as `indent` levels of 4-space
/// indentation, then `* `, then the item's content converted to markdown with
/// `converter`. Children of the `<ul>` that are not `<li>` elements are
/// skipped. If conversion fails the bullet is emitted with empty content.
///
/// The whole `<li>` element is handed to the converter, which may itself
/// prefix the output with a `*` bullet marker; that marker is dropped so the
/// bullet is not doubled.
fn render_ul(ul: &ElementRef, indent: usize, converter: &HtmlToMarkdown) -> String {
    let prefix = "    ".repeat(indent);
    let mut result = String::new();

    let items = ul
        .children()
        .filter_map(ElementRef::wrap)
        .filter(|el| el.value().name() == "li");

    for li_element in items {
        let converted = converter
            .convert(&li_element.html())
            .unwrap_or_default();
        let converted = converted.trim();

        // Drop a leading bullet marker emitted by the converter. A `*` only
        // counts as a bullet when followed by whitespace (or nothing);
        // otherwise it opens emphasis such as `*x*` and must be kept.
        let li_content = match converted.strip_prefix('*') {
            Some(rest) if rest.is_empty() || rest.starts_with(char::is_whitespace) => rest.trim(),
            _ => converted,
        };

        result.push_str(&prefix);
        result.push_str("* ");
        result.push_str(li_content);
        result.push('\n');
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse::markdown;
    use scraper::{Html, Selector};

    /// Converter used by every test in this module.
    fn test_converter() -> HtmlToMarkdown {
        markdown::build_converter("https://test.example.com")
    }

    /// Parse `html` as a fragment, pick the first element matching
    /// `css_selector`, and render it with `render_algorithm_ol`.
    fn render_first_match(html: &str, css_selector: &str) -> String {
        let fragment = Html::parse_fragment(html);
        let selector = Selector::parse(css_selector).unwrap();
        let ol = fragment.select(&selector).next().unwrap();

        render_algorithm_ol(&ol, &test_converter())
    }

    /// Render the first `<ol>` found in `html`.
    fn render_first_ol(html: &str) -> String {
        render_first_match(html, "ol")
    }

    #[test]
    fn test_simple_algorithm() {
        let result = render_first_ol(
            r#"
            <ol>
                <li><p>First step</p></li>
                <li><p>Second step</p></li>
                <li><p>Third step</p></li>
            </ol>
        "#,
        );

        assert!(result.contains("1. First step"));
        assert!(result.contains("2. Second step"));
        assert!(result.contains("3. Third step"));
    }

    #[test]
    fn test_nested_algorithm() {
        let result = render_first_ol(
            r#"
            <ol>
                <li><p>Step one</p></li>
                <li><p>Step two</p>
                    <ol>
                        <li><p>Sub-step 2.1</p></li>
                        <li><p>Sub-step 2.2</p></li>
                    </ol>
                </li>
                <li><p>Step three</p></li>
            </ol>
        "#,
        );

        assert!(result.contains("1. Step one"));
        assert!(result.contains("2. Step two"));
        // Sub-steps restart at 1 and sit one indentation level deeper
        assert!(result.contains("    1. Sub-step 2.1"));
        assert!(result.contains("    2. Sub-step 2.2"));
        assert!(result.contains("3. Step three"));
    }

    #[test]
    fn test_deeply_nested_algorithm() {
        let result = render_first_ol(
            r#"
            <ol>
                <li><p>Level 1</p>
                    <ol>
                        <li><p>Level 1.1</p>
                            <ol>
                                <li><p>Level 1.1.1</p></li>
                            </ol>
                        </li>
                    </ol>
                </li>
            </ol>
        "#,
        );

        assert!(result.contains("1. Level 1"));
        assert!(result.contains("    1. Level 1.1"));
        assert!(result.contains("        1. Level 1.1.1"));
    }

    #[test]
    fn test_algorithm_with_var_and_code() {
        let result = render_first_ol(
            r#"
            <ol>
                <li><p>Let <var>foo</var> be a <code>Document</code>.</p></li>
                <li><p>Return <var>foo</var>.</p></li>
            </ol>
        "#,
        );

        // The test converter is expected to render <var> as *italic* and
        // <code> as `backtick`
        assert!(result.contains("1. Let *foo* be a `Document`."));
        assert!(result.contains("2. Return *foo*."));
    }

    #[test]
    fn test_algorithm_from_fixture() {
        let html = include_str!("../../tests/fixtures/algorithms/bikeshed_algorithm.html");
        let result = render_first_match(html, "div.algorithm ol");

        // At least two numbered steps must be present
        assert!(result.contains("1. "));
        assert!(result.contains("2. "));

        // Output must not be blank
        assert!(!result.trim().is_empty());
    }

    #[test]
    fn test_indentation() {
        let result = render_first_ol(
            r#"
            <ol>
                <li><p>Top</p>
                    <ol>
                        <li><p>Nested</p></li>
                    </ol>
                </li>
            </ol>
        "#,
        );

        // Top-level step carries no indentation
        assert!(result.contains("1. Top"));

        // Each nesting level adds 4 spaces before the step number
        assert!(result.contains("    1. Nested"));
        let nested_line = result
            .lines()
            .find(|line| line.contains("Nested"))
            .unwrap();
        assert!(nested_line.starts_with("    1."));
    }

    #[test]
    fn test_note_between_steps() {
        // A note placed inside step 2 (after its text, before step 3) should be
        // rendered as a blockquote and must not disturb the step numbering.
        let result = render_first_ol(
            r#"
            <ol>
                <li><p>First step</p></li>
                <li><p>Second step</p>
                    <div class="note">
                        <p>This is a note between steps.</p>
                    </div>
                </li>
                <li><p>Third step</p></li>
            </ol>
        "#,
        );

        // All three steps are present
        assert!(result.contains("1. First step"));
        assert!(result.contains("2. Second step"));
        assert!(result.contains("3. Third step"));

        // The note appears as a prefixed blockquote
        assert!(
            result.contains("> **Note:** This is a note between steps."),
            "Note should be a blockquote: {}",
            result
        );

        // Step 3 must start on a line after the blockquote
        let line_index = |needle: &str| {
            result
                .lines()
                .position(|line| line.contains(needle))
                .unwrap()
        };
        let step3_index = line_index("3. Third step");
        let note_index = line_index("> **Note:**");

        assert!(
            step3_index > note_index,
            "Step 3 should appear after the note"
        );
    }

    #[test]
    fn test_nested_bullet_list() {
        // Nested <ul> lists must be indented and kept in document order
        // relative to the surrounding paragraphs.
        let result = render_first_ol(
            r#"
            <ol>
                <li><p>If all of the following are true:</p>
                    <ul>
                        <li><var>x</var> is null;</li>
                        <li><var>y</var> is null;</li>
                    </ul>
                    <p>then return.</p>
                </li>
                <li><p>Next step</p></li>
            </ol>
        "#,
        );

        // Step 1 starts with the intro text
        assert!(result.contains("1. If all of the following are true:"));

        // Bullets are indented by one level (4 spaces)
        assert!(result.contains("    * *x* is null;"));
        assert!(result.contains("    * *y* is null;"));

        // Both bullets precede the trailing "then return" paragraph
        let x_pos = result.find("*x* is null").expect("x bullet should exist");
        let y_pos = result.find("*y* is null").expect("y bullet should exist");
        let then_pos = result
            .find("then return")
            .expect("then return should exist");

        assert!(x_pos < then_pos, "bullets should come before 'then return'");
        assert!(y_pos < then_pos, "bullets should come before 'then return'");

        // The trailing paragraph is continuation content and so is indented
        assert!(
            result.contains("    then return"),
            "continuation content should be indented"
        );

        // The following step keeps its number
        assert!(result.contains("2. Next step"));
    }
}