tlparse 0.4.8

Parse TORCH_LOG logs produced by PyTorch torch.compile
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
/// Stylesheet text for the generated report pages.
///
/// Defines the rules referenced by the HTML templates in this file:
/// - `.stack-trie` and `.marker` / `.marker.collapsed` for the collapsible
///   stack tree (the `+ ` / `- ` indicators are produced with `::before`),
/// - the `.status-*` classes used for the color-coded compilation status
///   legend,
/// - `summary` / `details` tweaks,
/// - `.warning-box`, used by the multi-rank index for divergence warnings.
///
/// The text is intended to be placed verbatim inside a `<style>` element.
pub static CSS: &str = r#"
table td { vertical-align: top; }

.stack-trie { white-space: nowrap; font-family: monospace; }
.stack-trie ul { padding-left: 1ch;  }
.stack-trie li { margin-left: 1ch; list-style-type: none;  }
.stack-trie .marker {
  cursor: pointer;
}
.stack-trie .marker.collapsed::before {
  content: "+ ";
}
.stack-trie .marker:not(.collapsed)::before {
  content: "- ";
}
.stack-trie a { text-decoration: none; }
.stack-trie a:hover { text-decoration: underline; }
.status-missing { background-color: purple; color: white; }
.status-error { background-color: red; color: white; }
.status-empty { background-color: white; color: black; }
.status-ok { background-color: green; color: white; }
.status-break { background-color: lime; color: black; }
summary::-webkit-details-marker { color: #00ACF3; font-size: 125%; margin-right: 2px; }
summary:focus { outline-style: none; }
article > details > summary { font-size: 28px; margin-top: 16px; }
details > p { margin-left: 24px; }
        .warning-box {
            background-color:rgb(249, 178, 178);
            border: 1px solid rgb(251, 251, 251);
            padding: 12px 16px;
            margin: 16px 0;
        }
details details summary { font-size: 16px; }
"#;

/// JavaScript source defining `toggleList(toggleItem)`.
///
/// The function looks up the first `<ul>` under `toggleItem`'s parent node.
/// If one exists, it flips that list's inline `display` between `'none'` and
/// `'block'` (any value other than `'none'`, including the initial empty
/// string, becomes `'none'`), and toggles the `collapsed` class on
/// `toggleItem`. If no nested list exists, nothing happens.
///
/// The text is intended to be placed verbatim inside a `<script>` element.
pub static JAVASCRIPT: &str = r#"
  function toggleList(toggleItem) {
    const listItem = toggleItem.parentNode;
    const nestedList = listItem.querySelector('ul');
    if (nestedList) {
      nestedList.style.display = nestedList.style.display === 'none' ? 'block' : 'none';

      // Toggle the collapse/expand indicator
      toggleItem.classList.toggle('collapsed');
    }
  }
"#;

/// Stylesheet text with table and link styling: bordered, collapsed tables
/// at 90% width, shaded header cells, alternating row shading, and a
/// non-wrapping first column.
///
/// NOTE(review): the name suggests this is the stylesheet for the export
/// report (`TEMPLATE_EXPORT_INDEX`); the pairing is done by the caller and is
/// not visible in this file.
pub static EXPORT_CSS: &str = r#"
table {
    width: 90%;
    border-collapse: collapse;
    margin: 20px 0;
    border: 2px solid #000; /* Add border around the table */
}
table, th, td {
    border: 1px solid #999; /* Add border to table cells */
    padding: 10px;
    text-align: left;
}
th {
    background-color: #d3d3d3;
    font-weight: bold;
}
tr:nth-child(odd) {
    background-color: #f2f2f2;
}
a {
    color: #0066cc;
    text-decoration: none;
}
a:hover {
    text-decoration: underline;
}
td:first-child {
    width: auto;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
}
"#;

/// HTML template listing Dynamo guards.
///
/// Template inputs referenced by the text:
/// - `guards`: a sequence; each element's `code` field is rendered inside a
///   `<code>` list item.
/// - `qps`: inserted unescaped just before `</body>`.
pub static TEMPLATE_DYNAMO_GUARDS: &str = r#"
<html>
<body>
<h2>Guards</h2>
<ul>
{{ for guard in guards }}
    <li><code>{guard.code}</code></li>
{{ endfor }}
</ul>
{qps | format_unescaped}
</body>
</html>
"#;

/// HTML template for the main report index page.
///
/// Template inputs referenced by the text:
/// - `css`, `javascript`: inserted unescaped into `<style>` / `<script>`.
/// - `custom_header_html`, `stack_trie_html`: inserted unescaped.
/// - `num_breaks`: when truthy, renders the "Failures and Restarts" section
///   linking to `failures_and_restarts.html`.
/// - `has_chromium_events`: when truthy, renders the link to
///   `chromium_events.json`.
/// - `directory`: a sequence of pairs; `.0` is used as both the anchor id and
///   the label, `.1` is a sequence of entries with `url`, `name`,
///   `readable_url` (optional), `suffix`, and `number`.
/// - `has_inductor_provenance` / `directory_names`: when enabled, renders one
///   `provenance_tracking_{directory_name}.html` link per name.
/// - `has_unknown_stack_trie` / `unknown_stack_trie_html`: optional section
///   for log entries that had no compile id.
/// - `qps`: inserted unescaped just before `</body>`.
pub static TEMPLATE_INDEX: &str = r#"
<html>
<head>
  <meta charset="UTF-8">
</head>
<style>
{css | format_unescaped}
</style>
<script>
{javascript | format_unescaped}
</script>
<body>
<div>
{custom_header_html | format_unescaped}
<h2>Stack trie</h2>
<p>
The <strong>stack trie</strong> is a way of getting a quick orientation on where all the
compilations in a model take place, esp., if you are compiling a codebase you are unfamiliar with.
It is a tree of stack frames, for all stacks that triggered PT2 compilation.  If only a single
stack is in the tree, you will simply see a plain list of frames (most recent call last).  With
multiple stacks, at every point where two stacks diverge from having a common prefix, we increase
the indentation of the list and have a separate sub-list per sub-tree.
</p>
<p>
Links to particular compilation are color coded by status:
<span class="status-ok">[Success]</span>,
<span class="status-break">[Success with restart (e.g., graph break)]</span>,
<span class="status-empty">[Empty graph]</span>,
<span class="status-error">[Error]</span>,
<span class="status-missing">[Metrics were missing]</span>
</p>
{stack_trie_html | format_unescaped}
</div>
<div>
{{ if num_breaks }}
<h2> Failures and Restarts </h2>
<p>
Various issues may cause Dynamo to restart its analysis or give up on compilation entirely, causing graph breaks and fallbacks to eager mode.
This run had <strong><a href="failures_and_restarts.html">{num_breaks} restart(s) and/or compilation failure(s)</a></strong>.
</p>
{{ endif }}
<h2>IR dumps</h2>
<p>
The <strong>IR dumps</strong> collected dumped intermediate products from various points of the PT2
compilation process.  The products are organized by compile id, and then sorted in chronological
order.
</p>
<p>
A <strong>compile id</strong> uniquely identifies a particular compilation inside a PT2
program.  It is traditionally written as <code>[x/y]</code>, where the <strong>frame id</strong> x
identifies the particular Python frame which we are compiling, and <strong>frame compile
id</strong> y identifies how many times we've recompiled this same frame.  For example,
<code>[0/0]</code> refers to the very first frame compiled by PT2; <code>[0/1]</code> refers to the
first recompilation of this frame, while <code>[1/0]</code> refers to a different frame, within
distinct code cache, which we are compiling next (perhaps because of a graph break).  Although
Dynamo treats distinct frames as completely unrelated, a frame compilation could overlap with another
frame; for example, if you graph break in an inlined function, Dynamo will typically try to compile
the nested frame again on an inner frame.  You can identify the hierarchical relationship between
frames by looking at the stack trie above.
</p>
<p>
In some situations, the compile id will have an extra signifier <code>[x/y_z]</code>, where z is the
<strong>attempt</strong> for this particular (re)compilation.  Certain conditions will cause Dynamo to
restart analysis, when Dynamo discovers that it needs to undo a decision it previously made.  The most
common cause of recompilation is a graph break in an inlined function call, which forces Dynamo to restart
and avoid inlining the function in the first place.
</p>
<p>
When compiled autograd is enabled, the compile id will include a prefix signifier <code>[!a/x/y]</code>,
where a is the <strong>compiled autograd id</strong>. For instance, <code>[!0/-/-]</code> refers 
to the first graph captured by compiled autograd. It is then traced by torch.compile as <code>[!0/x/y_z]</code>.
</p>
<p>
Here is a high level description of PT2's compilation phases, and the intermediate products each
phase generates:
</p>
<ol>
<li><em>Optional:</em> If compiled autograd is enabled, and we are processing a backward call, compiled autograd will trace the autograd graph from the autograd engine, and produce an FX graph <code>compiled_autograd_graph</code> that will be Dynamo traced.  Otherwise, Dynamo will directly trace user's bytecode.</li>
<li>Dynamo symbolically evaluates the Python bytecode of a program, producing <code>dynamo_output_graph</code></li>
<li><em>Optional:</em> If <code>optimize_ddp</code> is enabled, the DDPOptimizer will split the Dynamo output graph to improve pipelining communications.  Each split subgraph is <code>optimize_ddp_split_child_submod</code>, and the high level graph that plumbs the graphs together is <code>optimize_ddp_split_graph</code>.  If there are multiple splits, each subsequent build product will be produced multiple times, one for each split.</li>
<li>AOTAutograd traces the (possibly split) Dynamo output graph, producing a <code>aot_joint_graph</code> if backwards is enabled.  It then partitions the graph into <code>aot_forward_graph</code> and <code>aot_backward_graph</code>.  If training is not needed, there may only be an <code>aot_inference_graph</code>.</li>
<li>Inductor will apply some post grad FX passes, producing <code>inductor_post_grad_graph</code></li>
<li>Inductor will perform code generation, producing the final <code>inductor_output_code</code> which will be executed at runtime.  This output is a valid Python program and can be directly run.</li>
</ol>

{{ if has_chromium_events }}
<h2> Chromium Events </h2>
PT2 generates <a href='chromium_events.json'>Chromium Trace Events</a> in JSON on specific events during compilation.
You can download and view them in a tool like <a href='https://ui.perfetto.dev/'>Perfetto</a>.
{{ endif  }}
<p>
<a href="collectives_parity.json">Collectives Parity report</a> comparing scheduler and Inductor output code collective operations.
</p>
<p>
Build products below:
</p>
<ul>
{{ for compile_directory in directory }}
    <li><a id="{compile_directory.0}">{compile_directory.0}</a>
    <ul>
        {{ for path_idx in compile_directory.1 }}
            <li><a href="{path_idx.url}">{path_idx.name}</a>{{ if path_idx.readable_url }} (<a href="{path_idx.readable_url}">readable_html</a>){{ endif }} {path_idx.suffix} ({path_idx.number})</li>
        {{ endfor }}
    </ul>
    </li>
{{ endfor }}
</ul>
</div>


{{ if has_inductor_provenance }}
<h2>Provenance Tracking</h2>
<div>
    <p>View detailed provenance tracking information for each rank and frame:</p>
    <ul>
    {{ for directory_name in directory_names }}
        <li><a href='provenance_tracking_{directory_name}.html'>provenance_tracking_{directory_name}</a></li>
    {{ endfor }}
    </ul>
</div>
{{ endif }}

{{ if has_unknown_stack_trie }}
<div>
<h2>Unknown stacks</h2>
<p>
  Sometimes, logs are made without a compile id.  This makes it difficult to correlate related
  logs.  This stack trie shows all places where log entries occurred without compile context; to
  fix, look for an appropriate place in the stack where compile id should have been specified.
</p>
{unknown_stack_trie_html | format_unescaped}
</div>
{{ endif }}
{qps | format_unescaped}
</body>
</html>
"#;

/// Stylesheet text with table and link styling: collapsed, bordered tables at
/// 90% width, shaded header cells, alternating row shading, and undecorated
/// links that underline on hover.
///
/// Despite the `TEMPLATE_` prefix, the text contains no template
/// placeholders; it is plain CSS.
pub static TEMPLATE_FAILURES_CSS: &str = r#"
table {
    width: 90%;
    border-collapse: collapse;
    margin: 20px 0;
}
table, th, td {
    border: 1px solid #999;
    padding: 10px;
    text-align: left;
}
th {
    background-color: #d3d3d3;
    font-weight: bold;
}
tr:nth-child(odd) {
    background-color: #f2f2f2;
}
a {
    color: #0066cc;
    text-decoration: none;
}
a:hover {
    text-decoration: underline;
}
"#;

/// HTML template for the "Failures and Restarts" page.
///
/// Template inputs referenced by the text:
/// - `css`: stylesheet text placed inside `<style>`.
/// - `failures`: a sequence of pairs. `.0` is inserted unescaped into the
///   first cell (Compile Id); `.1` is inserted unescaped directly after it
///   and must therefore supply the remaining `<td>` cells of the row itself.
/// - `qps`: inserted unescaped after the table.
///
/// The table is explicitly closed before `qps` is emitted so the injected
/// markup is not placed inside `<table>`.
pub static TEMPLATE_FAILURES_AND_RESTARTS: &str = r#"
<html>
<head>
    <style>
    {css}
    </style>
</head>
<body>
    <h1>Failures and Restarts</h1>
    <table>
    <tr> <th> Compile Id </th> <th> Failure Type </th> <th> Failure Description </th> <th> Failure Source (compilation failures only) </th> </tr>
    {{ for failure in failures }}
    <tr> <td> {failure.0 | format_unescaped} </td>{failure.1 | format_unescaped}</tr>
    {{ endfor }}
    </table>
    {qps | format_unescaped}
</body>
</html>
"#;

/// HTML template for the per-compile-id "Compilation Metrics" page.
///
/// The page sets `<base href="..">`, so relative links resolve against the
/// parent directory; output-file links are therefore prefixed with
/// `{compile_id_dir}/`.
///
/// Template inputs referenced by the text:
/// - `css`, `compile_id`, `compile_id_dir`.
/// - `mini_stack_html`, `stack_html`: inserted unescaped.
/// - `output_files`: entries with `url`, `name`, `number`.
/// - `m`: the metrics record. Fields read here: `entire_frame_compile_time_s`,
///   `backend_compile_time_s`, `inductor_compile_time_s` (optional),
///   `code_gen_time_s` (optional), `dynamo_time_before_restart_s`,
///   `fail_type`, `fail_reason`, `fail_user_frame_filename`,
///   `fail_user_frame_lineno`, `restart_reasons`, `cache_size`,
///   `accumulated_cache_size`, `guard_count`, `shape_env_guard_count`,
///   `graph_op_count`, `graph_node_count`, `graph_input_count`,
///   `compliant_custom_ops`, `non_compliant_ops`.
/// - `symbolic_shape_specializations`, `guards_added_fast`, `create_symbols`,
///   `unbacked_symbols`: sequences rendered as tables; their `*_stack_html`
///   fields are inserted unescaped.
/// - `extra_metrics` (optional): entries with `key` and `value_html`.
/// - `qps`: inserted unescaped just before `</body>`.
pub static TEMPLATE_COMPILATION_METRICS: &str = r#"
<html>
<head>
    <style>
    {css}
    </style>
    <title>Compilation Metrics</title>
    <base href="..">
</head>
<body>
    <h1>Compilation Info for {compile_id}</h1>
    <p>{mini_stack_html | format_unescaped}</p>
    <h2>Output files:</h2>
    <ul>
        {{ for path_idx in output_files }}
            <li><a href="{compile_id_dir}/{path_idx.url}">{path_idx.name}</a> ({path_idx.number})</li>
        {{ endfor }}
    </ul>
    <h2>Stack</h2>
    {stack_html | format_unescaped}
    <h2>Compile Time(seconds)</h2>
    <p>Entire Frame <abbr title="Total time spent in convert_frame function">[?]</abbr>: {m.entire_frame_compile_time_s}</p>
    <p>Backend <abbr title="Time spent running the backend compiler">[?]</abbr>: {m.backend_compile_time_s}</p>
    {{ if m.inductor_compile_time_s }}
    <p>Inductor <abbr title="Total time spent running inductor">[?]</abbr>: {m.inductor_compile_time_s}</p>
    {{ endif }}
    {{ if m.code_gen_time_s }}
    <p>Code Gen Time: {m.code_gen_time_s}</p>
    {{ endif}}
    <div>Dynamo Time Before Restart <abbr title="Total time spent restarting dynamo analysis">[?]</abbr>: {m.dynamo_time_before_restart_s}</div>
    <h2>Restarts and Failures</h2>
    {{ if m.fail_type }}
    <p>Failure Exception: <pre>{m.fail_type}</pre></p>
    <p>Failure Reason: <pre>{m.fail_reason}</pre></p>
    {{ if m.fail_user_frame_filename }}
    <p>In file <pre>{m.fail_user_frame_filename}</pre>, line {m.fail_user_frame_lineno}</p>
    {{ endif}}
    {{ else }}
    <p> No failures! </p>
    {{ endif }}
    {{ if m.restart_reasons }}
    <p>Restart Reasons:</p>
    <ul>
    {{ for restart_reason in m.restart_reasons }}
     <li> <code> {restart_reason} </code> </li>
    {{ endfor }}
    </ul>
    {{ else }}
    <p> No restarts! </p>
    {{ endif }}
    <h2>Cache Metrics</h2>
    <p>Cache Size: {m.cache_size}</p>
    <p>Accumulated Cache Size: {m.accumulated_cache_size}</p>
    <h2>Graph Metrics</h2>
    <p>Guard Count: {m.guard_count}</p>
    <p>Shape Env Guards: {m.shape_env_guard_count}</p>
    <p>Graph Ops: {m.graph_op_count}</p>
    <p>Graph Nodes: {m.graph_node_count}</p>
    <p>Graph Inputs: {m.graph_input_count}</p>
    <h2> Custom Ops </h2>
    {{ if m.compliant_custom_ops }}
    <p> Compliant Custom Ops:</p>
    <ul>
    {{ for op in m.compliant_custom_ops }}
    <li> <code> {op} </code> </li>
    {{ endfor }}
    </ul>
    {{ endif }}
    {{ if m.non_compliant_ops }}
    <p> Non-Compliant Custom Ops:</p>
    <ul>
    {{ for op in m.non_compliant_ops }}
    <li> <code> {op} </code> </li>
    {{ endfor }}
    </ul>
    {{ endif }}
    <h2>Symbolic shape specializations</h2>
    <table>
    <tr>
        <th>Sym</th> <th>Source(s)</th> <th>Value</th> <th>User stack</th> <th>Framework stack</th>
    </tr>
    {{ for spec in symbolic_shape_specializations }}
    <tr>
        <td>{spec.symbol}</td>
        <td>{{ for source in spec.sources }}{source}<br>{{ endfor }}</td>
        <td>{spec.value}</td>
        <td>{spec.user_stack_html | format_unescaped}</td>
        <td>{spec.stack_html | format_unescaped}</td>
    </tr>
    {{ endfor }}
    </table>
    <h2>Guards added fast</h2>
    <table>
    <tr>
        <th>Expr</th> <th>User stack</th> <th>Framework stack</th>
    </tr>
    {{ for g in guards_added_fast }}
    <tr>
        <td>{g.expr}</td>
        <td>{g.user_stack_html | format_unescaped}</td>
        <td>{g.stack_html | format_unescaped}</td>
    </tr>
    {{ endfor }}
    </table>
    <h2>Created Symbols</h2>
    <table>
    <tr>
        <th>Symbol</th> <th>Value</th> <th>Range</th> <th>Source</th> <th>User stack</th> <th>Framework stack</th>
    </tr>
    {{ for sym in create_symbols }}
    <tr>
        <td>{sym.symbol}</td>
        <td>{sym.val}</td>
        <td>{sym.vr}</td>
        <td>{sym.source}</td>
        <td>{sym.user_stack_html | format_unescaped}</td>
        <td>{sym.stack_html | format_unescaped}</td>
    </tr>
    {{ endfor }}
    </table>
    <h2>Unbacked Symbols</h2>
    <table>
    <tr>
        <th>Symbol</th> <th>Range</th> <th>User stack</th> <th>Framework stack</th>
    </tr>
    {{ for sym in unbacked_symbols }}
    <tr>
        <td>{sym.symbol}</td>
        <td>{sym.vr}</td>
        <td>{sym.user_stack_html | format_unescaped}</td>
        <td>{sym.stack_html | format_unescaped}</td>
    </tr>
    {{ endfor }}
    </table>
    {{ if extra_metrics }}
    <h2>Other Metrics</h2>
    <table>
    <tr>
        <th>Key</th> <th>Value</th>
    </tr>
    {{ for em in extra_metrics }}
    <tr>
        <td><code>{em.key}</code></td>
        <td>{em.value_html | format_unescaped}</td>
    </tr>
    {{ endfor }}
    </table>
    {{ endif }}
    {qps | format_unescaped}
</body>
</html>
"#;

/// HTML template for the "AOT Autograd Backward Compilation Metrics" page.
///
/// Template inputs referenced by the text:
/// - `css`, `compile_id`.
/// - `m.fail_type` / `m.fail_reason`: when `m.fail_type` is truthy both are
///   rendered in `<pre>` blocks; otherwise "No failures!" is shown.
/// - `qps`: inserted unescaped just before `</body>`.
pub static TEMPLATE_AOT_AUTOGRAD_BACKWARD_COMPILATION_METRICS: &str = r#"
<html>
<head>
    <style>
    {css}
    </style>
    <title>AOT Autograd Backward Compilation Metrics</title>
</head>
<body>
    <h1>Compilation Info for {compile_id}</h1>
    <h2>Failures</h2>
    {{ if m.fail_type }}
    <p>Failure Exception: <pre>{m.fail_type}</pre></p>
    <p>Failure Reason: <pre>{m.fail_reason}</pre></p>
    {{ else }}
    <p> No failures! </p>
    {{ endif }}
    {qps | format_unescaped}
</body>
</html>
"#;

/// HTML template for the "Backward Compilation Metrics" page.
///
/// Template inputs referenced by the text:
/// - `css`, `compile_id`.
/// - `m.inductor_compile_time_s`, `m.code_gen_time_s`: each rendered only
///   when truthy.
/// - `m.fail_type` / `m.fail_reason`: when `m.fail_type` is truthy both are
///   rendered in `<pre>` blocks; otherwise "No failures!" is shown.
/// - `qps`: inserted unescaped just before `</body>`.
pub static TEMPLATE_BWD_COMPILATION_METRICS: &str = r#"
<html>
<head>
    <style>
    {css}
    </style>
    <title>Backward Compilation Metrics</title>
</head>
<body>
    <h1>Backward Compilation Info for {compile_id}</h1>
    <h2>Compile Time(seconds)</h2>
    {{ if m.inductor_compile_time_s }}
    <p>Inductor <abbr title="Total time spent running inductor">[?]</abbr>: {m.inductor_compile_time_s}</p>
    {{ endif }}
    {{ if m.code_gen_time_s }}
    <p>Code Gen Time: {m.code_gen_time_s}</p>
    {{ endif}}
    <h2>Failures</h2>
    {{ if m.fail_type }}
    <p>Failure Exception: <pre>{m.fail_type}</pre></p>
    <p>Failure Reason: <pre>{m.fail_reason}</pre></p>
    {{ else }}
    <p> No failures! </p>
    {{ endif }}
    {qps | format_unescaped}
</body>
</html>
"#;

// NB: Invariant for generated HTML: all links must show up in the initial HTML for this to be applied.
//     Links dynamically generated/added after document load (i.e. using JS) will not get this applied.
//
// `<script>` snippet that, on DOMContentLoaded, copies the current page's
// query parameters onto every `<a href>` whose href does not start with
// `http://`, `https://` or `#`. Parameters from the page override same-named
// parameters already present on the link. When the page has no query
// parameters the handler returns early and links are left untouched.
//
// The `\#` inside the selector is deliberate: a bare `"#` would terminate the
// surrounding Rust raw string, and in JavaScript `\#` is simply `#`.
pub static TEMPLATE_QUERY_PARAM_SCRIPT: &str = r#"
    <script>
    document.addEventListener('DOMContentLoaded', function() {

        // Append the current URL's query parameters to all relative links on the page
        const queryParams = new URLSearchParams(window.location.search);
        if (queryParams.size === 0) return; // No query params, leave links untouched

        function appendQueryParams(url) {
            const newURL = new URL((new Request(url)).url);  // new URL(<relative URL>) but it actually works
            const newSearchParams = new URLSearchParams(newURL.searchParams);

            // Append query parameters
            for (const [key, value] of queryParams) {
                newSearchParams.set(key, value);
            }

            newURL.search = newSearchParams;
            return newURL;
        }

        // Select all relative links on the page
        const relativeLinks = document.querySelectorAll('a[href]:not([href^="http://"]):not([href^="https://"]):not([href^="\#"])');

        // Append query parameters to each relative link
        relativeLinks.forEach((link) => {
            link.setAttribute("href", appendQueryParams(link.getAttribute("href")))
        });
    });
    </script>
"#;

/// HTML template for the "Draft Export Report" index page.
///
/// Template inputs referenced by the text:
/// - `css`, `javascript`: inserted unescaped into `<style>` / `<script>`.
/// - `custom_header_html`: inserted unescaped.
/// - `success`: when truthy, a success message is shown; otherwise the
///   failure summary and table are rendered.
/// - `num_failures`, `failures`: each failure has `failure_type`, `reason`
///   and `additional_info`, all inserted unescaped.
/// - `exported_program_url`: link target for the exported program.
/// - `qps`: inserted unescaped just before `</body>`.
///
/// The wrapping `<div>` is closed before `qps` is emitted.
pub static TEMPLATE_EXPORT_INDEX: &str = r#"
<html>
<head>
  <meta charset="UTF-8">
</head>
<style>
{css | format_unescaped}
</style>
<script>
{javascript | format_unescaped}
</script>
<body>
<div>
{custom_header_html | format_unescaped}
<h1>Draft Export Report</h1>
{{ if success }}
<p>
Congratulations! No issues are found during export, and it was able to soundly
produce a graph. You can now change back to torch.export.export()
</p>
{{ else }}
<b>{num_failures} issue(s) were found during export</b>, and it was not able to
soundly produce a graph. The following is a list of all the issues found and how
you may address them.
<table>
<tr> <th> Failure Type </th> <th> Reason </th> <th> Additional Info </th> </tr>
{{ for failure in failures }}
<tr> 
    <td>{failure.failure_type | format_unescaped}</td>
    <td>{failure.reason | format_unescaped}</td>
    <td>{failure.additional_info | format_unescaped}</td>
</tr>
{{ endfor }}
</table>
{{ endif }}

Here is the resulting exported program: <a href="{exported_program_url}">link</a>.
</div>
{qps | format_unescaped}
</body>
</html>
"#;

/// HTML template for the "Symbolic Shapes Information" detail page of a
/// single guard expression.
///
/// The page sets `<base href="..">`, so relative links resolve against the
/// parent directory.
///
/// Template inputs referenced by the text:
/// - `css`, `expr`.
/// - `user_stack_html`, `framework_stack_html`, `locals_html`,
///   `sym_expr_trie_html`: all inserted unescaped.
///
/// Unlike the other page templates in this file, this one does not reference
/// `qps`.
pub static TEMPLATE_SYMBOLIC_GUARD_INFO: &str = r#"
<html>
<head>
    <style>
    {css}
    </style>
    <title>Symbolic Shapes Information</title>
    <base href="..">
</head>
<body>
    <h1>More detailed information on <code>{expr}</code></h1>
    <h2>Stacktrace:</h2>
    {user_stack_html | format_unescaped}
    {framework_stack_html | format_unescaped}
    <h2>Locals Information:</h2>
    {locals_html | format_unescaped}
    <h2>Provenance information for this guard:</h2>
    <details open>
    <summary>Provenance Information</summary>
    {sym_expr_trie_html | format_unescaped}
    </details>
</body>
</html>
"#;

// Provenance-tracking page assets. Each is embedded at compile time with
// `include_str!`, which resolves the path relative to this source file, so
// `provenance.css`, `provenance.js` and `provenance.html` must sit next to it.

/// Contents of `provenance.css`.
pub static PROVENANCE_CSS: &str = include_str!("provenance.css");
/// Contents of `provenance.js`.
pub static PROVENANCE_JS: &str = include_str!("provenance.js");
/// Contents of `provenance.html`.
pub static TEMPLATE_PROVENANCE_TRACKING: &str = include_str!("provenance.html");

/// HTML template for the multi-rank landing page, which links to each rank's
/// own report at `rank_{rank}/index.html`.
///
/// Template inputs referenced by the text:
/// - `css`, `custom_header_html`: inserted unescaped.
/// - `show_desync_warning`: gates the `.warning-box` block, inside which
///   `compile_id_divergence`, `diagnostics.divergence.cache` and
///   `diagnostics.divergence.collective` each enable one warning;
///   `diagnostics.cache_groups` / `diagnostics.collective_groups` entries
///   expose `ranks`.
/// - `num_ranks`, `ranks`.
/// - `has_chromium_events`: enables the link to `chromium_events.json`.
/// - `diagnostics.artifacts.runtime_trace`: enables the link to
///   `chromium_trace_with_runtime.json`.
/// - `diagnostics.analysis` (optional): `has_mismatched_graph_counts`, and
///   `graphs` entries with `graph_id`, `delta_ms` and `rank_details`, where
///   index 0 is rendered as the fastest rank and index 1 as the slowest,
///   each with `rank` and `runtime_ms`.
/// - `diagnostics.exec_order` (optional): `order_differs`,
///   `has_schedule_mismatch`, `ranks_schedule_str`, `has_cache_mismatch`,
///   `ranks_cache_str`.
/// - `diagnostics.divergence.tensor_meta`, `diagnostics.tensor_meta_groups`
///   (entries expose `ranks`).
/// - `qps`: inserted unescaped just before `</body>`.
pub static TEMPLATE_MULTI_RANK_INDEX: &str = r#"
<html>
<head>
  <meta charset="UTF-8">
</head>
<style>
{css | format_unescaped}
</style>
<body>
<div>
{custom_header_html | format_unescaped}
{{ if show_desync_warning }}
<div class="warning-box">
    {{ if compile_id_divergence }}
    <p><strong>Warning:</strong> Diverging Compilation IDs detected across ranks. This may lead to hangs or timeouts during distributed execution.</p>
    {{ endif }}
    {{ if diagnostics.divergence.cache }}
    <p><strong>Warning:</strong> Diverging Cache hit/miss patterns detected across ranks. Cache hit/miss pattern groups:</p>
    <ul>
        {{ for group in diagnostics.cache_groups }}
            <li>Ranks: {group.ranks}</li>
        {{ endfor }}
    </ul>
    {{ endif }}
    {{ if diagnostics.divergence.collective }}
    <p><strong>Warning:</strong> Diverging collective operation sequences detected across ranks. This can lead to hangs or timeouts during distributed execution.</p>
    <p>Collective operation sequence groups:</p>
    <ul>
        {{ for group in diagnostics.collective_groups }}
            <li>Ranks: {group.ranks}</li>
        {{ endfor }}
    </ul>
    {{ endif }}
</div>
{{ endif }}
<h2>Multi-Rank TLParse Report</h2>
<p>
This report contains TLParse links from <strong>{num_ranks}</strong> rank(s). Click on any rank below
to view its detailed compilation report.
</p>
{{ if has_chromium_events }}
<h3> Chromium Events </h3>
<p>
PT2 generates <a href='chromium_events.json'>Chromium Trace Events</a> in JSON on specific events during compilation.
You can download and view them in a tool like <a href='https://ui.perfetto.dev/'>Perfetto</a>.
This is a combined trace from all ranks.
</p>
{{ endif }}
{{ if diagnostics.artifacts.runtime_trace }}
<h3> Runtime Trace Visualization </h3>
<p>
<a href='chromium_trace_with_runtime.json'>Runtime Estimation Chromium Trace</a> shows estimated runtime per operation across all ranks and graphs.
Each rank appears as a separate process (PID) in the trace; within each process, each compiled graph is visualized as its own thread (TID). Operations are laid out sequentially by estimated duration on that thread.
You can download and view this trace in <a href='https://ui.perfetto.dev/'>Perfetto</a> to visualize performance differences across ranks.
</p>
{{ endif }}
<p>
Individual rank reports:
</p>
<ul>
{{ for rank in ranks }}
    <li><a href="rank_{rank}/index.html">Rank {rank}</a></li>
{{ endfor }}
</ul>
{{ if diagnostics.analysis }}
{{ if diagnostics.analysis.has_mismatched_graph_counts }}
<h3>Graph Runtime Analysis</h3>
<p>
<strong>Runtime analysis not available:</strong> Ranks have different numbers of compiled graphs, preventing cross-rank comparison. This mismatch may indicate compilation divergence between ranks.
</p>
{{ else }}
<h3>Graph Runtime Analysis</h3>
<p>
Runtime variance analysis across all <strong>{num_ranks}</strong> rank(s) for each compiled graph based on inductor runtime estimates. Shows the delta between the fastest and slowest ranks,
helping identify performance imbalances that could impact distributed training efficiency. Large deltas indicate potential
desync issues on specific ranks.
</p>
{{ for graph in diagnostics.analysis.graphs }}
<p><strong>Graph {graph.graph_id}:</strong> {graph.delta_ms} ms delta (Fastest: Rank {graph.rank_details.0.rank} - {graph.rank_details.0.runtime_ms} ms, Slowest: Rank {graph.rank_details.1.rank} - {graph.rank_details.1.runtime_ms} ms)</p>
{{ endfor }}
{{ endif }}
{{ endif }}
<h3>Graph Execution-Order Diagnostics</h3>
<p>Note: To enable this feature, wrap your code with torch._inductor.debug.record_and_log_graph_execution_order()</p>
{{ if diagnostics.exec_order }}
  {{ if diagnostics.exec_order.order_differs }}
  <p>Graph execution order differs across ranks.</p>
  {{ else }}
  <p>Graph execution order: consistent across ranks.</p>
  {{ endif }}
  {{ if diagnostics.exec_order.has_schedule_mismatch }}
  <p><strong>Warning:</strong> Schedule mismatch across ranks: {diagnostics.exec_order.ranks_schedule_str}</p>
  {{ else }}
  <p>Collectives Schedule: Consistent across ranks.</p>
  {{ endif }}
  {{ if diagnostics.exec_order.has_cache_mismatch }}
  <p><strong>Warning:</strong> Cache hit/miss mismatch across ranks: {diagnostics.exec_order.ranks_cache_str}</p>
  {{ else }}
  <p>Cache hit/miss sequence: Consistent across ranks.</p>
  {{ endif }}
{{ else }}
<p>Execution-order analysis unavailable.</p>
{{ endif }}
<h3>Tensor Metadata Analysis</h3>
<p>
Compares inductor tensor metadata (shapes, dtypes, strides) across ranks to detect compilation divergence.
</p>
{{ if diagnostics.divergence.tensor_meta }}
<p>
Ranks exhibit divergent inductor tensor metadata across graphs. Groups with identical signatures:
</p>
<ul>
    {{ for group in diagnostics.tensor_meta_groups }}
        <li>Ranks: {group.ranks}</li>
    {{ endfor }}
    </ul>
{{ else }}
<p>
All ranks have matching tensor meta signatures across graphs.
</p>
{{ endif }}
</div>
{qps | format_unescaped}
</body>
</html>
"#;