torch-sys 0.24.0

Low-level FFI bindings for the PyTorch C++ API (libtorch).
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
// THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT BY HAND!
#include "torch_api.h"

extern "C" {
// Bitwise and shift operator bindings: `__and__`, `__or__`, `__xor__`,
// `__lshift__`, `__rshift__`, plus the `__i*__` forms (presumably the in-place
// counterparts, going by the Python-style naming).
// Each operator is declared twice: once taking `scalar other` and once, with a
// `tensor_` suffix, taking `tensor other`. Only the two shift operators also
// have `*_out_` declarations, which take an explicit `out` tensor before `self`.
// NOTE(review): the unnamed leading `tensor *` parameter is presumably the slot
// the result handle is written to -- confirm against torch_api.h and the
// generated implementation file.
void atg___and__(tensor *, tensor self, scalar other);
void atg___and__tensor_(tensor *, tensor self, tensor other);
void atg___iand__(tensor *, tensor self, scalar other);
void atg___iand__tensor_(tensor *, tensor self, tensor other);
void atg___ilshift__(tensor *, tensor self, scalar other);
void atg___ilshift__tensor_(tensor *, tensor self, tensor other);
void atg___ior__(tensor *, tensor self, scalar other);
void atg___ior__tensor_(tensor *, tensor self, tensor other);
void atg___irshift__(tensor *, tensor self, scalar other);
void atg___irshift__tensor_(tensor *, tensor self, tensor other);
void atg___ixor__(tensor *, tensor self, scalar other);
void atg___ixor__tensor_(tensor *, tensor self, tensor other);
void atg___lshift__(tensor *, tensor self, scalar other);
void atg___lshift__scalar_out_(tensor *, tensor out, tensor self, scalar other);
void atg___lshift__tensor_(tensor *, tensor self, tensor other);
void atg___lshift__tensor_out_(tensor *, tensor out, tensor self, tensor other);
void atg___or__(tensor *, tensor self, scalar other);
void atg___or__tensor_(tensor *, tensor self, tensor other);
void atg___rshift__(tensor *, tensor self, scalar other);
void atg___rshift__scalar_out_(tensor *, tensor out, tensor self, scalar other);
void atg___rshift__tensor_(tensor *, tensor self, tensor other);
void atg___rshift__tensor_out_(tensor *, tensor out, tensor self, tensor other);
void atg___xor__(tensor *, tensor self, scalar other);
void atg___xor__tensor_(tensor *, tensor self, tensor other);
// `_adaptive_avg_pool2d` / `_adaptive_avg_pool3d` bindings, each with a
// `_backward` declaration (taking `grad_output` and `self`) and `_out`
// declarations that add an explicit `out` tensor as the first named argument.
// The requested output size is passed as an `int64_t *output_size_data` /
// `int output_size_len` pair (presumably pointer + element count -- confirm
// with the implementation).
void atg__adaptive_avg_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg__adaptive_avg_pool2d_backward(tensor *, tensor grad_output, tensor self);
void atg__adaptive_avg_pool2d_backward_out(tensor *, tensor out, tensor grad_output, tensor self);
void atg__adaptive_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg__adaptive_avg_pool3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg__adaptive_avg_pool3d_backward(tensor *, tensor grad_output, tensor self);
void atg__adaptive_avg_pool3d_backward_out(tensor *, tensor out, tensor grad_output, tensor self);
void atg__adaptive_avg_pool3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
// `_add_batch_dim`, the `_add_relu` family and `_addmm_activation`.
// `_add_relu` is declared for a `tensor other` and, with a `_scalar` infix, for
// a `scalar other`; each has a trailing-underscore form and an `_out` form
// taking an explicit `out` tensor.
// `use_gelu` is declared as a plain `int` (presumably a boolean flag --
// confirm how the implementation converts it).
void atg__add_batch_dim(tensor *, tensor self, int64_t batch_dim, int64_t level);
void atg__add_relu(tensor *, tensor self, tensor other);
void atg__add_relu_(tensor *, tensor self, tensor other);
void atg__add_relu_out(tensor *, tensor out, tensor self, tensor other);
void atg__add_relu_scalar(tensor *, tensor self, scalar other);
void atg__add_relu_scalar_(tensor *, tensor self, scalar other);
void atg__add_relu_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg__addmm_activation(tensor *, tensor self, tensor mat1, tensor mat2, int use_gelu);
void atg__addmm_activation_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2, int use_gelu);
// `_aminmax` (whole-tensor and `_dim` forms) and `_amp_update_scale`.
// The `_aminmax*_out` declarations take two destination tensors, `out0` and
// `out1`; the `_dim` forms add `int64_t dim` and an `int keepdim` flag.
// `_amp_update_scale` is declared in three forms (plain, trailing underscore,
// `_out`), all taking `growth_tracker`, `found_inf`, two `double` factors and
// an `int64_t growth_interval`.
void atg__aminmax(tensor *, tensor self);
void atg__aminmax_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg__aminmax_dim_out(tensor *, tensor out0, tensor out1, tensor self, int64_t dim, int keepdim);
void atg__aminmax_out(tensor *, tensor out0, tensor out1, tensor self);
void atg__amp_update_scale(tensor *, tensor self, tensor growth_tracker, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval);
void atg__amp_update_scale_(tensor *, tensor self, tensor growth_tracker, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval);
void atg__amp_update_scale_out(tensor *, tensor out, tensor self, tensor growth_tracker, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval);
// Assertion and autocast bindings.
// Unlike most declarations in this header, `_assert_scalar` and
// `_assert_tensor_metadata` have no leading `tensor *` parameter and return
// `void`, so they hand nothing back through the signature.
// The assertion message is passed as `char* assert_msg_ptr` plus
// `int assert_msg_len` (presumably a pointer/length string that need not be
// NUL-terminated -- confirm with the implementation).
// Dtypes and devices are passed as plain `int`, and `layout` as `int8_t`.
void atg__assert_scalar(scalar self_scalar, char* assert_msg_ptr, int assert_msg_len);
void atg__assert_tensor_metadata(tensor a, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int dtype, int device, int8_t layout);
void atg__autocast_to_full_precision(tensor *, tensor self, int cuda_enabled, int cpu_enabled);
void atg__autocast_to_reduced_precision(tensor *, tensor self, int cuda_enabled, int cpu_enabled, int cuda_dtype, int cpu_dtype);
// `_batch_norm_no_update` / `_batch_norm_with_update` bindings.
// All forms share the arguments `input`, `weight`, `bias`, `running_mean`,
// `running_var`, `momentum`, `eps`. The `_out` forms prepend four destination
// tensors: `out0`..`out3` for `_no_update_out`, and `out`, `save_mean`,
// `save_invstd`, `reserve` for `_with_update_out`.
// NOTE(review): the non-`_out` forms presumably write several result handles
// through the leading `tensor *` -- confirm the required array length in the
// implementation before calling.
void atg__batch_norm_no_update(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__batch_norm_no_update_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__batch_norm_with_update(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__batch_norm_with_update_functional(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__batch_norm_with_update_out(tensor *, tensor out, tensor save_mean, tensor save_invstd, tensor reserve, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
// `_cast_<type>` bindings, one per target type named in the suffix (byte,
// char, double, float, half, int, long, short). All share the same signature:
// the source tensor `self` and an `int non_blocking` flag.
void atg__cast_byte(tensor *, tensor self, int non_blocking);
void atg__cast_char(tensor *, tensor self, int non_blocking);
void atg__cast_double(tensor *, tensor self, int non_blocking);
void atg__cast_float(tensor *, tensor self, int non_blocking);
void atg__cast_half(tensor *, tensor self, int non_blocking);
void atg__cast_int(tensor *, tensor self, int non_blocking);
void atg__cast_long(tensor *, tensor self, int non_blocking);
void atg__cast_short(tensor *, tensor self, int non_blocking);
// Assorted helper bindings: `_cdist_backward`, `_cholesky_solve_helper`,
// `_chunk_cat`, `_coalesce` / `_coalesced`, `_compute_linear_combination` and
// the `_conj*` family. Every operation here has a matching `_out` declaration
// that takes an explicit `out` tensor as its first named argument, except
// `_conj`, which has none in this chunk.
// `_chunk_cat` receives its input list as `tensor *tensors_data` plus
// `int tensors_len` (presumably pointer + element count -- confirm).
void atg__cdist_backward(tensor *, tensor grad, tensor x1, tensor x2, double p, tensor cdist);
void atg__cdist_backward_out(tensor *, tensor out, tensor grad, tensor x1, tensor x2, double p, tensor cdist);
void atg__cholesky_solve_helper(tensor *, tensor self, tensor A, int upper);
void atg__cholesky_solve_helper_out(tensor *, tensor out, tensor self, tensor A, int upper);
void atg__chunk_cat(tensor *, tensor *tensors_data, int tensors_len, int64_t dim, int64_t num_chunks);
void atg__chunk_cat_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim, int64_t num_chunks);
void atg__coalesce(tensor *, tensor self);
void atg__coalesce_out(tensor *, tensor out, tensor self);
void atg__coalesced(tensor *, tensor self, int coalesced);
void atg__coalesced_(tensor *, tensor self, int coalesced);
void atg__coalesced_out(tensor *, tensor out, tensor self, int coalesced);
void atg__compute_linear_combination(tensor *, tensor input, tensor coefficients);
void atg__compute_linear_combination_out(tensor *, tensor out, tensor input, tensor coefficients);
void atg__conj(tensor *, tensor self);
void atg__conj_copy(tensor *, tensor self);
void atg__conj_copy_out(tensor *, tensor out, tensor self);
void atg__conj_physical(tensor *, tensor self);
void atg__conj_physical_out(tensor *, tensor out, tensor self);
// Convolution and index/weight conversion bindings.
// Integer lists (`kernel_size`, `stride`, `padding`, `dilation`,
// `output_padding`) are each passed as an `int64_t *<name>_data` /
// `int <name>_len` pair.
// `_convolution_mode` differs from the others: its padding is a
// `char* padding_ptr` / `int padding_len` pair, i.e. a string rather than an
// integer list.
// `_convolution_deprecated` has the same parameters as `_convolution` minus the
// final `allow_tf32` flag.
void atg__conv_depthwise2d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg__conv_depthwise2d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg__convert_indices_from_coo_to_csr(tensor *, tensor self, int64_t size, int out_int32);
void atg__convert_indices_from_coo_to_csr_out(tensor *, tensor out, tensor self, int64_t size, int out_int32);
void atg__convert_indices_from_csr_to_coo(tensor *, tensor crow_indices, tensor col_indices, int out_int32, int transpose);
void atg__convert_indices_from_csr_to_coo_out(tensor *, tensor out, tensor crow_indices, tensor col_indices, int out_int32, int transpose);
void atg__convert_weight_to_int4pack(tensor *, tensor self, int64_t innerKTiles);
void atg__convert_weight_to_int4pack_for_cpu(tensor *, tensor self, int64_t innerKTiles);
void atg__convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled, int allow_tf32);
void atg__convolution_deprecated(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled);
void atg__convolution_mode(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg__convolution_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled, int allow_tf32);
// `_copy_from*` and `_cslt_*` bindings.
// `_cslt_sparse_mm_search` is the exception in this group: it has no leading
// `tensor *` parameter and returns an `int64_t` directly.
// NOTE(review): that return value is presumably the algorithm id accepted as
// `alg_id` by `_cslt_sparse_mm` -- confirm against the libtorch documentation.
void atg__copy_from(tensor *, tensor self, tensor dst, int non_blocking);
void atg__copy_from_and_resize(tensor *, tensor self, tensor dst);
void atg__copy_from_and_resize_out(tensor *, tensor out, tensor self, tensor dst);
void atg__copy_from_out(tensor *, tensor out, tensor self, tensor dst, int non_blocking);
void atg__cslt_compress(tensor *, tensor input);
void atg__cslt_sparse_mm(tensor *, tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result, int64_t alg_id, int64_t split_k, int64_t split_k_mode);
int64_t atg__cslt_sparse_mm_search(tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result);
// CTC-loss bindings (`_ctc_loss*`, `_cudnn_ctc_loss*`) and
// `_cudnn_attention_backward`.
// The CTC functions come in two flavours: the un-suffixed ones take
// `input_lengths` / `target_lengths` as `int64_t *..._data` + `int ..._len`
// pairs, while the `_tensor` ones take them as `tensor` arguments.
// `_cudnn_attention_backward` ends with a `double scale_v` /
// `uint8_t scale_null` pair.
// NOTE(review): the `_v` / `_null` pair presumably encodes an optional value,
// with non-zero `scale_null` meaning "absent" -- confirm in the implementation.
void atg__ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int zero_infinity);
void atg__ctc_loss_backward(tensor *, tensor grad, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity);
void atg__ctc_loss_backward_out(tensor *, tensor out, tensor grad, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity);
void atg__ctc_loss_backward_tensor(tensor *, tensor grad, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity);
void atg__ctc_loss_out(tensor *, tensor out0, tensor out1, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int zero_infinity);
void atg__ctc_loss_tensor(tensor *, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int zero_infinity);
void atg__ctc_loss_tensor_out(tensor *, tensor out0, tensor out1, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int zero_infinity);
void atg__cudnn_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor philox_seed, tensor philox_offset, tensor attn_bias, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, double scale_v, uint8_t scale_null);
void atg__cudnn_ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int deterministic, int zero_infinity);
void atg__cudnn_ctc_loss_out(tensor *, tensor out0, tensor out1, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int deterministic, int zero_infinity);
void atg__cudnn_ctc_loss_tensor(tensor *, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int deterministic, int zero_infinity);
// cuDNN dropout-state and RNN bindings.
// `_cudnn_init_dropout_state` takes an `int options_kind` / `int options_device`
// pair that its `_out` form does not (presumably the dtype and device of the
// tensor to create -- confirm with the implementation).
// The RNN functions receive their weight list as `tensor *weight_data` /
// `tensor *weight_arr_data` with an `int` length, and `batch_sizes` as an
// `int64_t *` + `int` pair. `_cudnn_rnn_out` prepends five destination tensors
// `out0`..`out4`.
void atg__cudnn_init_dropout_state(tensor *, double dropout, int train, int64_t dropout_seed, int options_kind, int options_device);
void atg__cudnn_init_dropout_state_out(tensor *, tensor out, double dropout, int train, int64_t dropout_seed);
void atg__cudnn_rnn(tensor *, tensor input, tensor *weight_data, int weight_len, int64_t weight_stride0, tensor weight_buf, tensor hx, tensor cx, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, int batch_first, double dropout, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, tensor dropout_state);
void atg__cudnn_rnn_flatten_weight(tensor *, tensor *weight_arr_data, int weight_arr_len, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, int batch_first, int bidirectional);
void atg__cudnn_rnn_flatten_weight_out(tensor *, tensor out, tensor *weight_arr_data, int weight_arr_len, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, int batch_first, int bidirectional);
void atg__cudnn_rnn_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor out4, tensor input, tensor *weight_data, int weight_len, int64_t weight_stride0, tensor weight_buf, tensor hx, tensor cx, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, int batch_first, double dropout, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, tensor dropout_state);
// Introspection and miscellaneous bindings.
// `_debug_has_internal_overlap`, `_dimi` and `_dimv` take only `tensor self`
// and return an `int64_t` directly; they have no leading `tensor *` parameter.
// The remaining functions (`_dim_arange`, `_dirichlet_grad*`, `_dyn_quant_*`)
// follow the usual `void` + leading `tensor *` shape.
int64_t atg__debug_has_internal_overlap(tensor self);
void atg__dim_arange(tensor *, tensor like, int64_t dim);
int64_t atg__dimi(tensor self);
int64_t atg__dimv(tensor self);
void atg__dirichlet_grad(tensor *, tensor x, tensor alpha, tensor total);
void atg__dirichlet_grad_out(tensor *, tensor out, tensor x, tensor alpha, tensor total);
void atg__dyn_quant_matmul_4bit(tensor *, tensor inp, tensor packed_weights, int64_t block_size, int64_t in_features, int64_t out_features);
void atg__dyn_quant_pack_4bit_weight(tensor *, tensor weights, tensor scales_zeros, tensor bias, int64_t block_size, int64_t in_features, int64_t out_features);
// `_efficient_attention_backward` and `_efficientzerotensor` bindings.
// `_efficient_attention_backward` carries three `<name>_v` / `<name>_null`
// pairs: `scale` (double), `num_splits_key` (int64_t) and `window_size`
// (int64_t), each with a `uint8_t` null marker.
// NOTE(review): these pairs presumably encode optional values, with a non-zero
// `_null` meaning "absent" -- confirm in the implementation.
// `_efficientzerotensor` takes its shape as `int64_t *size_data` +
// `int size_len`, plus an `options_kind` / `options_device` pair that the
// `_out` form does not take.
void atg__efficient_attention_backward(tensor *, tensor grad_out_, tensor query, tensor key, tensor value, tensor bias, tensor out, tensor cu_seqlens_q, tensor cu_seqlens_k, int64_t max_seqlen_q, int64_t max_seqlen_k, tensor logsumexp, double dropout_p, tensor philox_seed, tensor philox_offset, int64_t custom_mask_type, int bias_requires_grad, double scale_v, uint8_t scale_null, int64_t num_splits_key_v, uint8_t num_splits_key_null, int64_t window_size_v, uint8_t window_size_null, int shared_storage_dqdkdv);
void atg__efficientzerotensor(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__efficientzerotensor_out(tensor *, tensor out, int64_t *size_data, int size_len);
// `_embedding_bag` bindings: the forward forms (`_embedding_bag`,
// `_embedding_bag_forward_only`) and the backward forms (`_backward`,
// `_dense_backward`, `_sparse_backward`, `_per_sample_weights_backward`).
// The two forward `_out` declarations prepend four destination tensors
// `out0`..`out3`; the backward `_out` declarations prepend a single `out`.
// `mode` and `padding_idx` are `int64_t`; `scale_grad_by_freq`, `sparse` and
// `include_last_offset` are plain `int` flags.
void atg__embedding_bag(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
void atg__embedding_bag_backward(tensor *, tensor grad, tensor indices, tensor offsets, tensor offset2bag, tensor bag_size, tensor maximum_indices, int64_t num_weights, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int64_t padding_idx);
void atg__embedding_bag_dense_backward(tensor *, tensor grad, tensor indices, tensor offset2bag, tensor bag_size, tensor maximum_indices, int64_t num_weights, int scale_grad_by_freq, int64_t mode, tensor per_sample_weights, int64_t padding_idx);
void atg__embedding_bag_dense_backward_out(tensor *, tensor out, tensor grad, tensor indices, tensor offset2bag, tensor bag_size, tensor maximum_indices, int64_t num_weights, int scale_grad_by_freq, int64_t mode, tensor per_sample_weights, int64_t padding_idx);
void atg__embedding_bag_forward_only(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
void atg__embedding_bag_forward_only_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
void atg__embedding_bag_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
void atg__embedding_bag_per_sample_weights_backward(tensor *, tensor grad, tensor weight, tensor indices, tensor offsets, tensor offset2bag, int64_t mode, int64_t padding_idx);
void atg__embedding_bag_per_sample_weights_backward_out(tensor *, tensor out, tensor grad, tensor weight, tensor indices, tensor offsets, tensor offset2bag, int64_t mode, int64_t padding_idx);
void atg__embedding_bag_sparse_backward(tensor *, tensor grad, tensor indices, tensor offsets, tensor offset2bag, tensor bag_size, int64_t num_weights, int scale_grad_by_freq, int64_t mode, tensor per_sample_weights, int64_t padding_idx);
// Quantized-tensor creation bindings and `_euclidean_dist`.
// The non-`_out` creation functions take the shape as `int64_t *size_data` +
// `int size_len` and an `options_kind` / `options_device` pair; the `_out`
// forms take an `out` tensor instead of that pair.
// The per-tensor form takes `double scale` and `int64_t zero_point`; the
// per-channel form takes `scales` / `zero_points` tensors and an `int64_t axis`.
void atg__empty_affine_quantized(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device, double scale, int64_t zero_point);
void atg__empty_affine_quantized_out(tensor *, tensor out, int64_t *size_data, int size_len, double scale, int64_t zero_point);
void atg__empty_per_channel_affine_quantized(tensor *, int64_t *size_data, int size_len, tensor scales, tensor zero_points, int64_t axis, int options_kind, int options_device);
void atg__empty_per_channel_affine_quantized_out(tensor *, tensor out, int64_t *size_data, int size_len, tensor scales, tensor zero_points, int64_t axis);
void atg__euclidean_dist(tensor *, tensor x1, tensor x2);
void atg__euclidean_dist_out(tensor *, tensor out, tensor x1, tensor x2);
// `_fake_quantize_*` bindings.
// The learnable per-channel and per-tensor forms each have a forward, a
// `_backward` (adding a leading `grad` tensor) and an `_out` declaration; the
// per-channel ones take an extra `int64_t axis`. All of them end with
// `quant_min`, `quant_max` and a `double grad_factor`.
// The `cachemask_tensor_qparams` form takes `fake_quant_enabled` as a tensor,
// and its `_out` declaration has two destination tensors, `out0` and `out1`.
void atg__fake_quantize_learnable_per_channel_affine(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_learnable_per_channel_affine_backward(tensor *, tensor grad, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_learnable_per_channel_affine_out(tensor *, tensor out, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_learnable_per_tensor_affine(tensor *, tensor self, tensor scale, tensor zero_point, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_learnable_per_tensor_affine_backward(tensor *, tensor grad, tensor self, tensor scale, tensor zero_point, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_learnable_per_tensor_affine_out(tensor *, tensor out, tensor self, tensor scale, tensor zero_point, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_per_tensor_affine_cachemask_tensor_qparams(tensor *, tensor self, tensor scale, tensor zero_point, tensor fake_quant_enabled, int64_t quant_min, int64_t quant_max);
void atg__fake_quantize_per_tensor_affine_cachemask_tensor_qparams_out(tensor *, tensor out0, tensor out1, tensor self, tensor scale, tensor zero_point, tensor fake_quant_enabled, int64_t quant_min, int64_t quant_max);
/*
 * Prototypes for the atg__fft_* family: c2c, c2r and r2c, each with an _out
 * form that takes an extra `out` tensor.
 *
 * `dim_data`/`dim_len` is a pointer + int pair (presumably the list of
 * dimensions and its length). `forward` and `onesided` are `int`
 * (presumably boolean flags -- confirm); the c2r forms take
 * `int64_t last_dim_size` instead.
 */
void atg__fft_c2c(tensor *, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int forward);
void atg__fft_c2c_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int forward);
void atg__fft_c2r(tensor *, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int64_t last_dim_size);
void atg__fft_c2r_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int64_t last_dim_size);
void atg__fft_r2c(tensor *, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int onesided);
void atg__fft_r2c_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int onesided);
/*
 * Prototypes: atg__fill_mem_eff_dropout_mask_, atg__flash_attention_backward,
 * atg__foobar (plus _out) and the atg__functional_* assert / constrain-range
 * helpers.
 *
 * Parameter conventions visible in this group:
 *  - `X_v` + `uint8_t X_null` pairs (e.g. scale_v/scale_null, min_v/min_null):
 *    presumably an optional value plus an "is absent" flag -- confirm which
 *    flag value means absent.
 *  - `char* X_ptr` + `int X_len` pairs (assert_msg): presumably a string
 *    passed by pointer and byte length -- confirm whether NUL termination is
 *    required.
 */
void atg__fill_mem_eff_dropout_mask_(tensor *, tensor self, double dropout_p, int64_t seed, int64_t offset);
void atg__flash_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, tensor rng_state, tensor unused, double scale_v, uint8_t scale_null, int64_t window_size_left_v, uint8_t window_size_left_null, int64_t window_size_right_v, uint8_t window_size_right_null);
void atg__foobar(tensor *, tensor self, int arg1, int arg2, int arg3);
void atg__foobar_out(tensor *, tensor out, tensor self, int arg1, int arg2, int arg3);
void atg__functional_assert_async(tensor *, tensor self, char* assert_msg_ptr, int assert_msg_len, tensor dep_token);
void atg__functional_assert_scalar(tensor *, scalar self_scalar, char* assert_msg_ptr, int assert_msg_len, tensor dep_token);
void atg__functional_sym_constrain_range(tensor *, scalar size, int64_t min_v, uint8_t min_null, int64_t max_v, uint8_t max_null, tensor dep_token);
void atg__functional_sym_constrain_range_for_size(tensor *, scalar size, int64_t min_v, uint8_t min_null, int64_t max_v, uint8_t max_null, tensor dep_token);
/*
 * Prototypes for the atg__fused_* family: dropout, moving-average
 * observer/fake-quant helper (plain, _functional and _out), rms_norm and
 * sdp_choice.
 *
 * atg__fused_sdp_choice is the only one here that returns a value
 * (`int64_t`) and has no leading `tensor *` parameter; the rest return void
 * and take an unnamed `tensor *` first (presumably the output slot(s) --
 * confirm against the implementation).
 * `eps_v`/`eps_null` and `scale_v`/`scale_null` are value + uint8_t flag
 * pairs (presumably optional arguments -- confirm).
 */
void atg__fused_dropout(tensor *, tensor self, double p);
void atg__fused_dropout_out(tensor *, tensor out0, tensor out1, tensor self, double p);
void atg__fused_moving_avg_obs_fq_helper(tensor *, tensor self, tensor observer_on, tensor fake_quant_on, tensor running_min, tensor running_max, tensor scale, tensor zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, int per_row_fake_quant, int symmetric_quant);
void atg__fused_moving_avg_obs_fq_helper_functional(tensor *, tensor self, tensor observer_on, tensor fake_quant_on, tensor running_min, tensor running_max, tensor scale, tensor zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, int per_row_fake_quant, int symmetric_quant);
void atg__fused_moving_avg_obs_fq_helper_out(tensor *, tensor out0, tensor out1, tensor self, tensor observer_on, tensor fake_quant_on, tensor running_min, tensor running_max, tensor scale, tensor zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, int per_row_fake_quant, int symmetric_quant);
void atg__fused_rms_norm(tensor *, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, double eps_v, uint8_t eps_null);
int64_t atg__fused_sdp_choice(tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, double scale_v, uint8_t scale_null, int enable_gqa);
/*
 * Prototypes: atg__fw_primal (plus _copy/_copy_out),
 * atg__gather_sparse_backward, atg__grid_sampler_2d_cpu_fallback (plus
 * _backward/_out), atg__grouped_mm and two atg__has_* predicates.
 *
 * atg__has_compatible_shallow_copy_type and atg__has_same_storage_numel
 * return `int` and take no leading `tensor *` (presumably a boolean result
 * -- confirm). All others return void with an unnamed `tensor *` first.
 */
void atg__fw_primal(tensor *, tensor self, int64_t level);
void atg__fw_primal_copy(tensor *, tensor self, int64_t level);
void atg__fw_primal_copy_out(tensor *, tensor out, tensor self, int64_t level);
void atg__gather_sparse_backward(tensor *, tensor self, int64_t dim, tensor index, tensor grad);
void atg__grid_sampler_2d_cpu_fallback(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg__grid_sampler_2d_cpu_fallback_backward(tensor *, tensor grad_output, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg__grid_sampler_2d_cpu_fallback_out(tensor *, tensor out, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg__grouped_mm(tensor *, tensor self, tensor mat2, tensor offs, tensor bias, int out_dtype);
int atg__has_compatible_shallow_copy_type(tensor self, tensor from);
int atg__has_same_storage_numel(tensor self, tensor other);
/*
 * Prototypes for the atg__histogramdd_* family.
 *
 * atg__histogramdd_bin_edges differs from its neighbours: it returns a
 * `tensor *` and has no leading output parameter.
 * NOTE(review): presumably a heap-allocated array of tensors owned by the
 * caller -- confirm the termination and ownership rules against the
 * implementation.
 * atg__histogramdd_bin_edges_out takes `out_data`/`out_len` (a `tensor *`
 * plus int) instead of an unnamed output pointer.
 * `bins_data` is `int64_t *` in the bin_cts forms and `tensor *` in the
 * bin_tensors forms; `range_data` is `double *`; each has an int length.
 */
tensor *atg__histogramdd_bin_edges(tensor self, int64_t *bins_data, int bins_len, double *range_data, int range_len, tensor weight, int density);
void atg__histogramdd_bin_edges_out(tensor *out_data, int out_len, tensor self, int64_t *bins_data, int bins_len, double *range_data, int range_len, tensor weight, int density);
void atg__histogramdd_from_bin_cts(tensor *, tensor self, int64_t *bins_data, int bins_len, double *range_data, int range_len, tensor weight, int density);
void atg__histogramdd_from_bin_cts_out(tensor *, tensor out, tensor self, int64_t *bins_data, int bins_len, double *range_data, int range_len, tensor weight, int density);
void atg__histogramdd_from_bin_tensors(tensor *, tensor self, tensor *bins_data, int bins_len, tensor weight, int density);
void atg__histogramdd_from_bin_tensors_out(tensor *, tensor out, tensor self, tensor *bins_data, int bins_len, tensor weight, int density);
/*
 * Prototypes: atg__index_put_impl (plain, trailing-underscore and _out),
 * atg__indices (plus _copy/_copy_out), atg__int_mm (plus _out),
 * atg__is_all_true, atg__is_any_true, atg__is_zerotensor, atg__lazy_clone
 * and atg__linalg_check_errors.
 *
 * `indices_data`/`indices_len` is a `tensor *` + int pair (presumably an
 * array of tensors and its length).
 * atg__is_zerotensor returns `int` directly, whereas atg__is_all_true and
 * atg__is_any_true return void and take a leading `tensor *`.
 * atg__linalg_check_errors returns void and has no output pointer.
 */
void atg__index_put_impl(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate, int unsafe);
void atg__index_put_impl_(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate, int unsafe);
void atg__index_put_impl_out(tensor *, tensor out, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate, int unsafe);
void atg__indices(tensor *, tensor self);
void atg__indices_copy(tensor *, tensor self);
void atg__indices_copy_out(tensor *, tensor out, tensor self);
void atg__int_mm(tensor *, tensor self, tensor mat2);
void atg__int_mm_out(tensor *, tensor out, tensor self, tensor mat2);
void atg__is_all_true(tensor *, tensor self);
void atg__is_any_true(tensor *, tensor self);
int atg__is_zerotensor(tensor self);
void atg__lazy_clone(tensor *, tensor self);
void atg__linalg_check_errors(tensor info, char* api_name_ptr, int api_name_len, int is_matrix);
/*
 * Prototypes for the atg__linalg_* family: det, eigh, eigvals, slogdet,
 * solve_ex and svd, with variants that take explicit result tensors
 * (_result, _eigenvalues, _sign, _u).
 *
 * String arguments (`UPLO`, `driver`) are passed as `char* X_ptr` +
 * `int X_len` pairs (presumably pointer and byte length -- confirm whether
 * NUL termination is required). `int` parameters such as `compute_v`,
 * `left`, `check_errors`, `full_matrices` are presumably boolean flags.
 */
void atg__linalg_det(tensor *, tensor A);
void atg__linalg_det_result(tensor *, tensor result, tensor LU, tensor pivots, tensor A);
void atg__linalg_eigh(tensor *, tensor A, char* UPLO_ptr, int UPLO_len, int compute_v);
void atg__linalg_eigh_eigenvalues(tensor *, tensor eigenvalues, tensor eigenvectors, tensor A, char* UPLO_ptr, int UPLO_len, int compute_v);
void atg__linalg_eigvals(tensor *, tensor self);
void atg__linalg_slogdet(tensor *, tensor A);
void atg__linalg_slogdet_sign(tensor *, tensor sign, tensor logabsdet, tensor LU, tensor pivots, tensor A);
void atg__linalg_solve_ex(tensor *, tensor A, tensor B, int left, int check_errors);
void atg__linalg_solve_ex_result(tensor *, tensor result, tensor LU, tensor pivots, tensor info, tensor A, tensor B, int left, int check_errors);
void atg__linalg_svd(tensor *, tensor A, int full_matrices, int compute_uv, char* driver_ptr, int driver_len);
void atg__linalg_svd_u(tensor *, tensor U, tensor S, tensor Vh, tensor A, int full_matrices, int compute_uv, char* driver_ptr, int driver_len);
/*
 * Prototypes: atg__log_softmax (plus backward_data and _out forms),
 * atg__logcumsumexp (plus _out), atg__lstm_mps (plus _out) and
 * atg__lu_with_info.
 *
 * All return void and take an unnamed `tensor *` first (presumably the
 * output slot(s) -- confirm against the implementation).
 * `hx_data`/`hx_len` and `params_data`/`params_len` are `tensor *` + int
 * pairs (presumably tensor arrays with their lengths). `input_dtype` is a
 * plain int (presumably a dtype code -- confirm).
 */
void atg__log_softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__log_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__log_softmax_backward_data_out(tensor *, tensor out, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__log_softmax_out(tensor *, tensor out, tensor self, int64_t dim, int half_to_float);
void atg__logcumsumexp(tensor *, tensor self, int64_t dim);
void atg__logcumsumexp_out(tensor *, tensor out, tensor self, int64_t dim);
void atg__lstm_mps(tensor *, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg__lstm_mps_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor out4, tensor out5, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg__lu_with_info(tensor *, tensor self, int pivot, int check_errors);
/*
 * Prototypes: atg__make_dep_token, atg__make_dual (plus _copy/_copy_out),
 * atg__make_per_channel_quantized_tensor and
 * atg__make_per_tensor_quantized_tensor (each plus _out), and
 * atg__masked_scale (plus _out).
 *
 * Note the per-channel forms take `scale`/`zero_point` as tensors plus an
 * `axis`, while the per-tensor forms take `double scale` and
 * `int64_t zero_point`.
 */
void atg__make_dep_token(tensor *, int options_kind, int options_device);
void atg__make_dual(tensor *, tensor primal, tensor tangent, int64_t level);
void atg__make_dual_copy(tensor *, tensor primal, tensor tangent, int64_t level);
void atg__make_dual_copy_out(tensor *, tensor out, tensor primal, tensor tangent, int64_t level);
void atg__make_per_channel_quantized_tensor(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis);
void atg__make_per_channel_quantized_tensor_out(tensor *, tensor out, tensor self, tensor scale, tensor zero_point, int64_t axis);
void atg__make_per_tensor_quantized_tensor(tensor *, tensor self, double scale, int64_t zero_point);
void atg__make_per_tensor_quantized_tensor_out(tensor *, tensor out, tensor self, double scale, int64_t zero_point);
void atg__masked_scale(tensor *, tensor self, tensor mask, double scale);
void atg__masked_scale_out(tensor *, tensor out, tensor self, tensor mask, double scale);
/*
 * Prototypes: atg__masked_softmax (plus _backward and _out forms) and
 * atg__mixed_dtypes_linear.
 *
 * `dim_v`/`dim_null` and `mask_type_v`/`mask_type_null` are int64_t +
 * uint8_t pairs (presumably optional integers with an "is absent" flag --
 * confirm which flag value means absent). `activation_ptr`/`activation_len`
 * is a `char*` + int pair (presumably a string and its byte length).
 */
void atg__masked_softmax(tensor *, tensor self, tensor mask, int64_t dim_v, uint8_t dim_null, int64_t mask_type_v, uint8_t mask_type_null);
void atg__masked_softmax_backward(tensor *, tensor grad_output, tensor output, tensor mask, int64_t dim_v, uint8_t dim_null);
void atg__masked_softmax_backward_out(tensor *, tensor out, tensor grad_output, tensor output, tensor mask, int64_t dim_v, uint8_t dim_null);
void atg__masked_softmax_out(tensor *, tensor out, tensor self, tensor mask, int64_t dim_v, uint8_t dim_null, int64_t mask_type_v, uint8_t mask_type_null);
void atg__mixed_dtypes_linear(tensor *, tensor input, tensor weight, tensor scale, tensor bias, char* activation_ptr, int activation_len);
/*
 * Prototypes: atg__mkldnn_reshape (plus _out), atg__mkldnn_transpose
 * (plain, trailing-underscore and _out), atg__mps_convolution and
 * atg__mps_convolution_transpose (each plus _out).
 *
 * The convolution forms pass padding, stride, dilation (and output_padding
 * for the transpose forms) as `int64_t *X_data` + `int X_len` pairs
 * (presumably arrays with their element counts). Note that
 * atg__mps_convolution takes a `bias` tensor while
 * atg__mps_convolution_transpose does not.
 */
void atg__mkldnn_reshape(tensor *, tensor self, int64_t *shape_data, int shape_len);
void atg__mkldnn_reshape_out(tensor *, tensor out, tensor self, int64_t *shape_data, int shape_len);
void atg__mkldnn_transpose(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg__mkldnn_transpose_(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg__mkldnn_transpose_out(tensor *, tensor out, tensor self, int64_t dim0, int64_t dim1);
void atg__mps_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg__mps_convolution_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg__mps_convolution_transpose(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg__mps_convolution_transpose_out(tensor *, tensor out, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
/*
 * Prototypes for the atg__native_batch_norm_legit* variants (plain,
 * _functional, _no_stats, _no_training, and their _out forms) and
 * atg__native_multi_head_attention (plus _out).
 *
 * The _no_stats forms omit `running_mean`/`running_var`; the _no_training
 * forms omit the `training` flag. The _out forms name their explicit
 * output tensors (`out`, `save_mean`, `save_invstd`, or `out0..out2`).
 * `mask_type_v`/`mask_type_null` is an int64_t + uint8_t pair (presumably
 * an optional integer -- confirm).
 */
void atg__native_batch_norm_legit(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg__native_batch_norm_legit_functional(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg__native_batch_norm_legit_no_stats(tensor *, tensor input, tensor weight, tensor bias, int training, double momentum, double eps);
void atg__native_batch_norm_legit_no_stats_out(tensor *, tensor out, tensor save_mean, tensor save_invstd, tensor input, tensor weight, tensor bias, int training, double momentum, double eps);
void atg__native_batch_norm_legit_no_training(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__native_batch_norm_legit_no_training_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__native_batch_norm_legit_out(tensor *, tensor out, tensor save_mean, tensor save_invstd, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg__native_multi_head_attention(tensor *, tensor query, tensor key, tensor value, int64_t embed_dim, int64_t num_head, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, tensor mask, int need_weights, int average_attn_weights, int64_t mask_type_v, uint8_t mask_type_null);
void atg__native_multi_head_attention_out(tensor *, tensor out0, tensor out1, tensor query, tensor key, tensor value, int64_t embed_dim, int64_t num_head, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, tensor mask, int need_weights, int average_attn_weights, int64_t mask_type_v, uint8_t mask_type_null);
/*
 * Prototypes for atg__neg_view and its _copy / _copy_out forms.
 * Each returns void and takes an unnamed `tensor *` first (presumably the
 * output slot -- confirm against the implementation).
 */
void atg__neg_view(tensor *, tensor self);
void atg__neg_view_copy(tensor *, tensor self);
void atg__neg_view_copy_out(tensor *, tensor out, tensor self);
/*
 * Prototypes for the first part of the atg__nested_* family: construction
 * from padded tensors and the atg__nested_get_* accessors.
 *
 * atg__nested_get_ragged_idx is the only accessor that returns a value
 * directly (`int64_t`) with no leading `tensor *`; the other getters
 * return void and take an unnamed `tensor *` first (presumably the output
 * slot -- confirm against the implementation).
 * `sum_S_v`/`sum_S_null` is an int64_t + uint8_t pair (presumably an
 * optional integer -- confirm).
 */
void atg__nested_compute_contiguous_strides_offsets(tensor *, tensor nested_size);
void atg__nested_from_padded(tensor *, tensor padded, tensor cpu_nested_shape_example, int fuse_transform_0213);
void atg__nested_from_padded_and_nested_example(tensor *, tensor padded, tensor nt_example);
void atg__nested_from_padded_and_nested_example_out(tensor *, tensor out, tensor padded, tensor nt_example);
void atg__nested_from_padded_out(tensor *, tensor out, tensor padded, tensor cpu_nested_shape_example, int fuse_transform_0213);
void atg__nested_from_padded_tensor(tensor *, tensor padded, tensor offsets, tensor dummy, int64_t ragged_idx, tensor min_seqlen, tensor max_seqlen, int64_t sum_S_v, uint8_t sum_S_null);
void atg__nested_get_jagged_dummy(tensor *, tensor any);
void atg__nested_get_lengths(tensor *, tensor self);
void atg__nested_get_max_seqlen(tensor *, tensor self);
void atg__nested_get_min_seqlen(tensor *, tensor self);
void atg__nested_get_offsets(tensor *, tensor self);
int64_t atg__nested_get_ragged_idx(tensor self);
void atg__nested_get_values(tensor *, tensor self);
void atg__nested_get_values_copy(tensor *, tensor self);
void atg__nested_get_values_copy_out(tensor *, tensor out, tensor self);
/*
 * Prototypes for the rest of the atg__nested_* family (select/sum backward,
 * view_from_buffer and view_from_jagged with their _copy / _copy_out forms)
 * and atg__new_zeros_with_same_feature_meta (plus _out).
 *
 * All return void and take an unnamed `tensor *` first (presumably the
 * output slot -- confirm against the implementation).
 * `dim_data`/`dim_len` is an `int64_t *` + int pair (presumably a
 * dimension list and its length).
 */
void atg__nested_select_backward(tensor *, tensor grad_output, tensor self, int64_t dim, int64_t index);
void atg__nested_sum_backward(tensor *, tensor grad, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg__nested_view_from_buffer(tensor *, tensor self, tensor nested_size, tensor nested_strides, tensor offsets);
void atg__nested_view_from_buffer_copy(tensor *, tensor self, tensor nested_size, tensor nested_strides, tensor offsets);
void atg__nested_view_from_buffer_copy_out(tensor *, tensor out, tensor self, tensor nested_size, tensor nested_strides, tensor offsets);
void atg__nested_view_from_jagged(tensor *, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx, tensor min_seqlen, tensor max_seqlen);
void atg__nested_view_from_jagged_copy(tensor *, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx, tensor min_seqlen, tensor max_seqlen);
void atg__nested_view_from_jagged_copy_out(tensor *, tensor out, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx, tensor min_seqlen, tensor max_seqlen);
void atg__new_zeros_with_same_feature_meta(tensor *, tensor self, tensor other, int64_t self_num_batch_dims);
void atg__new_zeros_with_same_feature_meta_out(tensor *, tensor out, tensor self, tensor other, int64_t self_num_batch_dims);
/*
 * Takes no arguments and returns an `int`.
 * NOTE(review): presumably non-zero when NNPACK support is available --
 * confirm against the implementation.
 *
 * Declared with `(void)` rather than `()`: before C23 an empty parameter
 * list in C leaves the parameters unspecified, so a call with stray
 * arguments would compile without a diagnostic. `(void)` is a real
 * prototype and means the same thing when this header is compiled as C++.
 */
int atg__nnpack_available(void);
/*
 * Prototypes: atg__nnpack_spatial_convolution (plus _out) and atg__nnz.
 *
 * `padding_data`/`padding_len` and `stride_data`/`stride_len` are
 * `int64_t *` + int pairs (presumably arrays with their element counts).
 * atg__nnz returns `int64_t` directly and has no leading `tensor *`.
 */
void atg__nnpack_spatial_convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg__nnpack_spatial_convolution_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
int64_t atg__nnz(tensor self);
/*
 * Prototypes: atg__pack_padded_sequence (plus _backward and _out),
 * atg__pad_circular, atg__pad_enum and atg__pad_packed_sequence.
 *
 * `pad_data`/`pad_len` and `input_size_data`/`input_size_len` are
 * `int64_t *` + int pairs (presumably arrays with their element counts).
 * `value_v`/`value_null` is a double + uint8_t pair (presumably an optional
 * value -- confirm). `padding_value` is passed as a `scalar`.
 */
void atg__pack_padded_sequence(tensor *, tensor input, tensor lengths, int batch_first);
void atg__pack_padded_sequence_backward(tensor *, tensor grad, int64_t *input_size_data, int input_size_len, tensor batch_sizes, int batch_first);
void atg__pack_padded_sequence_out(tensor *, tensor out0, tensor out1, tensor input, tensor lengths, int batch_first);
void atg__pad_circular(tensor *, tensor self, int64_t *pad_data, int pad_len);
void atg__pad_enum(tensor *, tensor self, int64_t *pad_data, int pad_len, int64_t mode, double value_v, uint8_t value_null);
void atg__pad_packed_sequence(tensor *, tensor data, tensor batch_sizes, int batch_first, scalar padding_value, int64_t total_length);
/*
 * Prototypes: atg__pdist_backward (plus _out), atg__pin_memory (plus _out),
 * atg__prelu_kernel (plus _backward), atg__print, atg__propagate_xla_data
 * and atg__remove_batch_dim.
 *
 * atg__print and atg__propagate_xla_data return void and have no leading
 * `tensor *`, so they expose no tensor result through this interface.
 * atg__print takes its text as `char* s_ptr` + `int s_len` (presumably
 * pointer and byte length -- confirm whether NUL termination is required).
 * `device` is a plain int (presumably a device code -- confirm).
 */
void atg__pdist_backward(tensor *, tensor grad, tensor self, double p, tensor pdist);
void atg__pdist_backward_out(tensor *, tensor out, tensor grad, tensor self, double p, tensor pdist);
void atg__pin_memory(tensor *, tensor self, int device);
void atg__pin_memory_out(tensor *, tensor out, tensor self, int device);
void atg__prelu_kernel(tensor *, tensor self, tensor weight);
void atg__prelu_kernel_backward(tensor *, tensor grad_output, tensor self, tensor weight);
void atg__print(char* s_ptr, int s_len);
void atg__propagate_xla_data(tensor input, tensor output);
void atg__remove_batch_dim(tensor *, tensor self, int64_t level, int64_t batch_size, int64_t out_dim);
/*
 * Prototypes: atg__reshape_alias (plus _copy/_copy_out), atg__reshape_copy,
 * atg__reshape_from_tensor and atg__resize_output (plain,
 * trailing-underscore and _out).
 *
 * `size_data`/`size_len` and `stride_data`/`stride_len` are `int64_t *` +
 * int pairs (presumably arrays with their element counts);
 * atg__reshape_from_tensor takes the shape as a tensor instead.
 * `device` is a plain int (presumably a device code -- confirm).
 */
void atg__reshape_alias(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
void atg__reshape_alias_copy(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
void atg__reshape_alias_copy_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
void atg__reshape_copy(tensor *, tensor self, int64_t *size_data, int size_len);
void atg__reshape_from_tensor(tensor *, tensor self, tensor shape);
void atg__resize_output(tensor *, tensor self, int64_t *size_data, int size_len, int device);
void atg__resize_output_(tensor *, tensor self, int64_t *size_data, int size_len, int device);
void atg__resize_output_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int device);
/*
 * Prototypes: atg__rowwise_prune, atg__safe_softmax, atg__sample_dirichlet
 * (plus _out) and atg__saturate_weight_to_fp16.
 *
 * All return void and take an unnamed `tensor *` first (presumably the
 * output slot(s) -- confirm against the implementation).
 * `compressed_indices_dtype` and `dtype` are plain ints (presumably dtype
 * codes -- confirm).
 */
void atg__rowwise_prune(tensor *, tensor weight, tensor mask, int compressed_indices_dtype);
void atg__safe_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg__sample_dirichlet(tensor *, tensor self);
void atg__sample_dirichlet_out(tensor *, tensor out, tensor self);
void atg__saturate_weight_to_fp16(tensor *, tensor weight);
/*
 * Prototypes for the atg__scaled_dot_product_* attention family (math,
 * math_for_mps, cudnn/flash backward, efficient attention, and the
 * flash-attention-for-CPU forward/backward).
 *
 * Every function here ends its attention parameters with
 * `double scale_v, uint8_t scale_null` (presumably an optional scale with
 * an "is absent" flag -- confirm which flag value means absent); only
 * atg__scaled_dot_product_attention_math has a further `enable_gqa` int
 * after it.
 */
void atg__scaled_dot_product_attention_math(tensor *, tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, tensor dropout_mask, double scale_v, uint8_t scale_null, int enable_gqa);
void atg__scaled_dot_product_attention_math_for_mps(tensor *, tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, tensor dropout_mask, double scale_v, uint8_t scale_null);
void atg__scaled_dot_product_cudnn_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor philox_seed, tensor philox_offset, tensor attn_bias, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, double scale_v, uint8_t scale_null);
void atg__scaled_dot_product_efficient_attention(tensor *, tensor query, tensor key, tensor value, tensor attn_bias, int compute_log_sumexp, double dropout_p, int is_causal, double scale_v, uint8_t scale_null);
void atg__scaled_dot_product_flash_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, tensor philox_seed, tensor philox_offset, double scale_v, uint8_t scale_null);
void atg__scaled_dot_product_flash_attention_for_cpu(tensor *, tensor query, tensor key, tensor value, double dropout_p, int is_causal, tensor attn_mask, double scale_v, uint8_t scale_null);
void atg__scaled_dot_product_flash_attention_for_cpu_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, double dropout_p, int is_causal, tensor attn_mask, double scale_v, uint8_t scale_null);
/*
 * Prototypes: atg__scaled_grouped_mm, atg__scaled_grouped_mm_v2,
 * atg__scaled_mm (plus _out) and atg__scaled_mm_v2 (plus _out).
 *
 * The non-v2 forms take `scale_a`/`scale_b` as single tensors. The v2 forms
 * instead take `tensor *scale_X_data` + `int scale_X_len` plus
 * `int64_t *recipe_X_data`/`swizzle_X_data` with int lengths, and a
 * `contraction_dim_data`/`contraction_dim_len` pair (presumably arrays with
 * their element counts). `out_dtype` is a plain int (presumably a dtype
 * code -- confirm).
 */
void atg__scaled_grouped_mm(tensor *, tensor self, tensor mat2, tensor scale_a, tensor scale_b, tensor offs, tensor bias, tensor scale_result, int out_dtype, int use_fast_accum);
void atg__scaled_grouped_mm_v2(tensor *, tensor self, tensor mat2, tensor *scale_a_data, int scale_a_len, int64_t *recipe_a_data, int recipe_a_len, int64_t *swizzle_a_data, int swizzle_a_len, tensor *scale_b_data, int scale_b_len, int64_t *recipe_b_data, int recipe_b_len, int64_t *swizzle_b_data, int swizzle_b_len, tensor offs, tensor bias, int out_dtype, int64_t *contraction_dim_data, int contraction_dim_len, int use_fast_accum);
void atg__scaled_mm(tensor *, tensor self, tensor mat2, tensor scale_a, tensor scale_b, tensor bias, tensor scale_result, int out_dtype, int use_fast_accum);
void atg__scaled_mm_out(tensor *, tensor out, tensor self, tensor mat2, tensor scale_a, tensor scale_b, tensor bias, tensor scale_result, int out_dtype, int use_fast_accum);
void atg__scaled_mm_v2(tensor *, tensor self, tensor mat2, tensor *scale_a_data, int scale_a_len, int64_t *recipe_a_data, int recipe_a_len, int64_t *swizzle_a_data, int swizzle_a_len, tensor *scale_b_data, int scale_b_len, int64_t *recipe_b_data, int recipe_b_len, int64_t *swizzle_b_data, int swizzle_b_len, tensor bias, int out_dtype, int64_t *contraction_dim_data, int contraction_dim_len, int use_fast_accum);
void atg__scaled_mm_v2_out(tensor *, tensor out, tensor self, tensor mat2, tensor *scale_a_data, int scale_a_len, int64_t *recipe_a_data, int recipe_a_len, int64_t *swizzle_a_data, int swizzle_a_len, tensor *scale_b_data, int scale_b_len, int64_t *recipe_b_data, int recipe_b_len, int64_t *swizzle_b_data, int swizzle_b_len, tensor bias, int out_dtype, int64_t *contraction_dim_data, int contraction_dim_len, int use_fast_accum);
/*
 * Prototypes: atg__scatter_reduce (plain, trailing-underscore and
 * _two_out), atg__segment_reduce_backward (plus _out),
 * atg__shape_as_tensor and atg__slow_conv2d_backward.
 *
 * The reduction name is passed as `char* reduce_ptr` + `int reduce_len`
 * (presumably pointer and byte length -- confirm whether NUL termination
 * is required). `initial` is passed as a `scalar`.
 * atg__slow_conv2d_backward names its `grad_input`, `grad_weight` and
 * `grad_bias` tensors explicitly ahead of the inputs.
 */
void atg__scatter_reduce(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
void atg__scatter_reduce_(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
void atg__scatter_reduce_two_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
void atg__segment_reduce_backward(tensor *, tensor grad, tensor output, tensor data, char* reduce_ptr, int reduce_len, tensor lengths, tensor offsets, int64_t axis, scalar initial);
void atg__segment_reduce_backward_out(tensor *, tensor out, tensor grad, tensor output, tensor data, char* reduce_ptr, int reduce_len, tensor lengths, tensor offsets, int64_t axis, scalar initial);
void atg__shape_as_tensor(tensor *, tensor self);
void atg__slow_conv2d_backward(tensor *, tensor grad_input, tensor grad_weight, tensor grad_bias, tensor grad_output, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
/*
 * Prototypes: the atg__sobol_engine_* helpers and atg__softmax (plus
 * backward_data and _out forms).
 *
 * All return void and take an unnamed `tensor *` first (presumably the
 * output slot(s) -- confirm against the implementation).
 * Note that atg__softmax_backward_data_out names its explicit output
 * `grad_input`, unlike the `out` name used by the other _out forms here.
 * `dtype` / `input_dtype` are plain ints (presumably dtype codes).
 */
void atg__sobol_engine_draw(tensor *, tensor quasi, int64_t n, tensor sobolstate, int64_t dimension, int64_t num_generated, int dtype);
void atg__sobol_engine_ff_(tensor *, tensor self, int64_t n, tensor sobolstate, int64_t dimension, int64_t num_generated);
void atg__sobol_engine_initialize_state_(tensor *, tensor self, int64_t dimension);
void atg__sobol_engine_scramble_(tensor *, tensor self, tensor ltm, int64_t dimension);
void atg__softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__softmax_backward_data_out(tensor *, tensor grad_input, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__softmax_out(tensor *, tensor out, tensor self, int64_t dim, int half_to_float);
/*
 * Prototypes: atg__sparse_addmm (plus _out) and atg__sparse_broadcast_to
 * (plus _copy/_copy_out).
 *
 * All return void and take an unnamed `tensor *` first (presumably the
 * output slot -- confirm against the implementation).
 * `size_data`/`size_len` is an `int64_t *` + int pair (presumably the
 * target size array and its length).
 */
void atg__sparse_addmm(tensor *, tensor self, tensor mat1, tensor mat2);
void atg__sparse_addmm_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2);
void atg__sparse_broadcast_to(tensor *, tensor self, int64_t *size_data, int size_len);
void atg__sparse_broadcast_to_copy(tensor *, tensor self, int64_t *size_data, int size_len);
void atg__sparse_broadcast_to_copy_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
/*
 * Prototypes for the sparse tensor constructors (bsc/bsr/compressed/coo/
 * csc/csr `_unsafe` and `_with_dims` forms) and the atg__sparse_csr_prod /
 * atg__sparse_csr_sum reductions with their dim_dtype _out forms.
 *
 * The constructors without an `out` tensor end with
 * `int options_kind, int options_device` (presumably dtype and device
 * codes -- confirm); the coo forms may add a trailing `is_coalesced` int.
 * The `_out` forms take an explicit `out` tensor and no options pair.
 * `size_data`, `blocksize_data` and `dim_data` are `int64_t *` with int
 * lengths (presumably arrays with their element counts).
 */
void atg__sparse_bsc_tensor_unsafe(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_bsr_tensor_unsafe(tensor *, tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_compressed_tensor_unsafe(tensor *, tensor compressed_indices, tensor plain_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_compressed_tensor_with_dims(tensor *, int64_t nnz, int64_t dense_dim, int64_t *size_data, int size_len, int64_t *blocksize_data, int blocksize_len, int index_dtype, int options_kind, int options_device);
void atg__sparse_coo_tensor_unsafe(tensor *, tensor indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device, int is_coalesced);
void atg__sparse_coo_tensor_with_dims(tensor *, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_coo_tensor_with_dims_and_tensors(tensor *, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len, tensor indices, tensor values, int options_kind, int options_device, int is_coalesced);
void atg__sparse_coo_tensor_with_dims_and_tensors_out(tensor *, tensor out, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len, tensor indices, tensor values, int is_coalesced);
void atg__sparse_coo_tensor_with_dims_out(tensor *, tensor out, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len);
void atg__sparse_csc_tensor_unsafe(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_csr_prod(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg__sparse_csr_prod_dim_dtype_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg__sparse_csr_sum(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg__sparse_csr_sum_dim_dtype_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg__sparse_csr_tensor_unsafe(tensor *, tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
/*
 * Prototypes: atg__sparse_log_softmax (plus backward_data, _int and _out
 * forms) and atg__sparse_mask_projection (plus _out).
 *
 * The plain and _out log-softmax forms take `int half_to_float`, while the
 * `_int` form takes `int dtype` instead (presumably a dtype code --
 * confirm). All return void with an unnamed `tensor *` first.
 */
void atg__sparse_log_softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__sparse_log_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__sparse_log_softmax_backward_data_out(tensor *, tensor out, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__sparse_log_softmax_int(tensor *, tensor self, int64_t dim, int dtype);
void atg__sparse_log_softmax_out(tensor *, tensor out, tensor self, int64_t dim, int half_to_float);
void atg__sparse_mask_projection(tensor *, tensor self, tensor mask, int accumulate_matches);
void atg__sparse_mask_projection_out(tensor *, tensor out, tensor self, tensor mask, int accumulate_matches);
/*
 * Prototypes: atg__sparse_mm (plus _reduce and _reduce_impl) and the
 * atg__sparse_semi_structured_* helpers (apply, apply_dense, linear, mm,
 * tile).
 *
 * String arguments (`reduce`, `activation`, `algorithm`) are passed as
 * `char* X_ptr` + `int X_len` pairs (presumably pointer and byte length --
 * confirm whether NUL termination is required). `out_dtype` is a plain int
 * (presumably a dtype code -- confirm).
 */
void atg__sparse_mm(tensor *, tensor sparse, tensor dense);
void atg__sparse_mm_reduce(tensor *, tensor sparse, tensor dense, char* reduce_ptr, int reduce_len);
void atg__sparse_mm_reduce_impl(tensor *, tensor self, tensor other, char* reduce_ptr, int reduce_len);
void atg__sparse_semi_structured_apply(tensor *, tensor input, tensor thread_masks);
void atg__sparse_semi_structured_apply_dense(tensor *, tensor input, tensor thread_masks);
void atg__sparse_semi_structured_linear(tensor *, tensor input, tensor weight, tensor meta, tensor bias, char* activation_ptr, int activation_len, int out_dtype);
void atg__sparse_semi_structured_mm(tensor *, tensor mat1, tensor mat1_meta, tensor mat2, int out_dtype);
void atg__sparse_semi_structured_tile(tensor *, tensor input, char* algorithm_ptr, int algorithm_len, int use_cutlass);
/*
 * Prototypes: atg__sparse_softmax (plus backward_data, _int and _out forms)
 * and atg__sparse_sparse_matmul (plus _out).
 *
 * The plain and _out softmax forms take `int half_to_float`, while the
 * `_int` form takes `int dtype` instead (presumably a dtype code --
 * confirm). All return void with an unnamed `tensor *` first.
 */
void atg__sparse_softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__sparse_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__sparse_softmax_backward_data_out(tensor *, tensor out, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__sparse_softmax_int(tensor *, tensor self, int64_t dim, int dtype);
void atg__sparse_softmax_out(tensor *, tensor out, tensor self, int64_t dim, int half_to_float);
void atg__sparse_sparse_matmul(tensor *, tensor self, tensor other);
void atg__sparse_sparse_matmul_out(tensor *, tensor out, tensor self, tensor other);
/*
 * Prototypes for the atg__sparse_sum family: the plain form, the _backward
 * form (plus _out), and the _dim / _dim_dtype / _dim_out / _dtype
 * overload-style variants.
 *
 * `dim_data`/`dim_len` is an `int64_t *` + int pair (presumably a
 * dimension list and its length); `dtype` is a plain int (presumably a
 * dtype code -- confirm).
 */
void atg__sparse_sum(tensor *, tensor self);
void atg__sparse_sum_backward(tensor *, tensor grad, tensor self, int64_t *dim_data, int dim_len);
void atg__sparse_sum_backward_out(tensor *, tensor out, tensor grad, tensor self, int64_t *dim_data, int dim_len);
void atg__sparse_sum_dim(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg__sparse_sum_dim_dtype(tensor *, tensor self, int64_t *dim_data, int dim_len, int dtype);
void atg__sparse_sum_dim_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len);
void atg__sparse_sum_dtype(tensor *, tensor self, int dtype);
/*
 * Prototypes: atg__spdiags (plus _out), atg__spsolve, atg__stack (plus
 * _out) and atg__standard_gamma (plus _grad and _out forms).
 *
 * atg__spdiags takes `int8_t layout` (NOTE(review): the encoding of the
 * layout value, including any "unset" sentinel, is not visible here --
 * confirm against the implementation).
 * `tensors_data`/`tensors_len` is a `tensor *` + int pair (presumably an
 * array of tensors and its length).
 */
void atg__spdiags(tensor *, tensor diagonals, tensor offsets, int64_t *shape_data, int shape_len, int8_t layout);
void atg__spdiags_out(tensor *, tensor out, tensor diagonals, tensor offsets, int64_t *shape_data, int shape_len, int8_t layout);
void atg__spsolve(tensor *, tensor A, tensor B, int left);
void atg__stack(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg__stack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
void atg__standard_gamma(tensor *, tensor self);
void atg__standard_gamma_grad(tensor *, tensor self, tensor output);
void atg__standard_gamma_grad_out(tensor *, tensor out, tensor self, tensor output);
void atg__standard_gamma_out(tensor *, tensor out, tensor self);
/*
 * Prototypes for the atg__test_* family.
 * NOTE(review): the names suggest these bind test-only operators that are
 * not meant for production callers -- confirm before relying on them.
 *
 * All return void and take an unnamed `tensor *` first (presumably the
 * output slot -- confirm against the implementation).
 * `addends_data` is `int64_t *` in the intlist forms and `double *` in the
 * floatlist forms, each with an int length. String arguments are passed as
 * `char* X_ptr` + `int X_len` pairs.
 */
void atg__test_ambiguous_defaults(tensor *, tensor dummy, int64_t a, int64_t b);
void atg__test_ambiguous_defaults_b(tensor *, tensor dummy, int64_t a, char* b_ptr, int b_len);
void atg__test_autograd_multiple_dispatch(tensor *, tensor self);
void atg__test_autograd_multiple_dispatch_fullcoverage_out(tensor *, tensor out, tensor self);
void atg__test_autograd_multiple_dispatch_ntonly(tensor *, tensor self, int b);
void atg__test_autograd_multiple_dispatch_view(tensor *, tensor self);
void atg__test_autograd_multiple_dispatch_view_copy(tensor *, tensor self);
void atg__test_autograd_multiple_dispatch_view_copy_out(tensor *, tensor out, tensor self);
void atg__test_check_tensor(tensor *, tensor self);
void atg__test_functorch_fallback(tensor *, tensor self, tensor other);
void atg__test_functorch_fallback_out(tensor *, tensor out, tensor self, tensor other);
void atg__test_optional_filled_intlist(tensor *, tensor values, int64_t *addends_data, int addends_len);
void atg__test_optional_filled_intlist_out(tensor *, tensor out, tensor values, int64_t *addends_data, int addends_len);
void atg__test_optional_floatlist(tensor *, tensor values, double *addends_data, int addends_len);
void atg__test_optional_floatlist_out(tensor *, tensor out, tensor values, double *addends_data, int addends_len);
void atg__test_optional_intlist(tensor *, tensor values, int64_t *addends_data, int addends_len);
void atg__test_optional_intlist_out(tensor *, tensor out, tensor values, int64_t *addends_data, int addends_len);
void atg__test_parallel_materialize(tensor *, tensor self, int64_t num_parallel, int skip_first);
void atg__test_serialization_subcmul(tensor *, tensor self, tensor other);
void atg__test_string_default(tensor *, tensor dummy, char* a_ptr, int a_len, char* b_ptr, int b_len);
void atg__test_warn_in_autograd(tensor *, tensor self);
void atg__test_warn_in_autograd_out(tensor *, tensor out, tensor self);
// `_to_copy`, `_to_cpu` and `_to_dense` declarations.
// `options_kind` / `options_device` are plain ints; their encoding is not
// visible in this header.
void atg__to_copy(tensor *, tensor self, int options_kind, int options_device, int non_blocking);
void atg__to_copy_out(tensor *, tensor out, tensor self, int non_blocking);
// Unlike its neighbours, this one returns a `tensor *` instead of taking a
// leading `tensor *` parameter. How the returned array is sized/terminated and
// who frees it is not visible here -- check the definition before use.
tensor *atg__to_cpu(tensor *tensors_data, int tensors_len);
void atg__to_dense(tensor *, tensor self, int dtype, int masked_grad);
void atg__to_dense_out(tensor *, tensor out, tensor self, int dtype, int masked_grad);
// `_to_sparse*` declarations (generic, bsc, bsr, csc, csr, semi-structured and
// sparse_dim forms, plus `_out` variants).
// `dense_dim_v` / `dense_dim_null` appear to encode an optional integer as a
// value plus a "null" flag -- presumably a non-zero flag means "absent";
// TODO confirm against the definitions.
// `blocksize_data` / `blocksize_len` appear to be a pointer + count pair.
void atg__to_sparse(tensor *, tensor self, int8_t layout, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_bsc(tensor *, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_bsc_out(tensor *, tensor out, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_bsr(tensor *, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_bsr_out(tensor *, tensor out, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_csc(tensor *, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_csc_out(tensor *, tensor out, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_csr(tensor *, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_csr_out(tensor *, tensor out, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_out(tensor *, tensor out, tensor self, int8_t layout, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_semi_structured(tensor *, tensor dense);
void atg__to_sparse_sparse_dim(tensor *, tensor self, int64_t sparse_dim);
void atg__to_sparse_sparse_dim_out(tensor *, tensor out, tensor self, int64_t sparse_dim);
// `_transform_bias_rescale_qkv` and `_transformer_encoder_layer_fwd`
// declarations with their `_out` forms.
// The rescale `_out` form takes three explicit output tensors (out0..out2).
// `mask_type_v` / `mask_type_null` appear to encode an optional integer as
// value + null flag -- TODO confirm against the definitions.
void atg__transform_bias_rescale_qkv(tensor *, tensor qkv, tensor qkv_bias, int64_t num_heads);
void atg__transform_bias_rescale_qkv_out(tensor *, tensor out0, tensor out1, tensor out2, tensor qkv, tensor qkv_bias, int64_t num_heads);
void atg__transformer_encoder_layer_fwd(tensor *, tensor src, int64_t embed_dim, int64_t num_heads, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, int use_gelu, int norm_first, double eps, tensor norm_weight_1, tensor norm_bias_1, tensor norm_weight_2, tensor norm_bias_2, tensor ffn_weight_1, tensor ffn_bias_1, tensor ffn_weight_2, tensor ffn_bias_2, tensor mask, int64_t mask_type_v, uint8_t mask_type_null);
void atg__transformer_encoder_layer_fwd_out(tensor *, tensor out, tensor src, int64_t embed_dim, int64_t num_heads, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, int use_gelu, int norm_first, double eps, tensor norm_weight_1, tensor norm_bias_1, tensor norm_weight_2, tensor norm_bias_2, tensor ffn_weight_1, tensor ffn_bias_1, tensor ffn_weight_2, tensor ffn_bias_2, tensor mask, int64_t mask_type_v, uint8_t mask_type_null);
// `_trilinear` and `_triton_*` attention declarations with their `_out` forms.
// `_trilinear` takes four integer lists (expand1, expand2, expand3, sumdim),
// each passed as a `*_data` pointer followed by a `*_len` int.
void atg__trilinear(tensor *, tensor i1, tensor i2, tensor i3, int64_t *expand1_data, int expand1_len, int64_t *expand2_data, int expand2_len, int64_t *expand3_data, int expand3_len, int64_t *sumdim_data, int sumdim_len, int64_t unroll_dim);
void atg__trilinear_out(tensor *, tensor out, tensor i1, tensor i2, tensor i3, int64_t *expand1_data, int expand1_len, int64_t *expand2_data, int expand2_len, int64_t *expand3_data, int expand3_len, int64_t *sumdim_data, int sumdim_len, int64_t unroll_dim);
void atg__triton_multi_head_attention(tensor *, tensor query, tensor key, tensor value, int64_t embed_dim, int64_t num_head, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, tensor mask);
void atg__triton_multi_head_attention_out(tensor *, tensor out, tensor query, tensor key, tensor value, int64_t embed_dim, int64_t num_head, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, tensor mask);
void atg__triton_scaled_dot_attention(tensor *, tensor q, tensor k, tensor v, double dropout_p);
void atg__triton_scaled_dot_attention_out(tensor *, tensor out, tensor q, tensor k, tensor v, double dropout_p);
// `_unique`, `_unique2` and `_unpack_dual` declarations.
// The `_out` forms take several explicit output tensors (out0, out1[, out2]).
// The int flags (`sorted`, `return_inverse`, `return_counts`) are presumably
// booleans -- confirm against the definitions.
void atg__unique(tensor *, tensor self, int sorted, int return_inverse);
void atg__unique2(tensor *, tensor self, int sorted, int return_inverse, int return_counts);
void atg__unique2_out(tensor *, tensor out0, tensor out1, tensor out2, tensor self, int sorted, int return_inverse, int return_counts);
void atg__unique_out(tensor *, tensor out0, tensor out1, tensor self, int sorted, int return_inverse);
void atg__unpack_dual(tensor *, tensor dual, int64_t level);
// `_unsafe_*` declarations (index, index_put, masked_index, view).
// `indices_data` / `indices_len` pass an array of tensor handles plus its
// count; whether individual entries may be null is not visible here.
// `fill` is passed as a `scalar` handle.
void atg__unsafe_index(tensor *, tensor self, tensor *indices_data, int indices_len);
void atg__unsafe_index_put(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate);
void atg__unsafe_masked_index(tensor *, tensor self, tensor mask, tensor *indices_data, int indices_len, scalar fill);
void atg__unsafe_masked_index_put_accumulate(tensor *, tensor self, tensor mask, tensor *indices_data, int indices_len, tensor values);
void atg__unsafe_view(tensor *, tensor self, int64_t *size_data, int size_len);
void atg__unsafe_view_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
// `_upsample_bicubic2d_aa` and `_upsample_bilinear2d_aa` declarations:
// forward, backward, `_backward_grad_input`, `_out` and `_vec` forms.
// `scales_h_v`/`scales_h_null` and `scales_w_v`/`scales_w_null` appear to
// encode optional doubles as value + null flag -- TODO confirm.
// The `_vec` forms take the scale factors as a double array
// (`scale_factors_data` / `scale_factors_len`) instead.
void atg__upsample_bicubic2d_aa(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
void atg__upsample_bilinear2d_aa(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
// `_upsample_nearest_exact{1d,2d,3d}` declarations: forward, backward,
// `_backward_grad_input`, `_out` and `_vec` forms for each dimensionality.
// The 1d forms carry one optional scale (`scales_v`/`scales_null`), the 2d
// forms two (h, w) and the 3d forms three (d, h, w). Each `*_v` / `*_null`
// pair appears to encode an optional double -- TODO confirm.
// The `_vec` forms take a `scale_factors_data` / `scale_factors_len` array.
void atg__upsample_nearest_exact1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
void atg__upsample_nearest_exact2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
void atg__upsample_nearest_exact3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
// `_use_cudnn_ctc_loss` declarations. These return an int directly and take no
// leading `tensor *` parameter; the int is presumably a boolean -- confirm.
// The first form takes the lengths as int64_t arrays (pointer + count), the
// `_tensor` form takes them as tensor handles.
int atg__use_cudnn_ctc_loss(tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank);
int atg__use_cudnn_ctc_loss_tensor(tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank);
// Takes no arguments and returns an int (presumably a boolean -- confirm
// against the definition).
// The parameter list is spelled `(void)` so that C compilers before C23 see a
// real prototype: an empty `()` there means "unspecified parameters", which
// disables argument checking and trips -Wstrict-prototypes. In C++ the two
// spellings are equivalent, so C++ callers are unaffected.
int atg__use_cudnn_rnn_flatten_weight(void);
// `_use_miopen_ctc_loss` declarations. These return an int directly and take
// no leading `tensor *` parameter; the int is presumably a boolean -- confirm.
// The first form takes the lengths as int64_t arrays (pointer + count), the
// `_tensor` form takes them as tensor handles.
int atg__use_miopen_ctc_loss(tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank);
int atg__use_miopen_ctc_loss_tensor(tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank);
// `_validate_*` declarations for sparse index / tensor arguments.
// These return void and take no `tensor *` parameter, so nothing is returned
// through the signature. How a validation failure is reported is not visible
// in this header -- check the definitions.
void atg__validate_compressed_sparse_indices(int is_crow, tensor compressed_idx, tensor plain_idx, int64_t cdim, int64_t dim, int64_t nnz);
void atg__validate_sparse_bsc_tensor_args(tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int check_pinning);
void atg__validate_sparse_bsr_tensor_args(tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int check_pinning);
void atg__validate_sparse_compressed_tensor_args(tensor compressed_indices, tensor plain_indices, tensor values, int64_t *size_data, int size_len, int8_t layout, int check_pinning);
void atg__validate_sparse_csc_tensor_args(tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int check_pinning);
void atg__validate_sparse_csr_tensor_args(tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int check_pinning);
// `_values` / `_values_copy` declarations, plus `_version`.
void atg__values(tensor *, tensor self);
void atg__values_copy(tensor *, tensor self);
void atg__values_copy_out(tensor *, tensor out, tensor self);
// Returns an int64_t directly rather than through a `tensor *` parameter.
int64_t atg__version(tensor self);
// `_weight_int4pack_mm*`, `_weight_int8pack_mm`, `_weight_norm*` and
// `_wrapped_*` declarations.
// The `_weight_norm_interface*_out` forms take two explicit output tensors
// (out0, out1).
void atg__weight_int4pack_mm(tensor *, tensor self, tensor mat2, int64_t qGroupSize, tensor qScaleAndZeros);
void atg__weight_int4pack_mm_for_cpu(tensor *, tensor self, tensor mat2, int64_t qGroupSize, tensor qScaleAndZeros);
void atg__weight_int4pack_mm_with_scales_and_zeros(tensor *, tensor self, tensor mat2, int64_t qGroupSize, tensor qScale, tensor qZeros);
void atg__weight_int8pack_mm(tensor *, tensor self, tensor mat2, tensor scales);
void atg__weight_norm(tensor *, tensor v, tensor g, int64_t dim);
void atg__weight_norm_differentiable_backward(tensor *, tensor grad_w, tensor saved_v, tensor saved_g, tensor saved_norms, int64_t dim);
void atg__weight_norm_interface(tensor *, tensor v, tensor g, int64_t dim);
void atg__weight_norm_interface_backward(tensor *, tensor grad_w, tensor saved_v, tensor saved_g, tensor saved_norms, int64_t dim);
void atg__weight_norm_interface_backward_out(tensor *, tensor out0, tensor out1, tensor grad_w, tensor saved_v, tensor saved_g, tensor saved_norms, int64_t dim);
void atg__weight_norm_interface_out(tensor *, tensor out0, tensor out1, tensor v, tensor g, int64_t dim);
void atg__wrapped_linear_prepack(tensor *, tensor weight, tensor weight_scale, tensor weight_zero_point, tensor bias);
void atg__wrapped_quantized_linear_prepacked(tensor *, tensor input, tensor input_scale, tensor input_zero_point, tensor packed_weight, tensor output_scale, tensor output_zero_point, int64_t out_channel);
// Unary `abs`, `absolute`, `acos` and `acosh` declarations.
// Each comes in three spellings: plain, trailing-underscore, and `_out` (which
// adds an explicit `out` tensor). The trailing underscore presumably marks the
// in-place variant, following PyTorch naming -- confirm against the definitions.
void atg_abs(tensor *, tensor self);
void atg_abs_(tensor *, tensor self);
void atg_abs_out(tensor *, tensor out, tensor self);
void atg_absolute(tensor *, tensor self);
void atg_absolute_(tensor *, tensor self);
void atg_absolute_out(tensor *, tensor out, tensor self);
void atg_acos(tensor *, tensor self);
void atg_acos_(tensor *, tensor self);
void atg_acos_out(tensor *, tensor out, tensor self);
void atg_acosh(tensor *, tensor self);
void atg_acosh_(tensor *, tensor self);
void atg_acosh_out(tensor *, tensor out, tensor self);
// `adaptive_avg_pool{1d,2d,3d}` and `adaptive_max_pool{1d,2d,3d}` declarations.
// `output_size_data` / `output_size_len` appear to be a pointer + count pair
// for the requested output size -- confirm against the definitions.
// The max-pool `_out` forms take two explicit output tensors: `out` and
// `indices`.
void atg_adaptive_avg_pool1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool3d_backward(tensor *, tensor grad_input, tensor grad_output, tensor self);
void atg_adaptive_avg_pool3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_max_pool1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_max_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_max_pool2d_backward(tensor *, tensor grad_output, tensor self, tensor indices);
void atg_adaptive_max_pool2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor indices);
void atg_adaptive_max_pool2d_out(tensor *, tensor out, tensor indices, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_max_pool3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_max_pool3d_backward(tensor *, tensor grad_output, tensor self, tensor indices);
void atg_adaptive_max_pool3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor indices);
void atg_adaptive_max_pool3d_out(tensor *, tensor out, tensor indices, tensor self, int64_t *output_size_data, int output_size_len);
// `add` declarations: tensor + tensor forms, and `_scalar` forms where `other`
// is a `scalar` handle instead of a tensor. Each has plain,
// trailing-underscore and `_out` spellings.
void atg_add(tensor *, tensor self, tensor other);
void atg_add_(tensor *, tensor self, tensor other);
void atg_add_out(tensor *, tensor out, tensor self, tensor other);
void atg_add_scalar(tensor *, tensor self, scalar other);
void atg_add_scalar_(tensor *, tensor self, scalar other);
void atg_add_scalar_out(tensor *, tensor out, tensor self, scalar other);
// `addbmm`, `addcdiv`, `addcmul`, `addmm`, `addmv` and `addr` declarations,
// each with plain, trailing-underscore and `_out` spellings.
// None of the signatures declared here carries a scaling-factor argument.
// `addmm` additionally has `_dtype` forms taking an int `out_dtype`, whose
// encoding is not visible in this header.
void atg_addbmm(tensor *, tensor self, tensor batch1, tensor batch2);
void atg_addbmm_(tensor *, tensor self, tensor batch1, tensor batch2);
void atg_addbmm_out(tensor *, tensor out, tensor self, tensor batch1, tensor batch2);
void atg_addcdiv(tensor *, tensor self, tensor tensor1, tensor tensor2);
void atg_addcdiv_(tensor *, tensor self, tensor tensor1, tensor tensor2);
void atg_addcdiv_out(tensor *, tensor out, tensor self, tensor tensor1, tensor tensor2);
void atg_addcmul(tensor *, tensor self, tensor tensor1, tensor tensor2);
void atg_addcmul_(tensor *, tensor self, tensor tensor1, tensor tensor2);
void atg_addcmul_out(tensor *, tensor out, tensor self, tensor tensor1, tensor tensor2);
void atg_addmm(tensor *, tensor self, tensor mat1, tensor mat2);
void atg_addmm_(tensor *, tensor self, tensor mat1, tensor mat2);
void atg_addmm_dtype(tensor *, tensor self, tensor mat1, tensor mat2, int out_dtype);
void atg_addmm_dtype_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2, int out_dtype);
void atg_addmm_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2);
void atg_addmv(tensor *, tensor self, tensor mat, tensor vec);
void atg_addmv_(tensor *, tensor self, tensor mat, tensor vec);
void atg_addmv_out(tensor *, tensor out, tensor self, tensor mat, tensor vec);
void atg_addr(tensor *, tensor self, tensor vec1, tensor vec2);
void atg_addr_(tensor *, tensor self, tensor vec1, tensor vec2);
void atg_addr_out(tensor *, tensor out, tensor self, tensor vec1, tensor vec2);
// `adjoint`, `affine_grid_generator*`, `alias*` and `align_*` declarations.
void atg_adjoint(tensor *, tensor self);
void atg_affine_grid_generator(tensor *, tensor theta, int64_t *size_data, int size_len, int align_corners);
void atg_affine_grid_generator_backward(tensor *, tensor grad, int64_t *size_data, int size_len, int align_corners);
void atg_affine_grid_generator_out(tensor *, tensor out, tensor theta, int64_t *size_data, int size_len, int align_corners);
void atg_alias(tensor *, tensor self);
void atg_alias_copy(tensor *, tensor self);
void atg_alias_copy_out(tensor *, tensor out, tensor self);
void atg_align_as(tensor *, tensor self, tensor other);
// Returns a `tensor *` instead of taking a leading `tensor *` parameter. How
// the returned array is sized/terminated and who frees it is not visible here
// -- check the definition before use.
tensor *atg_align_tensors(tensor *tensors_data, int tensors_len);
// `all` reduction declarations (whole tensor, single `dim`, and `dims` list
// forms, with `_out` variants), plus `allclose`.
// Note the naming: `atg_all_out` is the single-`dim` out form, while
// `atg_all_all_out` is the whole-tensor out form.
void atg_all(tensor *, tensor self);
void atg_all_all_out(tensor *, tensor out, tensor self);
void atg_all_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_all_dims(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_all_dims_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_all_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim);
// Returns an int directly (presumably a boolean -- confirm); takes no
// `tensor *` parameter.
int atg_allclose(tensor self, tensor other, double rtol, double atol, int equal_nan);
// `alpha_dropout`, `amax`, `amin`, `aminmax` and `angle` declarations.
// `aminmax` takes its dimension as `dim_v` / `dim_null`, which appears to
// encode an optional integer as value + null flag -- TODO confirm.
// `aminmax_out` takes two explicit output tensors, `min` and `max`.
void atg_alpha_dropout(tensor *, tensor input, double p, int train);
void atg_alpha_dropout_(tensor *, tensor self, double p, int train);
void atg_amax(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_amax_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_amin(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_amin_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_aminmax(tensor *, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_aminmax_out(tensor *, tensor min, tensor max, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_angle(tensor *, tensor self);
void atg_angle_out(tensor *, tensor out, tensor self);
// `any` reduction declarations (whole tensor, single `dim`, and `dims` list
// forms, with `_out` variants).
// Note the naming: `atg_any_out` is the single-`dim` out form, while
// `atg_any_all_out` is the whole-tensor out form.
void atg_any(tensor *, tensor self);
void atg_any_all_out(tensor *, tensor out, tensor self);
void atg_any_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_any_dims(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_any_dims_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_any_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim);
// `arange` declarations taking (end), (start, end) or (start, end, step) as
// `scalar` handles. `options_kind` / `options_device` are plain ints whose
// encoding is not visible in this header.
void atg_arange(tensor *, scalar end, int options_kind, int options_device);
void atg_arange_start(tensor *, scalar start, scalar end, int options_kind, int options_device);
void atg_arange_start_step(tensor *, scalar start, scalar end, scalar step, int options_kind, int options_device);
// `arc*` declarations: arccos, arccosh, arcsin, arcsinh, arctan, arctan2 and
// arctanh, each with plain, trailing-underscore and `_out` spellings.
// `arctan2` is the only binary one here (takes `self` and `other`).
void atg_arccos(tensor *, tensor self);
void atg_arccos_(tensor *, tensor self);
void atg_arccos_out(tensor *, tensor out, tensor self);
void atg_arccosh(tensor *, tensor self);
void atg_arccosh_(tensor *, tensor self);
void atg_arccosh_out(tensor *, tensor out, tensor self);
void atg_arcsin(tensor *, tensor self);
void atg_arcsin_(tensor *, tensor self);
void atg_arcsin_out(tensor *, tensor out, tensor self);
void atg_arcsinh(tensor *, tensor self);
void atg_arcsinh_(tensor *, tensor self);
void atg_arcsinh_out(tensor *, tensor out, tensor self);
void atg_arctan(tensor *, tensor self);
void atg_arctan2(tensor *, tensor self, tensor other);
void atg_arctan2_(tensor *, tensor self, tensor other);
void atg_arctan2_out(tensor *, tensor out, tensor self, tensor other);
void atg_arctan_(tensor *, tensor self);
void atg_arctan_out(tensor *, tensor out, tensor self);
void atg_arctanh(tensor *, tensor self);
void atg_arctanh_(tensor *, tensor self);
void atg_arctanh_out(tensor *, tensor out, tensor self);
// `argmax`, `argmin`, `argsort` and `argwhere` declarations.
// `argmax` / `argmin` take the dimension as `dim_v` / `dim_null`, which appears
// to encode an optional integer as value + null flag -- TODO confirm.
// `argsort` takes a plain int64_t `dim`; the `_stable` forms add an int
// `stable` flag ahead of it.
void atg_argmax(tensor *, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argmax_out(tensor *, tensor out, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argmin(tensor *, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argmin_out(tensor *, tensor out, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argsort(tensor *, tensor self, int64_t dim, int descending);
void atg_argsort_stable(tensor *, tensor self, int stable, int64_t dim, int descending);
void atg_argsort_stable_out(tensor *, tensor out, tensor self, int stable, int64_t dim, int descending);
void atg_argwhere(tensor *, tensor self);
// `as_strided` declarations (plain, trailing-underscore, `_copy`, `_scatter`
// and `_out` forms).
// `size` and `stride` are each passed as a `*_data` pointer plus `*_len` int.
// `storage_offset_v` / `storage_offset_null` appear to encode an optional
// integer as value + null flag -- TODO confirm against the definitions.
void atg_as_strided(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_copy(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_copy_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_scatter(tensor *, tensor self, tensor src, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_scatter_out(tensor *, tensor out, tensor self, tensor src, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
// `asin`, `asinh`, `atan`, `atan2` and `atanh` declarations, each with plain,
// trailing-underscore and `_out` spellings.
// `atan2` is the only binary one here (takes `self` and `other`).
void atg_asin(tensor *, tensor self);
void atg_asin_(tensor *, tensor self);
void atg_asin_out(tensor *, tensor out, tensor self);
void atg_asinh(tensor *, tensor self);
void atg_asinh_(tensor *, tensor self);
void atg_asinh_out(tensor *, tensor out, tensor self);
void atg_atan(tensor *, tensor self);
void atg_atan2(tensor *, tensor self, tensor other);
void atg_atan2_(tensor *, tensor self, tensor other);
void atg_atan2_out(tensor *, tensor out, tensor self, tensor other);
void atg_atan_(tensor *, tensor self);
void atg_atan_out(tensor *, tensor out, tensor self);
void atg_atanh(tensor *, tensor self);
void atg_atanh_(tensor *, tensor self);
void atg_atanh_out(tensor *, tensor out, tensor self);
// `atleast_{1d,2d,3d}` declarations.
// The single-tensor forms return void and take a leading `tensor *`.
// The `_sequence` forms take an array of tensor handles and return a
// `tensor *`; how that returned array is sized/terminated and who frees it is
// not visible here -- check the definitions before use.
void atg_atleast_1d(tensor *, tensor self);
tensor *atg_atleast_1d_sequence(tensor *tensors_data, int tensors_len);
void atg_atleast_2d(tensor *, tensor self);
tensor *atg_atleast_2d_sequence(tensor *tensors_data, int tensors_len);
void atg_atleast_3d(tensor *, tensor self);
tensor *atg_atleast_3d_sequence(tensor *tensors_data, int tensors_len);
// `avg_pool{1d,2d,3d}` declarations (forward, backward,
// `_backward_grad_input` and `_out` forms).
// `kernel_size`, `stride` and `padding` are each passed as a `*_data` pointer
// plus `*_len` int.
// The 2d/3d forms additionally take `divisor_override_v` /
// `divisor_override_null`, which appears to encode an optional integer as
// value + null flag -- TODO confirm. The 1d forms have no such argument.
void atg_avg_pool1d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad);
void atg_avg_pool1d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad);
void atg_avg_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool2d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
// `baddbmm` declarations.
// NOTE(review): the signatures are asymmetric. `atg_baddbmm` and
// `atg_baddbmm_dtype` take explicit `scalar beta, scalar alpha` arguments,
// while the trailing-underscore and `_out` forms declared here do not.
// Presumably this is a deliberate generator special case -- confirm before
// "fixing" it, since any change breaks callers.
void atg_baddbmm(tensor *, tensor self, tensor batch1, tensor batch2, scalar beta, scalar alpha);
void atg_baddbmm_(tensor *, tensor self, tensor batch1, tensor batch2);
void atg_baddbmm_dtype(tensor *, tensor self, tensor batch1, tensor batch2, int out_dtype, scalar beta, scalar alpha);
void atg_baddbmm_dtype_out(tensor *, tensor out, tensor self, tensor batch1, tensor batch2, int out_dtype);
void atg_baddbmm_out(tensor *, tensor out, tensor self, tensor batch1, tensor batch2);
// `bartlett_window` declarations.
// The non-out forms take `options_kind` / `options_device` ints (encoding not
// visible in this header); the `_out` forms take an explicit `out` tensor
// instead. The `_periodic` forms add an int `periodic` flag.
void atg_bartlett_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_bartlett_window_out(tensor *, tensor out, int64_t window_length);
void atg_bartlett_window_periodic(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_bartlett_window_periodic_out(tensor *, tensor out, int64_t window_length, int periodic);
// `batch_norm` declarations: the main entry point plus the `backward_elemt`,
// `backward_reduce`, `elemt`, `gather_stats`, `gather_stats_with_counts`,
// `stats` and `update_stats` helpers, with `_out` variants.
// Several `_out` forms take multiple explicit output tensors (out0..out3).
// Whether tensor arguments such as `weight`, `bias` or the running statistics
// may be null handles is not visible in this header -- check the definitions.
void atg_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps, int cudnn_enabled);
void atg_batch_norm_backward_elemt(tensor *, tensor grad_out, tensor input, tensor mean, tensor invstd, tensor weight, tensor sum_dy, tensor sum_dy_xmu, tensor count);
void atg_batch_norm_backward_elemt_out(tensor *, tensor out, tensor grad_out, tensor input, tensor mean, tensor invstd, tensor weight, tensor sum_dy, tensor sum_dy_xmu, tensor count);
void atg_batch_norm_backward_reduce(tensor *, tensor grad_out, tensor input, tensor mean, tensor invstd, tensor weight, int input_g, int weight_g, int bias_g);
void atg_batch_norm_backward_reduce_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor grad_out, tensor input, tensor mean, tensor invstd, tensor weight, int input_g, int weight_g, int bias_g);
void atg_batch_norm_elemt(tensor *, tensor input, tensor weight, tensor bias, tensor mean, tensor invstd, double eps);
void atg_batch_norm_elemt_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, tensor mean, tensor invstd, double eps);
void atg_batch_norm_gather_stats(tensor *, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, int64_t count);
void atg_batch_norm_gather_stats_out(tensor *, tensor out0, tensor out1, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, int64_t count);
void atg_batch_norm_gather_stats_with_counts(tensor *, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, tensor counts);
void atg_batch_norm_gather_stats_with_counts_out(tensor *, tensor out0, tensor out1, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, tensor counts);
void atg_batch_norm_stats(tensor *, tensor input, double eps);
void atg_batch_norm_stats_out(tensor *, tensor out0, tensor out1, tensor input, double eps);
void atg_batch_norm_update_stats(tensor *, tensor input, tensor running_mean, tensor running_var, double momentum);
void atg_batch_norm_update_stats_out(tensor *, tensor out0, tensor out1, tensor input, tensor running_mean, tensor running_var, double momentum);
// bernoulli bindings. `p` is supplied either as a tensor (atg_bernoulli_,
// atg_bernoulli_tensor) or as a double (atg_bernoulli_float_, atg_bernoulli_p);
// the plain atg_bernoulli takes only `self`. A trailing underscore presumably
// marks the in-place variant, following the PyTorch naming scheme -- confirm.
void atg_bernoulli(tensor *, tensor self);
void atg_bernoulli_(tensor *, tensor self, tensor p);
void atg_bernoulli_float_(tensor *, tensor self, double p);
void atg_bernoulli_p(tensor *, tensor self, double p);
void atg_bernoulli_tensor(tensor *, tensor self, tensor p);
// bilinear and binary_cross_entropy bindings. `reduction` travels as an
// int64_t code; the accepted values are not visible in this header.
// `weight`, `bias` and `pos_weight` are ordinary `tensor` handles here; whether
// a null handle is accepted for them is decided by the implementation -- TODO
// confirm before passing one.
void atg_bilinear(tensor *, tensor input1, tensor input2, tensor weight, tensor bias);
void atg_binary_cross_entropy(tensor *, tensor self, tensor target, tensor weight, int64_t reduction);
void atg_binary_cross_entropy_backward(tensor *, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction);
void atg_binary_cross_entropy_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction);
void atg_binary_cross_entropy_out(tensor *, tensor out, tensor self, tensor target, tensor weight, int64_t reduction);
void atg_binary_cross_entropy_with_logits(tensor *, tensor self, tensor target, tensor weight, tensor pos_weight, int64_t reduction);
void atg_binary_cross_entropy_with_logits_out(tensor *, tensor out, tensor self, tensor target, tensor weight, tensor pos_weight, int64_t reduction);
// bincount and binomial bindings, each with an `_out` twin that takes an
// explicit `out` tensor right after the leading unnamed `tensor *`.
void atg_bincount(tensor *, tensor self, tensor weights, int64_t minlength);
void atg_bincount_out(tensor *, tensor out, tensor self, tensor weights, int64_t minlength);
void atg_binomial(tensor *, tensor count, tensor prob);
void atg_binomial_out(tensor *, tensor out, tensor count, tensor prob);
// bitwise_and overloads, told apart by operand kinds:
//   (unsuffixed) / _scalar_out : tensor `self`, scalar `other`
//   _scalar_tensor[_out]       : scalar `self_scalar`, tensor `other`
//   _tensor[_ / _out]          : tensor `self`, tensor `other`
// Trailing-underscore names are presumably the in-place forms -- confirm.
void atg_bitwise_and(tensor *, tensor self, scalar other);
void atg_bitwise_and_(tensor *, tensor self, scalar other);
void atg_bitwise_and_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_bitwise_and_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_bitwise_and_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_bitwise_and_tensor(tensor *, tensor self, tensor other);
void atg_bitwise_and_tensor_(tensor *, tensor self, tensor other);
void atg_bitwise_and_tensor_out(tensor *, tensor out, tensor self, tensor other);
// bitwise_left_shift overloads. Note the naming differs from
// atg_bitwise_and/or/xor: here the unsuffixed entry takes a *tensor* `other`,
// and the scalar right-hand side lives under `_tensor_scalar`.
//   (unsuffixed) / _ / _tensor_out : tensor `self`, tensor `other`
//   _scalar_tensor[_out]           : scalar `self_scalar`, tensor `other`
//   _tensor_scalar[_ / _out]       : tensor `self`, scalar `other`
void atg_bitwise_left_shift(tensor *, tensor self, tensor other);
void atg_bitwise_left_shift_(tensor *, tensor self, tensor other);
void atg_bitwise_left_shift_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_bitwise_left_shift_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_bitwise_left_shift_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_bitwise_left_shift_tensor_scalar(tensor *, tensor self, scalar other);
void atg_bitwise_left_shift_tensor_scalar_(tensor *, tensor self, scalar other);
void atg_bitwise_left_shift_tensor_scalar_out(tensor *, tensor out, tensor self, scalar other);
// bitwise_not: unary, so only `self` (plus `out` for the `_out` variant).
void atg_bitwise_not(tensor *, tensor self);
void atg_bitwise_not_(tensor *, tensor self);
void atg_bitwise_not_out(tensor *, tensor out, tensor self);
// bitwise_or overloads, told apart by operand kinds:
//   (unsuffixed) / _ / _scalar_out : tensor `self`, scalar `other`
//   _scalar_tensor[_out]           : scalar `self_scalar`, tensor `other`
//   _tensor[_ / _out]              : tensor `self`, tensor `other`
void atg_bitwise_or(tensor *, tensor self, scalar other);
void atg_bitwise_or_(tensor *, tensor self, scalar other);
void atg_bitwise_or_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_bitwise_or_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_bitwise_or_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_bitwise_or_tensor(tensor *, tensor self, tensor other);
void atg_bitwise_or_tensor_(tensor *, tensor self, tensor other);
void atg_bitwise_or_tensor_out(tensor *, tensor out, tensor self, tensor other);
// bitwise_right_shift overloads; same layout as the left-shift group: the
// unsuffixed entry takes a tensor `other`, scalar right-hand sides use the
// `_tensor_scalar` names, and `_scalar_tensor` takes a scalar `self_scalar`.
void atg_bitwise_right_shift(tensor *, tensor self, tensor other);
void atg_bitwise_right_shift_(tensor *, tensor self, tensor other);
void atg_bitwise_right_shift_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_bitwise_right_shift_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_bitwise_right_shift_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_bitwise_right_shift_tensor_scalar(tensor *, tensor self, scalar other);
void atg_bitwise_right_shift_tensor_scalar_(tensor *, tensor self, scalar other);
void atg_bitwise_right_shift_tensor_scalar_out(tensor *, tensor out, tensor self, scalar other);
// bitwise_xor overloads, told apart by operand kinds:
//   (unsuffixed) / _ / _scalar_out : tensor `self`, scalar `other`
//   _scalar_tensor[_out]           : scalar `self_scalar`, tensor `other`
//   _tensor[_ / _out]              : tensor `self`, tensor `other`
void atg_bitwise_xor(tensor *, tensor self, scalar other);
void atg_bitwise_xor_(tensor *, tensor self, scalar other);
void atg_bitwise_xor_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_bitwise_xor_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_bitwise_xor_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_bitwise_xor_tensor(tensor *, tensor self, tensor other);
void atg_bitwise_xor_tensor_(tensor *, tensor self, tensor other);
void atg_bitwise_xor_tensor_out(tensor *, tensor out, tensor self, tensor other);
// blackman_window factories. The non-`_out` forms take two ints,
// `options_kind` and `options_device` (presumably an element-type code and a
// device index -- TODO confirm the encoding); the `_out` forms take an `out`
// tensor instead. `periodic` is passed as int.
void atg_blackman_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_blackman_window_out(tensor *, tensor out, int64_t window_length);
void atg_blackman_window_periodic(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_blackman_window_periodic_out(tensor *, tensor out, int64_t window_length, int periodic);
// block_diag and bmm bindings. Tensor lists are passed as a
// (`tensors_data`, `tensors_len`) pointer/length pair. The `_dtype` bmm
// variants take `out_dtype` as an int code (encoding not visible here).
void atg_block_diag(tensor *, tensor *tensors_data, int tensors_len);
void atg_block_diag_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_bmm(tensor *, tensor self, tensor mat2);
void atg_bmm_dtype(tensor *, tensor self, tensor mat2, int out_dtype);
void atg_bmm_dtype_out(tensor *, tensor out, tensor self, tensor mat2, int out_dtype);
void atg_bmm_out(tensor *, tensor out, tensor self, tensor mat2);
// atg_broadcast_tensors returns a `tensor *` rather than taking the leading
// unnamed `tensor *` argument. NOTE(review): the length/termination and
// ownership of the returned array are not expressed in the prototype -- check
// the implementation before freeing or iterating it.
tensor *atg_broadcast_tensors(tensor *tensors_data, int tensors_len);
// The target shape is given as an (`size_data`, `size_len`) int64_t array.
void atg_broadcast_to(tensor *, tensor self, int64_t *size_data, int size_len);
// bucketize bindings: the value to bucket is either a tensor `self` or a
// scalar `self_scalar`; `out_int32` and `right` are flags passed as int.
void atg_bucketize(tensor *, tensor self, tensor boundaries, int out_int32, int right);
void atg_bucketize_scalar(tensor *, scalar self_scalar, tensor boundaries, int out_int32, int right);
void atg_bucketize_scalar_out(tensor *, tensor out, scalar self_scalar, tensor boundaries, int out_int32, int right);
void atg_bucketize_tensor_out(tensor *, tensor out, tensor self, tensor boundaries, int out_int32, int right);
// atg_can_cast takes no tensors at all: two int codes in, an int out
// (presumably element-type codes and a boolean result -- TODO confirm).
int atg_can_cast(int from_, int to);
// cartesian_prod / cat operate on a tensor list passed as
// (`tensors_data`, `tensors_len`); cat additionally takes the `dim` index.
void atg_cartesian_prod(tensor *, tensor *tensors_data, int tensors_len);
void atg_cat(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_cat_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
// cauchy, ccol_indices and cdist bindings.
void atg_cauchy(tensor *, tensor self, double median, double sigma);
void atg_cauchy_(tensor *, tensor self, double median, double sigma);
void atg_cauchy_out(tensor *, tensor out, tensor self, double median, double sigma);
void atg_ccol_indices(tensor *, tensor self);
void atg_ccol_indices_copy(tensor *, tensor self);
void atg_ccol_indices_copy_out(tensor *, tensor out, tensor self);
// `compute_mode` is split into a value (`_v`) and a `uint8_t` flag (`_null`);
// presumably a non-zero flag means "no value supplied" -- TODO confirm.
void atg_cdist(tensor *, tensor x1, tensor x2, double p, int64_t compute_mode_v, uint8_t compute_mode_null);
// ceil and celu bindings: unary on `self`, each in three flavours
// (plain, trailing-underscore, and `_out` with an explicit `out` tensor).
// No extra numeric parameters are exposed for celu in these prototypes.
void atg_ceil(tensor *, tensor self);
void atg_ceil_(tensor *, tensor self);
void atg_ceil_out(tensor *, tensor out, tensor self);
void atg_celu(tensor *, tensor self);
void atg_celu_(tensor *, tensor self);
void atg_celu_out(tensor *, tensor out, tensor self);
// chain_matmul takes its operands as a (`matrices_data`, `matrices_len`)
// tensor array; chalf and channel_shuffle act on a single `self`.
void atg_chain_matmul(tensor *, tensor *matrices_data, int matrices_len);
void atg_chain_matmul_out(tensor *, tensor out, tensor *matrices_data, int matrices_len);
void atg_chalf(tensor *, tensor self);
void atg_channel_shuffle(tensor *, tensor self, int64_t groups);
void atg_channel_shuffle_out(tensor *, tensor out, tensor self, int64_t groups);
// cholesky bindings; `upper` is a flag passed as int in every variant.
void atg_cholesky(tensor *, tensor self, int upper);
void atg_cholesky_inverse(tensor *, tensor self, int upper);
void atg_cholesky_inverse_out(tensor *, tensor out, tensor self, int upper);
void atg_cholesky_out(tensor *, tensor out, tensor self, int upper);
void atg_cholesky_solve(tensor *, tensor self, tensor input2, int upper);
void atg_cholesky_solve_out(tensor *, tensor out, tensor self, tensor input2, int upper);
void atg_choose_qparams_optimized(tensor *, tensor input, int64_t numel, int64_t n_bins, double ratio, int64_t bit_width);
// atg_chunk returns a `tensor *` instead of using the leading unnamed
// `tensor *` argument. NOTE(review): how the end of the returned array is
// marked and who frees it is not visible in this header -- verify.
tensor *atg_chunk(tensor self, int64_t chunks, int64_t dim);
// clamp family. Bounds are `scalar` handles in the unsuffixed/_max/_min
// entries and `tensor` handles in the `_tensor` entries. NOTE(review): whether
// a null `min` or `max` handle is accepted to express a one-sided bound is not
// visible here -- check the implementation before relying on it.
void atg_clamp(tensor *, tensor self, scalar min, scalar max);
void atg_clamp_(tensor *, tensor self, scalar min, scalar max);
void atg_clamp_max(tensor *, tensor self, scalar max);
void atg_clamp_max_(tensor *, tensor self, scalar max);
void atg_clamp_max_out(tensor *, tensor out, tensor self, scalar max);
void atg_clamp_max_tensor(tensor *, tensor self, tensor max);
void atg_clamp_max_tensor_(tensor *, tensor self, tensor max);
void atg_clamp_max_tensor_out(tensor *, tensor out, tensor self, tensor max);
void atg_clamp_min(tensor *, tensor self, scalar min);
void atg_clamp_min_(tensor *, tensor self, scalar min);
void atg_clamp_min_out(tensor *, tensor out, tensor self, scalar min);
void atg_clamp_min_tensor(tensor *, tensor self, tensor min);
void atg_clamp_min_tensor_(tensor *, tensor self, tensor min);
void atg_clamp_min_tensor_out(tensor *, tensor out, tensor self, tensor min);
void atg_clamp_out(tensor *, tensor out, tensor self, scalar min, scalar max);
void atg_clamp_tensor(tensor *, tensor self, tensor min, tensor max);
void atg_clamp_tensor_(tensor *, tensor self, tensor min, tensor max);
void atg_clamp_tensor_out(tensor *, tensor out, tensor self, tensor min, tensor max);
// clip family: signatures mirror the two-sided atg_clamp / atg_clamp_tensor
// entries exactly (scalar bounds, then tensor bounds).
void atg_clip(tensor *, tensor self, scalar min, scalar max);
void atg_clip_(tensor *, tensor self, scalar min, scalar max);
void atg_clip_out(tensor *, tensor out, tensor self, scalar min, scalar max);
void atg_clip_tensor(tensor *, tensor self, tensor min, tensor max);
void atg_clip_tensor_(tensor *, tensor self, tensor min, tensor max);
void atg_clip_tensor_out(tensor *, tensor out, tensor self, tensor min, tensor max);
// Note: atg_clone carries an `out` argument even though its name has no
// `_out` suffix, unlike the other entries in this header.
// NOTE(review): presumably the generator maps the out-variant onto this name;
// confirm before treating it as a plain copy.
void atg_clone(tensor *, tensor out, tensor self);
void atg_coalesce(tensor *, tensor self);
// col2im bindings: every integer-list argument (output_size, kernel_size,
// dilation, padding, stride -- in that order) is passed as an
// (`int64_t *..._data`, `int ..._len`) pair.
void atg_col2im(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_col2im_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
// col_indices accessors on a single `self` tensor.
void atg_col_indices(tensor *, tensor self);
void atg_col_indices_copy(tensor *, tensor self);
void atg_col_indices_copy_out(tensor *, tensor out, tensor self);
// List-combining bindings. Tensor lists are passed as
// (`tensors_data`, `tensors_len`). atg_concat and atg_concatenate have the
// same parameter lists as atg_cat / atg_cat_out.
void atg_column_stack(tensor *, tensor *tensors_data, int tensors_len);
void atg_column_stack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_combinations(tensor *, tensor self, int64_t r, int with_replacement);
void atg_complex(tensor *, tensor real, tensor imag);
void atg_complex_out(tensor *, tensor out, tensor real, tensor imag);
void atg_concat(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_concat_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_concatenate(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_concatenate_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
// conj, constant_pad_nd and contiguous bindings. The padding amounts for
// constant_pad_nd are an (`pad_data`, `pad_len`) int64_t array; no fill value
// parameter is exposed in these prototypes.
void atg_conj(tensor *, tensor self);
void atg_conj_physical(tensor *, tensor self);
void atg_conj_physical_(tensor *, tensor self);
void atg_conj_physical_out(tensor *, tensor out, tensor self);
void atg_constant_pad_nd(tensor *, tensor self, int64_t *pad_data, int pad_len);
void atg_constant_pad_nd_out(tensor *, tensor out, tensor self, int64_t *pad_data, int pad_len);
void atg_contiguous(tensor *, tensor self);
// conv1d/2d/3d bindings. Each dimension has two entries that differ only in
// how padding is given: an int64_t array (`padding_data`, `padding_len`) or,
// in the `_padding` variant, a character buffer (`padding_ptr`, `padding_len`).
// NOTE(review): the buffer is passed with an explicit length, so it is
// presumably not required to be NUL-terminated -- confirm in the .cpp side.
void atg_conv1d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv1d_padding(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv2d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv2d_padding(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv3d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv3d_padding(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
// conv_depthwise3d: note the argument order -- `kernel_size` comes before
// `bias`, and there is no `groups` parameter.
void atg_conv_depthwise3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_conv_depthwise3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
// conv_tbc: padding is a single int64_t `pad` rather than an array.
void atg_conv_tbc(tensor *, tensor self, tensor weight, tensor bias, int64_t pad);
void atg_conv_tbc_backward(tensor *, tensor self, tensor input, tensor weight, tensor bias, int64_t pad);
void atg_conv_tbc_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t pad);
// conv_transpose1d/2d/3d share one parameter list. Argument order differs
// from atg_conv1d/2d/3d: `output_padding` and `groups` come before `dilation`.
void atg_conv_transpose1d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t groups, int64_t *dilation_data, int dilation_len);
void atg_conv_transpose2d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t groups, int64_t *dilation_data, int dilation_len);
void atg_conv_transpose3d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t groups, int64_t *dilation_data, int dilation_len);
// Generic convolution entry points. Compared with atg_convNd they add an int
// `transposed` flag and an `output_padding` array; `_overrideable` has the
// same parameter list as the plain form.
void atg_convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups);
void atg_convolution_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups);
void atg_convolution_overrideable(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups);
void atg_convolution_overrideable_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups);
// copy_sparse_to_sparse: copies from `src` with an int `non_blocking` flag.
void atg_copy_sparse_to_sparse(tensor *, tensor self, tensor src, int non_blocking);
void atg_copy_sparse_to_sparse_(tensor *, tensor self, tensor src, int non_blocking);
void atg_copy_sparse_to_sparse_out(tensor *, tensor out, tensor self, tensor src, int non_blocking);
// copysign: unsuffixed entries take a tensor `other`; the `_scalar` entries
// take a `scalar` handle instead.
void atg_copysign(tensor *, tensor self, tensor other);
void atg_copysign_(tensor *, tensor self, tensor other);
void atg_copysign_out(tensor *, tensor out, tensor self, tensor other);
void atg_copysign_scalar(tensor *, tensor self, scalar other);
void atg_copysign_scalar_(tensor *, tensor self, scalar other);
void atg_copysign_scalar_out(tensor *, tensor out, tensor self, scalar other);
// corrcoef, cos, cosh and the two cosine_* bindings. cos/cosh come in the
// usual three flavours (plain, trailing underscore, `_out`).
void atg_corrcoef(tensor *, tensor self);
void atg_cos(tensor *, tensor self);
void atg_cos_(tensor *, tensor self);
void atg_cos_out(tensor *, tensor out, tensor self);
void atg_cosh(tensor *, tensor self);
void atg_cosh_(tensor *, tensor self);
void atg_cosh_out(tensor *, tensor out, tensor self);
void atg_cosine_embedding_loss(tensor *, tensor input1, tensor input2, tensor target, double margin, int64_t reduction);
void atg_cosine_similarity(tensor *, tensor x1, tensor x2, int64_t dim, double eps);
// count_nonzero, cov, cross and cross_entropy_loss bindings.
// A single `dim` is encoded as (`int64_t dim_v`, `uint8_t dim_null`);
// presumably a non-zero `dim_null` means "no dim given" -- TODO confirm.
// The `_dim_intlist` variants take an (`dim_data`, `dim_len`) array instead.
void atg_count_nonzero(tensor *, tensor self, int64_t dim_v, uint8_t dim_null);
void atg_count_nonzero_dim_intlist(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_count_nonzero_dim_intlist_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len);
void atg_count_nonzero_out(tensor *, tensor out, tensor self, int64_t dim_v, uint8_t dim_null);
void atg_cov(tensor *, tensor self, int64_t correction, tensor fweights, tensor aweights);
void atg_cross(tensor *, tensor self, tensor other, int64_t dim_v, uint8_t dim_null);
void atg_cross_entropy_loss(tensor *, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, double label_smoothing);
void atg_cross_out(tensor *, tensor out, tensor self, tensor other, int64_t dim_v, uint8_t dim_null);
// crow_indices accessors on a single `self` tensor.
void atg_crow_indices(tensor *, tensor self);
void atg_crow_indices_copy(tensor *, tensor self);
void atg_crow_indices_copy_out(tensor *, tensor out, tensor self);
// ctc_loss: the lengths are either int64_t arrays (`*_lengths_data`,
// `*_lengths_len`) or, in the `_tensor` variant, tensor handles.
void atg_ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int64_t reduction, int zero_infinity);
void atg_ctc_loss_tensor(tensor *, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int64_t reduction, int zero_infinity);
// cudnn affine-grid and batch-norm bindings. The grid generator takes four
// int64_t values named `n`, `C`, `H`, `W`. The batch-norm `_out` variants take
// several explicit output tensors (`out0`..`out3` forward, `out0`..`out2`
// backward) ahead of the inputs.
void atg_cudnn_affine_grid_generator(tensor *, tensor theta, int64_t n, int64_t C, int64_t H, int64_t W);
void atg_cudnn_affine_grid_generator_backward(tensor *, tensor grad, int64_t n, int64_t C, int64_t H, int64_t W);
void atg_cudnn_affine_grid_generator_backward_out(tensor *, tensor out, tensor grad, int64_t n, int64_t C, int64_t H, int64_t W);
void atg_cudnn_affine_grid_generator_out(tensor *, tensor out, tensor theta, int64_t n, int64_t C, int64_t H, int64_t W);
void atg_cudnn_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double exponential_average_factor, double epsilon);
void atg_cudnn_batch_norm_backward(tensor *, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon, tensor reserveSpace);
void atg_cudnn_batch_norm_backward_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon, tensor reserveSpace);
void atg_cudnn_batch_norm_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double exponential_average_factor, double epsilon);
// cudnn convolution bindings. Watch the argument order: the plain and
// `_transpose` entries list `padding` before `stride` and end with the int
// flags `benchmark`, `deterministic`, `allow_tf32`, whereas the `_relu` and
// `_add_relu` entries take a `bias` and list `stride` before `padding`.
void atg_cudnn_convolution(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_add_relu(tensor *, tensor self, tensor weight, tensor z, scalar alpha, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_cudnn_convolution_add_relu_out(tensor *, tensor out, tensor self, tensor weight, tensor z, scalar alpha, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_cudnn_convolution_out(tensor *, tensor out, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_relu(tensor *, tensor self, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_cudnn_convolution_relu_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_cudnn_convolution_transpose(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_transpose_out(tensor *, tensor out, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
// cudnn grid-sampler bindings; the backward `_out` form takes two explicit
// output tensors (`out0`, `out1`).
void atg_cudnn_grid_sampler(tensor *, tensor self, tensor grid);
void atg_cudnn_grid_sampler_backward(tensor *, tensor self, tensor grid, tensor grad_output);
void atg_cudnn_grid_sampler_backward_out(tensor *, tensor out0, tensor out1, tensor self, tensor grid, tensor grad_output);
void atg_cudnn_grid_sampler_out(tensor *, tensor out, tensor self, tensor grid);
// Returns an int directly instead of writing through a `tensor *`
// (presumably a boolean result -- TODO confirm).
int atg_cudnn_is_acceptable(tensor self);
// Cumulative-operation bindings along `dim`. The cummax/cummin `_out` forms
// take two output tensors named `values` and `indices`. cumprod/cumsum take an
// extra int `dtype` code (encoding not visible in this header).
void atg_cummax(tensor *, tensor self, int64_t dim);
void atg_cummax_out(tensor *, tensor values, tensor indices, tensor self, int64_t dim);
void atg_cummaxmin_backward(tensor *, tensor grad, tensor input, tensor indices, int64_t dim);
void atg_cummin(tensor *, tensor self, int64_t dim);
void atg_cummin_out(tensor *, tensor values, tensor indices, tensor self, int64_t dim);
void atg_cumprod(tensor *, tensor self, int64_t dim, int dtype);
void atg_cumprod_(tensor *, tensor self, int64_t dim, int dtype);
void atg_cumprod_backward(tensor *, tensor grad, tensor input, int64_t dim, tensor output);
void atg_cumprod_out(tensor *, tensor out, tensor self, int64_t dim, int dtype);
void atg_cumsum(tensor *, tensor self, int64_t dim, int dtype);
void atg_cumsum_(tensor *, tensor self, int64_t dim, int dtype);
void atg_cumsum_out(tensor *, tensor out, tensor self, int64_t dim, int dtype);
void atg_cumulative_trapezoid(tensor *, tensor y, int64_t dim);
void atg_cumulative_trapezoid_x(tensor *, tensor y, tensor x, int64_t dim);
// data, deg2rad, dense_dim and dequantize bindings.
void atg_data(tensor *, tensor self);
void atg_deg2rad(tensor *, tensor self);
void atg_deg2rad_(tensor *, tensor self);
void atg_deg2rad_out(tensor *, tensor out, tensor self);
// Returns the value directly as int64_t; no `tensor *` output argument.
int64_t atg_dense_dim(tensor self);
void atg_dequantize(tensor *, tensor self);
void atg_dequantize_self_out(tensor *, tensor out, tensor self);
// List form returns a `tensor *`. NOTE(review): termination and ownership of
// the returned array are not expressed in the prototype -- verify.
tensor *atg_dequantize_tensors(tensor *tensors_data, int tensors_len);
// Unlike the other `_out` entries there is no unnamed leading `tensor *`:
// the first argument pair is the named output list (`out_data`, `out_len`).
void atg_dequantize_tensors_out(tensor *out_data, int out_len, tensor *tensors_data, int tensors_len);
// det and detach bindings, all unary on `self`.
void atg_det(tensor *, tensor self);
void atg_detach(tensor *, tensor self);
void atg_detach_(tensor *, tensor self);
void atg_detach_copy(tensor *, tensor self);
void atg_detach_copy_out(tensor *, tensor out, tensor self);
// diag / diag_embed / diagflat / diagonal bindings. The diagonal is selected
// with an int64_t (`diagonal` or `offset`); the embed/diagonal entries also
// take the two dimension indices `dim1` and `dim2`. diagonal_backward takes
// the original shape as an (`input_sizes_data`, `input_sizes_len`) array.
void atg_diag(tensor *, tensor self, int64_t diagonal);
void atg_diag_embed(tensor *, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diag_embed_out(tensor *, tensor out, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diag_out(tensor *, tensor out, tensor self, int64_t diagonal);
void atg_diagflat(tensor *, tensor self, int64_t offset);
void atg_diagonal(tensor *, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_backward(tensor *, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_backward_out(tensor *, tensor out, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_copy(tensor *, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_copy_out(tensor *, tensor out, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_scatter(tensor *, tensor self, tensor src, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_scatter_out(tensor *, tensor out, tensor self, tensor src, int64_t offset, int64_t dim1, int64_t dim2);
// diff, digamma and dist bindings. NOTE(review): `prepend` / `append` are
// plain `tensor` handles; whether a null handle may be passed to omit them is
// decided by the implementation -- confirm before relying on it. No norm
// parameter is exposed for dist in these prototypes.
void atg_diff(tensor *, tensor self, int64_t n, int64_t dim, tensor prepend, tensor append);
void atg_diff_out(tensor *, tensor out, tensor self, int64_t n, int64_t dim, tensor prepend, tensor append);
void atg_digamma(tensor *, tensor self);
void atg_digamma_(tensor *, tensor self);
void atg_digamma_out(tensor *, tensor out, tensor self);
void atg_dist(tensor *, tensor self, tensor other);
void atg_dist_out(tensor *, tensor out, tensor self, tensor other);
// div family. Unsuffixed entries take a tensor `other`; `_scalar` entries take
// a `scalar` handle. The `_mode` entries add a rounding mode passed as a
// character buffer plus length (`rounding_mode_ptr`, `rounding_mode_len`);
// the accepted strings are not visible in this header.
void atg_div(tensor *, tensor self, tensor other);
void atg_div_(tensor *, tensor self, tensor other);
void atg_div_out(tensor *, tensor out, tensor self, tensor other);
void atg_div_out_mode(tensor *, tensor out, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_div_scalar(tensor *, tensor self, scalar other);
void atg_div_scalar_(tensor *, tensor self, scalar other);
void atg_div_scalar_mode(tensor *, tensor self, scalar other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_div_scalar_mode_(tensor *, tensor self, scalar other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_div_scalar_mode_out(tensor *, tensor out, tensor self, scalar other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_div_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_div_tensor_mode(tensor *, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_div_tensor_mode_(tensor *, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
// divide family: parameter lists mirror the atg_div entries of the same
// suffix. Note there are no `_scalar_out` / `_scalar_mode_out` entries here,
// unlike in the atg_div group.
void atg_divide(tensor *, tensor self, tensor other);
void atg_divide_(tensor *, tensor self, tensor other);
void atg_divide_out(tensor *, tensor out, tensor self, tensor other);
void atg_divide_out_mode(tensor *, tensor out, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_divide_scalar(tensor *, tensor self, scalar other);
void atg_divide_scalar_(tensor *, tensor self, scalar other);
void atg_divide_scalar_mode(tensor *, tensor self, scalar other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_divide_scalar_mode_(tensor *, tensor self, scalar other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_divide_tensor_mode(tensor *, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_divide_tensor_mode_(tensor *, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
// In the two atg_dot prototypes the last parameter is *named* `tensor`, the
// same identifier as its type. It is the final parameter, so no later
// declarator in the list needs the type name.
void atg_dot(tensor *, tensor self, tensor tensor);
void atg_dot_out(tensor *, tensor out, tensor self, tensor tensor);
// dropout: `train` is a flag passed as int.
void atg_dropout(tensor *, tensor input, double p, int train);
void atg_dropout_(tensor *, tensor self, double p, int train);
// dsplit returns a `tensor *` instead of taking the leading output argument.
// NOTE(review): termination/ownership of the returned array is not visible
// here -- verify against the implementation.
tensor *atg_dsplit(tensor self, int64_t sections);
tensor *atg_dsplit_array(tensor self, int64_t *indices_data, int indices_len);
void atg_dstack(tensor *, tensor *tensors_data, int tensors_len);
void atg_dstack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
// einsum: the equation is a character buffer with explicit length; operands
// are a tensor array; `path` is an int64_t array.
void atg_einsum(tensor *, char* equation_ptr, int equation_len, tensor *tensors_data, int tensors_len, int64_t *path_data, int path_len);
// elu bindings. The forward entries expose only `self`; the backward entries
// take `alpha`, `scale` and `input_scale` as `scalar` handles plus an int
// `is_result` flag and the `self_or_result` tensor.
void atg_elu(tensor *, tensor self);
void atg_elu_(tensor *, tensor self);
void atg_elu_backward(tensor *, tensor grad_output, scalar alpha, scalar scale, scalar input_scale, int is_result, tensor self_or_result);
void atg_elu_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, scalar alpha, scalar scale, scalar input_scale, int is_result, tensor self_or_result);
void atg_elu_out(tensor *, tensor out, tensor self);
// embedding family. `scale_grad_by_freq`, `sparse` and `include_last_offset`
// are flags passed as int. In atg_embedding_bag_padding_idx the padding index
// is encoded as (`padding_idx_v`, `padding_idx_null`) -- presumably a non-zero
// `_null` flag means "not provided"; TODO confirm. Elsewhere `padding_idx` is
// a plain int64_t.
void atg_embedding(tensor *, tensor weight, tensor indices, int64_t padding_idx, int scale_grad_by_freq, int sparse);
void atg_embedding_backward(tensor *, tensor grad, tensor indices, int64_t num_weights, int64_t padding_idx, int scale_grad_by_freq, int sparse);
void atg_embedding_bag(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset);
void atg_embedding_bag_padding_idx(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx_v, uint8_t padding_idx_null);
void atg_embedding_dense_backward(tensor *, tensor grad_output, tensor indices, int64_t num_weights, int64_t padding_idx, int scale_grad_by_freq);
void atg_embedding_dense_backward_out(tensor *, tensor out, tensor grad_output, tensor indices, int64_t num_weights, int64_t padding_idx, int scale_grad_by_freq);
void atg_embedding_out(tensor *, tensor out, tensor weight, tensor indices, int64_t padding_idx, int scale_grad_by_freq, int sparse);
void atg_embedding_renorm(tensor *, tensor self, tensor indices, double max_norm, double norm_type);
void atg_embedding_renorm_(tensor *, tensor self, tensor indices, double max_norm, double norm_type);
void atg_embedding_renorm_out(tensor *, tensor out, tensor self, tensor indices, double max_norm, double norm_type);
void atg_embedding_sparse_backward(tensor *, tensor grad, tensor indices, int64_t num_weights, int64_t padding_idx, int scale_grad_by_freq);
// empty* factories. Shapes (and strides / physical layout where present) are
// int64_t arrays passed as (`*_data`, `*_len`). Non-`_out` factories end with
// `options_kind` and `options_device` ints (presumably an element-type code
// and a device index -- TODO confirm); `_out` forms take an `out` tensor
// instead. The `_like` forms derive everything from `self`.
void atg_empty(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_empty_like(tensor *, tensor self);
void atg_empty_like_out(tensor *, tensor out, tensor self);
void atg_empty_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_empty_permuted(tensor *, int64_t *size_data, int size_len, int64_t *physical_layout_data, int physical_layout_len, int options_kind, int options_device);
void atg_empty_permuted_out(tensor *, tensor out, int64_t *size_data, int size_len, int64_t *physical_layout_data, int physical_layout_len);
void atg_empty_quantized(tensor *, int64_t *size_data, int size_len, tensor qtensor, int options_kind, int options_device);
void atg_empty_quantized_out(tensor *, tensor out, int64_t *size_data, int size_len, tensor qtensor);
void atg_empty_strided(tensor *, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int options_kind, int options_device);
void atg_empty_strided_out(tensor *, tensor out, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
// eq overloads: unsuffixed / `_scalar_out` compare against a `scalar` handle,
// the `_tensor` entries against a tensor. All of these write through the
// leading `tensor *`.
void atg_eq(tensor *, tensor self, scalar other);
void atg_eq_(tensor *, tensor self, scalar other);
void atg_eq_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_eq_tensor(tensor *, tensor self, tensor other);
void atg_eq_tensor_(tensor *, tensor self, tensor other);
void atg_eq_tensor_out(tensor *, tensor out, tensor self, tensor other);
// atg_equal differs: it returns a single int and has no `tensor *` output
// (presumably a boolean for whole-tensor equality -- TODO confirm).
int atg_equal(tensor self, tensor other);
// erf, erfc and erfinv bindings: unary on `self`, each in three flavours
// (plain, trailing underscore, and `_out` with an explicit `out` tensor).
void atg_erf(tensor *, tensor self);
void atg_erf_(tensor *, tensor self);
void atg_erf_out(tensor *, tensor out, tensor self);
void atg_erfc(tensor *, tensor self);
void atg_erfc_(tensor *, tensor self);
void atg_erfc_out(tensor *, tensor out, tensor self);
void atg_erfinv(tensor *, tensor self);
void atg_erfinv_(tensor *, tensor self);
void atg_erfinv_out(tensor *, tensor out, tensor self);
/* exp and exp2, interleaved as in the original listing. Each has a plain, a
 * trailing-underscore and an `_out` declaration, all unary on `self`. */
void atg_exp(tensor *, tensor self);
void atg_exp2(tensor *, tensor self);
void atg_exp2_(tensor *, tensor self);
void atg_exp2_out(tensor *, tensor out, tensor self);
void atg_exp_(tensor *, tensor self);
void atg_exp_out(tensor *, tensor out, tensor self);
/* expand / expand_as / expand_copy. The target size is a pointer plus an int
 * length; `implicit` is declared as int (presumably a 0/1 flag -- confirm).
 * expand_as takes a second tensor instead of an explicit size list. */
void atg_expand(tensor *, tensor self, int64_t *size_data, int size_len, int implicit);
void atg_expand_as(tensor *, tensor self, tensor other);
void atg_expand_copy(tensor *, tensor self, int64_t *size_data, int size_len, int implicit);
void atg_expand_copy_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int implicit);
/* expm1: unary on `self`, in plain / trailing-underscore / `_out` spellings. */
void atg_expm1(tensor *, tensor self);
void atg_expm1_(tensor *, tensor self);
void atg_expm1_out(tensor *, tensor out, tensor self);
/* exponential: same three spellings, each with one extra `double lambd`. */
void atg_exponential(tensor *, tensor self, double lambd);
void atg_exponential_(tensor *, tensor self, double lambd);
void atg_exponential_out(tensor *, tensor out, tensor self, double lambd);
/* eye: `n` only, or `n` and `m` for the `_m` variants. The creators take the
 * `options_kind` / `options_device` int pair (presumably dtype and device
 * codes -- TODO confirm); the `_out` forms take an `out` tensor instead. */
void atg_eye(tensor *, int64_t n, int options_kind, int options_device);
void atg_eye_m(tensor *, int64_t n, int64_t m, int options_kind, int options_device);
void atg_eye_m_out(tensor *, tensor out, int64_t n, int64_t m);
void atg_eye_out(tensor *, tensor out, int64_t n);
/* fake_quantize, per-channel variants: `scale` and `zero_point` are tensors
 * and an `axis` is supplied. The `cachemask_out` form takes two output
 * tensors, `out0` and `out1`.
 * NOTE(review): with two outputs, the unnamed leading `tensor *` presumably
 * points at room for more than one handle -- confirm the required capacity. */
void atg_fake_quantize_per_channel_affine(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_channel_affine_cachemask(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_channel_affine_cachemask_backward(tensor *, tensor grad, tensor mask);
void atg_fake_quantize_per_channel_affine_cachemask_out(tensor *, tensor out0, tensor out1, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max);
/* Per-tensor variants: `scale` is a double and `zero_point` an int64_t, with
 * no axis. The `_tensor_qparams` form takes both as tensors instead. */
void atg_fake_quantize_per_tensor_affine(tensor *, tensor self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_tensor_affine_cachemask(tensor *, tensor self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_tensor_affine_cachemask_backward(tensor *, tensor grad, tensor mask);
void atg_fake_quantize_per_tensor_affine_cachemask_out(tensor *, tensor out0, tensor out1, tensor self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_tensor_affine_tensor_qparams(tensor *, tensor self, tensor scale, tensor zero_point, int64_t quant_min, int64_t quant_max);
/* feature_alpha_dropout and feature_dropout: a tensor, a double `p` and an
 * int `train` (presumably a 0/1 flag -- confirm). The plain forms name the
 * tensor `input`, the trailing-underscore forms name it `self`. */
void atg_feature_alpha_dropout(tensor *, tensor input, double p, int train);
void atg_feature_alpha_dropout_(tensor *, tensor self, double p, int train);
void atg_feature_dropout(tensor *, tensor input, double p, int train);
void atg_feature_dropout_(tensor *, tensor self, double p, int train);
/* Forward FFT family (fft, fft2, fftn) plus fftfreq and fftshift.
 * Parameter encodings visible in the signatures:
 *   - `n_v` / `n_null`: an int64_t value with a uint8_t companion; presumably
 *     an optional integer where a non-zero `n_null` means "absent" -- confirm.
 *   - `s_data` / `s_len`, `dim_data` / `dim_len`: pointer plus int length.
 *   - `norm_ptr` / `norm_len`: a char pointer plus int length; presumably a
 *     string that need not be NUL-terminated -- confirm. */
void atg_fft_fft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_fft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_fft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_fft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_fftfreq(tensor *, int64_t n, double d, int options_kind, int options_device);
void atg_fft_fftfreq_out(tensor *, tensor out, int64_t n, double d);
void atg_fft_fftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_fftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_fftshift(tensor *, tensor self, int64_t *dim_data, int dim_len);
/* hfft / hfft2 / hfftn, each with an `_out` counterpart. The 1-D form takes
 * `n_v` / `n_null` and a single `dim`; the 2-D and N-D forms take `s` and
 * `dim` as pointer-plus-length lists. `norm` is a char pointer plus length. */
void atg_fft_hfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_hfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_hfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_hfft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_hfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_hfftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
/* ifft / ifft2 / ifftn with `_out` counterparts, plus ifftshift. Signatures
 * mirror the forward fft declarations: `n_v` / `n_null` and one `dim` for the
 * 1-D form, pointer-plus-length `s` and `dim` lists for the others. */
void atg_fft_ifft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ifft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ifftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifftshift(tensor *, tensor self, int64_t *dim_data, int dim_len);
/* ihfft / ihfft2 / ihfftn with `_out` counterparts; parameter layout is the
 * same as the other fft_* groups (value/null-flag pair for `n`, pointer plus
 * length for `s`, `dim` and `norm`). */
void atg_fft_ihfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ihfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ihfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ihfft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ihfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ihfftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
/* irfft / irfft2 / irfftn with `_out` counterparts. Argument encoding follows
 * the rest of the fft_* declarations; `norm` is passed as `char*` plus an int
 * length rather than as a single C-string argument. */
void atg_fft_irfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_irfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_irfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_irfft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_irfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_irfftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
/* rfft / rfft2 / rfftn with `_out` counterparts, plus rfftfreq. rfftfreq
 * takes no input tensor: just `n`, a double `d`, and either the
 * `options_kind` / `options_device` int pair or an `out` tensor. */
void atg_fft_rfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_rfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_rfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_rfft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_rfftfreq(tensor *, int64_t n, double d, int options_kind, int options_device);
void atg_fft_rfftfreq_out(tensor *, tensor out, int64_t n, double d);
void atg_fft_rfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_rfftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
/* fill: `value` is a `scalar` in the unsuffixed and `_scalar_out` forms and a
 * `tensor` in the `_tensor` forms. fill_diagonal_ exists only with a trailing
 * underscore here and adds an int `wrap` (presumably a 0/1 flag -- confirm). */
void atg_fill(tensor *, tensor self, scalar value);
void atg_fill_(tensor *, tensor self, scalar value);
void atg_fill_diagonal_(tensor *, tensor self, scalar fill_value, int wrap);
void atg_fill_scalar_out(tensor *, tensor out, tensor self, scalar value);
void atg_fill_tensor(tensor *, tensor self, tensor value);
void atg_fill_tensor_(tensor *, tensor self, tensor value);
void atg_fill_tensor_out(tensor *, tensor out, tensor self, tensor value);
/* fix: unary on `self`, in plain / trailing-underscore / `_out` spellings. */
void atg_fix(tensor *, tensor self);
void atg_fix_(tensor *, tensor self);
void atg_fix_out(tensor *, tensor out, tensor self);
/* flatten takes a start/end dimension pair; flatten_dense_tensors takes an
 * array of tensor handles as pointer plus int length. */
void atg_flatten(tensor *, tensor self, int64_t start_dim, int64_t end_dim);
void atg_flatten_dense_tensors(tensor *, tensor *tensors_data, int tensors_len);
/* flip takes an explicit dims list; fliplr / flipud take only `self`. */
void atg_flip(tensor *, tensor self, int64_t *dims_data, int dims_len);
void atg_flip_out(tensor *, tensor out, tensor self, int64_t *dims_data, int dims_len);
void atg_fliplr(tensor *, tensor self);
void atg_flipud(tensor *, tensor self);
/* float_power overloads, distinguished by which operand is a scalar.
 * Note the asymmetry in the unsuffixed names: `atg_float_power` takes a
 * tensor exponent while `atg_float_power_` takes a scalar exponent; the
 * trailing-underscore form with a tensor exponent is
 * `atg_float_power_tensor_`. The `_scalar` forms take the base as
 * `scalar self_scalar` and the exponent as a tensor. */
void atg_float_power(tensor *, tensor self, tensor exponent);
void atg_float_power_(tensor *, tensor self, scalar exponent);
void atg_float_power_scalar(tensor *, scalar self_scalar, tensor exponent);
void atg_float_power_scalar_out(tensor *, tensor out, scalar self_scalar, tensor exponent);
void atg_float_power_tensor_(tensor *, tensor self, tensor exponent);
void atg_float_power_tensor_scalar(tensor *, tensor self, scalar exponent);
void atg_float_power_tensor_scalar_out(tensor *, tensor out, tensor self, scalar exponent);
void atg_float_power_tensor_tensor_out(tensor *, tensor out, tensor self, tensor exponent);
/* floor (unary) and floor_divide (binary). For floor_divide the unsuffixed
 * names take `other` as a tensor and the `_scalar` names take it as a
 * `scalar`; each has plain, trailing-underscore and `_out` declarations.
 * atg_floor_out is listed last, after the floor_divide entries. */
void atg_floor(tensor *, tensor self);
void atg_floor_(tensor *, tensor self);
void atg_floor_divide(tensor *, tensor self, tensor other);
void atg_floor_divide_(tensor *, tensor self, tensor other);
void atg_floor_divide_out(tensor *, tensor out, tensor self, tensor other);
void atg_floor_divide_scalar(tensor *, tensor self, scalar other);
void atg_floor_divide_scalar_(tensor *, tensor self, scalar other);
void atg_floor_divide_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_floor_out(tensor *, tensor out, tensor self);
/* fmax / fmin: tensor-tensor only, with `_out` forms and no
 * trailing-underscore declarations in this listing. */
void atg_fmax(tensor *, tensor self, tensor other);
void atg_fmax_out(tensor *, tensor out, tensor self, tensor other);
void atg_fmin(tensor *, tensor self, tensor other);
void atg_fmin_out(tensor *, tensor out, tensor self, tensor other);
/* fmod: unsuffixed names take a `scalar` divisor, `_tensor` names a tensor. */
void atg_fmod(tensor *, tensor self, scalar other);
void atg_fmod_(tensor *, tensor self, scalar other);
void atg_fmod_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_fmod_tensor(tensor *, tensor self, tensor other);
void atg_fmod_tensor_(tensor *, tensor self, tensor other);
void atg_fmod_tensor_out(tensor *, tensor out, tensor self, tensor other);
/* frac: unary on `self`; plain, trailing-underscore and `_out` declarations.
 * All three return void and take an unnamed `tensor *` first (presumably the
 * result slot -- confirm in the implementation). */
void atg_frac(tensor *, tensor self);
void atg_frac_(tensor *, tensor self);
void atg_frac_out(tensor *, tensor out, tensor self);
/* fractional_max_pool2d: the forward forms take `random_samples`, the
 * backward forms take `indices`. `kernel_size` and `output_size` are
 * pointer-plus-length lists. The `_output` form takes two destination
 * tensors, `output` and `indices`.
 * NOTE(review): the forward op presumably yields two tensors through the
 * leading `tensor *` -- confirm the required capacity. */
void atg_fractional_max_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor random_samples);
void atg_fractional_max_pool2d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor indices);
void atg_fractional_max_pool2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor indices);
void atg_fractional_max_pool2d_output(tensor *, tensor output, tensor indices, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor random_samples);
/* fractional_max_pool3d: parameter lists identical to the 2d declarations. */
void atg_fractional_max_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor random_samples);
void atg_fractional_max_pool3d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor indices);
void atg_fractional_max_pool3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor indices);
void atg_fractional_max_pool3d_output(tensor *, tensor output, tensor indices, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor random_samples);
/* frexp: the `_tensor_out` form takes two destination tensors, `mantissa`
 * and `exponent`, ahead of `self`. */
void atg_frexp(tensor *, tensor self);
void atg_frexp_tensor_out(tensor *, tensor mantissa, tensor exponent, tensor self);
/* frobenius_norm: dims as pointer plus length, and an int `keepdim`
 * (presumably a 0/1 flag -- confirm). */
void atg_frobenius_norm(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_frobenius_norm_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
/* from_file: the filename is a char pointer plus int length (presumably not
 * required to be NUL-terminated -- confirm). `size_v` / `size_null` is an
 * int64_t with a uint8_t companion, presumably an optional size -- confirm. */
void atg_from_file(tensor *, char* filename_ptr, int filename_len, int shared, int64_t size_v, uint8_t size_null, int options_kind, int options_device);
void atg_from_file_out(tensor *, tensor out, char* filename_ptr, int filename_len, int shared, int64_t size_v, uint8_t size_null);
/* full / full_like: take a `scalar fill_value`; `atg_full` also takes a size
 * list and the `options_kind` / `options_device` int pair. */
void atg_full(tensor *, int64_t *size_data, int size_len, scalar fill_value, int options_kind, int options_device);
void atg_full_like(tensor *, tensor self, scalar fill_value);
void atg_full_like_out(tensor *, tensor out, tensor self, scalar fill_value);
void atg_full_out(tensor *, tensor out, int64_t *size_data, int size_len, scalar fill_value);
/* Takes seven tensors (self, observer_on, fake_quant_on, running_min,
 * running_max, scale, zero_point), a double averaging constant, three int64_t
 * quantization parameters plus `ch_axis`, and two int arguments
 * (`per_row_fake_quant`, `symmetric_quant`; presumably 0/1 flags -- confirm). */
void atg_fused_moving_avg_obs_fake_quant(tensor *, tensor self, tensor observer_on, tensor fake_quant_on, tensor running_min, tensor running_max, tensor scale, tensor zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, int per_row_fake_quant, int symmetric_quant);
/* gather: `dim`, an `index` tensor and an int `sparse_grad` (presumably a 0/1
 * flag -- confirm). gather_backward additionally takes `grad` first. */
void atg_gather(tensor *, tensor self, int64_t dim, tensor index, int sparse_grad);
void atg_gather_backward(tensor *, tensor grad, tensor self, int64_t dim, tensor index, int sparse_grad);
void atg_gather_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, int sparse_grad);
/* gcd: tensor-tensor, in plain / trailing-underscore / `_out` spellings. */
void atg_gcd(tensor *, tensor self, tensor other);
void atg_gcd_(tensor *, tensor self, tensor other);
void atg_gcd_out(tensor *, tensor out, tensor self, tensor other);
/* ge: the unsuffixed and `_scalar_out` names take `other` as a `scalar`; the
 * `_tensor` names take it as a `tensor`. The same six-declaration layout is
 * used by the other comparison operators in this header. */
void atg_ge(tensor *, tensor self, scalar other);
void atg_ge_(tensor *, tensor self, scalar other);
void atg_ge_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_ge_tensor(tensor *, tensor self, tensor other);
void atg_ge_tensor_(tensor *, tensor self, tensor other);
void atg_ge_tensor_out(tensor *, tensor out, tensor self, tensor other);
/* gelu forward and backward. `approximate` is passed as a char pointer plus
 * an int length (presumably a string that need not be NUL-terminated; the
 * accepted values are not visible here -- confirm). The
 * `_backward_grad_input` form takes a `grad_input` tensor first. */
void atg_gelu(tensor *, tensor self, char* approximate_ptr, int approximate_len);
void atg_gelu_(tensor *, tensor self, char* approximate_ptr, int approximate_len);
void atg_gelu_backward(tensor *, tensor grad_output, tensor self, char* approximate_ptr, int approximate_len);
void atg_gelu_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, char* approximate_ptr, int approximate_len);
void atg_gelu_out(tensor *, tensor out, tensor self, char* approximate_ptr, int approximate_len);
/* geometric: `self` plus a double `p`, in three spellings. */
void atg_geometric(tensor *, tensor self, double p);
void atg_geometric_(tensor *, tensor self, double p);
void atg_geometric_out(tensor *, tensor out, tensor self, double p);
/* geqrf: the `_a` form takes two destination tensors, `a` and `tau`, ahead
 * of `self`. */
void atg_geqrf(tensor *, tensor self);
void atg_geqrf_a(tensor *, tensor a, tensor tau, tensor self);
/* ger: binary on `self` and `vec2`. */
void atg_ger(tensor *, tensor self, tensor vec2);
void atg_ger_out(tensor *, tensor out, tensor self, tensor vec2);
/* glu forward, backward and jvp declarations. Every one ends with an int64_t
 * `dim`. The jvp forms take tangent tensors (`dx`, `dgrad_glu`) alongside the
 * primal ones; their exact meaning is not visible from the prototypes. */
void atg_glu(tensor *, tensor self, int64_t dim);
void atg_glu_backward(tensor *, tensor grad_output, tensor self, int64_t dim);
void atg_glu_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t dim);
void atg_glu_backward_jvp(tensor *, tensor grad_x, tensor grad_glu, tensor x, tensor dgrad_glu, tensor dx, int64_t dim);
void atg_glu_backward_jvp_out(tensor *, tensor out, tensor grad_x, tensor grad_glu, tensor x, tensor dgrad_glu, tensor dx, int64_t dim);
void atg_glu_jvp(tensor *, tensor glu, tensor x, tensor dx, int64_t dim);
void atg_glu_jvp_out(tensor *, tensor out, tensor glu, tensor x, tensor dx, int64_t dim);
void atg_glu_out(tensor *, tensor out, tensor self, int64_t dim);
/* grad: takes only `self`. */
void atg_grad(tensor *, tensor self);
/* greater and greater_equal, interleaved as in the original listing. For
 * both, the unsuffixed and `_scalar_out` names take `other` as a `scalar`
 * and the `_tensor` names take it as a `tensor`. */
void atg_greater(tensor *, tensor self, scalar other);
void atg_greater_(tensor *, tensor self, scalar other);
void atg_greater_equal(tensor *, tensor self, scalar other);
void atg_greater_equal_(tensor *, tensor self, scalar other);
void atg_greater_equal_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_greater_equal_tensor(tensor *, tensor self, tensor other);
void atg_greater_equal_tensor_(tensor *, tensor self, tensor other);
void atg_greater_equal_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_greater_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_greater_tensor(tensor *, tensor self, tensor other);
void atg_greater_tensor_(tensor *, tensor self, tensor other);
void atg_greater_tensor_out(tensor *, tensor out, tensor self, tensor other);
/* grid_sampler (generic, 2d, 3d): `interpolation_mode` and `padding_mode`
 * are raw int64_t codes whose valid values are not visible in this header --
 * TODO confirm against the backend enum. `align_corners` is an int
 * (presumably a 0/1 flag). */
void atg_grid_sampler(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_2d(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_2d_out(tensor *, tensor out, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_3d(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_3d_out(tensor *, tensor out, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
/* group_norm: `weight` and `bias` are tensor handles.
 * NOTE(review): whether a null handle is accepted for them is not visible
 * here -- confirm. */
void atg_group_norm(tensor *, tensor input, int64_t num_groups, tensor weight, tensor bias, double eps, int cudnn_enabled);
/* gru: parameters are an array of tensor handles (`params_data` plus
 * `params_len`). `atg_gru` ends with `batch_first`; `atg_gru_data` instead
 * takes `data` and `batch_sizes` tensors and has no `batch_first` argument.
 * gru_cell takes the four weight/bias tensors individually. */
void atg_gru(tensor *, tensor input, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg_gru_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh);
void atg_gru_data(tensor *, tensor data, tensor batch_sizes, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional);
/* gt: scalar overloads (unsuffixed, `_`, `_scalar_out`) followed by tensor
 * overloads (`_tensor`, `_tensor_`, `_tensor_out`). */
void atg_gt(tensor *, tensor self, scalar other);
void atg_gt_(tensor *, tensor self, scalar other);
void atg_gt_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_gt_tensor(tensor *, tensor self, tensor other);
void atg_gt_tensor_(tensor *, tensor self, tensor other);
void atg_gt_tensor_out(tensor *, tensor out, tensor self, tensor other);
/* hamming_window: overloads add arguments cumulatively -- `periodic`, then
 * `alpha`, then `beta` -- and the function name spells out which are present.
 * Creators end with the `options_kind` / `options_device` int pair
 * (presumably dtype and device codes -- TODO confirm); `_out` forms take an
 * `out` tensor instead. */
void atg_hamming_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_hamming_window_out(tensor *, tensor out, int64_t window_length);
void atg_hamming_window_periodic(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_hamming_window_periodic_alpha(tensor *, int64_t window_length, int periodic, double alpha, int options_kind, int options_device);
void atg_hamming_window_periodic_alpha_beta(tensor *, int64_t window_length, int periodic, double alpha, double beta, int options_kind, int options_device);
void atg_hamming_window_periodic_alpha_beta_out(tensor *, tensor out, int64_t window_length, int periodic, double alpha, double beta);
void atg_hamming_window_periodic_alpha_out(tensor *, tensor out, int64_t window_length, int periodic, double alpha);
void atg_hamming_window_periodic_out(tensor *, tensor out, int64_t window_length, int periodic);
/* hann_window: same scheme, with only the optional `periodic` argument. */
void atg_hann_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_hann_window_out(tensor *, tensor out, int64_t window_length);
void atg_hann_window_periodic(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_hann_window_periodic_out(tensor *, tensor out, int64_t window_length, int periodic);
/* hardshrink: the forward declarations expose no `lambd` argument, while the
 * backward ones take `scalar lambd`.
 * NOTE(review): presumably the forward wrapper applies a default lambd --
 * confirm in the implementation. No trailing-underscore form is declared. */
void atg_hardshrink(tensor *, tensor self);
void atg_hardshrink_backward(tensor *, tensor grad_out, tensor self, scalar lambd);
void atg_hardshrink_backward_grad_input(tensor *, tensor grad_input, tensor grad_out, tensor self, scalar lambd);
void atg_hardshrink_out(tensor *, tensor out, tensor self);
/* hardsigmoid: forward in three spellings, plus backward and its
 * `_grad_input` form. */
void atg_hardsigmoid(tensor *, tensor self);
void atg_hardsigmoid_(tensor *, tensor self);
void atg_hardsigmoid_backward(tensor *, tensor grad_output, tensor self);
void atg_hardsigmoid_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self);
void atg_hardsigmoid_out(tensor *, tensor out, tensor self);
/* hardswish: note that the backward destination variant is named
 * `_backward_out` with an `out` parameter, whereas hardtanh below uses
 * `_backward_grad_input` with a `grad_input` parameter. */
void atg_hardswish(tensor *, tensor self);
void atg_hardswish_(tensor *, tensor self);
void atg_hardswish_backward(tensor *, tensor grad_output, tensor self);
void atg_hardswish_backward_out(tensor *, tensor out, tensor grad_output, tensor self);
void atg_hardswish_out(tensor *, tensor out, tensor self);
/* hardtanh: the forward declarations expose no bounds; only the backward
 * ones take `min_val` / `max_val` scalars.
 * NOTE(review): presumably the forward wrapper uses default bounds --
 * confirm. */
void atg_hardtanh(tensor *, tensor self);
void atg_hardtanh_(tensor *, tensor self);
void atg_hardtanh_backward(tensor *, tensor grad_output, tensor self, scalar min_val, scalar max_val);
void atg_hardtanh_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar min_val, scalar max_val);
void atg_hardtanh_out(tensor *, tensor out, tensor self);
/* hash_tensor: dims as pointer plus length, an int `keepdim`, and an int64_t
 * `mode` whose valid values are not visible in this header -- TODO confirm. */
void atg_hash_tensor(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int64_t mode);
void atg_hash_tensor_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int64_t mode);
/* heaviside: binary on `self` and `values`. */
void atg_heaviside(tensor *, tensor self, tensor values);
void atg_heaviside_(tensor *, tensor self, tensor values);
void atg_heaviside_out(tensor *, tensor out, tensor self, tensor values);
/* `reduction` is a raw int64_t code; its mapping is not visible here. */
void atg_hinge_embedding_loss(tensor *, tensor self, tensor target, double margin, int64_t reduction);
/* histc: `self` and an int64_t bin count. */
void atg_histc(tensor *, tensor self, int64_t bins);
void atg_histc_out(tensor *, tensor out, tensor self, int64_t bins);
/* histogram: `bins` is a tensor in the unsuffixed / `_bins_tensor_out` forms
 * and an int64_t count in the `_bin_ct` forms, which also take `range` as a
 * double pointer plus int length. The `_out` forms take two destination
 * tensors, `hist` and `bin_edges`. `density` is an int (presumably 0/1). */
void atg_histogram(tensor *, tensor self, tensor bins, tensor weight, int density);
void atg_histogram_bin_ct(tensor *, tensor self, int64_t bins, double *range_data, int range_len, tensor weight, int density);
void atg_histogram_bin_ct_out(tensor *, tensor hist, tensor bin_edges, tensor self, int64_t bins, double *range_data, int range_len, tensor weight, int density);
void atg_histogram_bins_tensor_out(tensor *, tensor hist, tensor bin_edges, tensor self, tensor bins, tensor weight, int density);
/* hsplit: unlike most declarations here, these return a `tensor *` directly
 * and take no leading out-pointer.
 * NOTE(review): presumably an array of handles; how its length is conveyed
 * and who frees it are not visible from the prototype -- confirm. */
tensor *atg_hsplit(tensor self, int64_t sections);
tensor *atg_hsplit_array(tensor self, int64_t *indices_data, int indices_len);
/* hspmm: binary on `mat1` and `mat2`. */
void atg_hspmm(tensor *, tensor mat1, tensor mat2);
void atg_hspmm_out(tensor *, tensor out, tensor mat1, tensor mat2);
/* hstack: an array of tensor handles as pointer plus int length. */
void atg_hstack(tensor *, tensor *tensors_data, int tensors_len);
void atg_hstack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
/* huber_loss forward and backward: `reduction` is a raw int64_t code (mapping
 * not visible here) and `delta` a double. The `_backward_out` form names its
 * destination tensor `grad_input`. */
void atg_huber_loss(tensor *, tensor self, tensor target, int64_t reduction, double delta);
void atg_huber_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction, double delta);
void atg_huber_loss_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction, double delta);
void atg_huber_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction, double delta);
/* hypot: tensor-tensor, in plain / trailing-underscore / `_out` spellings. */
void atg_hypot(tensor *, tensor self, tensor other);
void atg_hypot_(tensor *, tensor self, tensor other);
void atg_hypot_out(tensor *, tensor out, tensor self, tensor other);
/* i0: unary on `self`. */
void atg_i0(tensor *, tensor self);
void atg_i0_(tensor *, tensor self);
void atg_i0_out(tensor *, tensor out, tensor self);
/* igamma / igammac: binary on `self` and `other`, both tensors. */
void atg_igamma(tensor *, tensor self, tensor other);
void atg_igamma_(tensor *, tensor self, tensor other);
void atg_igamma_out(tensor *, tensor out, tensor self, tensor other);
void atg_igammac(tensor *, tensor self, tensor other);
void atg_igammac_(tensor *, tensor self, tensor other);
void atg_igammac_out(tensor *, tensor out, tensor self, tensor other);
/* im2col: four integer lists, each a pointer plus int length, in the order
 * kernel_size, dilation, padding, stride. */
void atg_im2col(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_im2col_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
/* imag: takes only `self`. */
void atg_imag(tensor *, tensor self);
/* index: `indices` is an array of tensor handles (pointer plus int length).
 * NOTE(review): whether null entries are allowed in that array is not visible
 * here -- confirm. */
void atg_index(tensor *, tensor self, tensor *indices_data, int indices_len);
/* index_add / index_copy: `dim`, an `index` tensor and a `source` tensor. */
void atg_index_add(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_add_(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_add_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_copy(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_copy_(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_copy_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor source);
/* index_fill: the unsuffixed and `_int_scalar_out` names take `value` as a
 * `scalar`; the `_int_tensor` names take it as a `tensor`. All take an
 * int64_t `dim` and an `index` tensor. */
void atg_index_fill(tensor *, tensor self, int64_t dim, tensor index, scalar value);
void atg_index_fill_(tensor *, tensor self, int64_t dim, tensor index, scalar value);
void atg_index_fill_int_scalar_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, scalar value);
void atg_index_fill_int_tensor(tensor *, tensor self, int64_t dim, tensor index, tensor value);
void atg_index_fill_int_tensor_(tensor *, tensor self, int64_t dim, tensor index, tensor value);
void atg_index_fill_int_tensor_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor value);
/* index_put: `indices` is an array of tensor handles; `accumulate` is an int
 * (presumably a 0/1 flag -- confirm). */
void atg_index_put(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate);
void atg_index_put_(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate);
void atg_index_put_out(tensor *, tensor out, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate);
/* index_reduce: `reduce` is a char pointer plus int length (accepted values
 * not visible here); `include_self` is an int. */
void atg_index_reduce(tensor *, tensor self, int64_t dim, tensor index, tensor source, char* reduce_ptr, int reduce_len, int include_self);
void atg_index_reduce_(tensor *, tensor self, int64_t dim, tensor index, tensor source, char* reduce_ptr, int reduce_len, int include_self);
void atg_index_reduce_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor source, char* reduce_ptr, int reduce_len, int include_self);
/* index_select: the backward form takes the original sizes as an int64_t
 * list instead of a `self` tensor. */
void atg_index_select(tensor *, tensor self, int64_t dim, tensor index);
void atg_index_select_backward(tensor *, tensor grad, int64_t *self_sizes_data, int self_sizes_len, int64_t dim, tensor index);
void atg_index_select_out(tensor *, tensor out, tensor self, int64_t dim, tensor index);
/* `out` counterpart of atg_index, listed last in the original ordering. */
void atg_index_tensor_out(tensor *, tensor out, tensor self, tensor *indices_data, int indices_len);
/* indices / indices_copy: take only `self` (plus `out` for the `_out` form).
 * NOTE(review): how `indices` and `indices_copy` differ is not visible from
 * the prototypes -- see the implementation. */
void atg_indices(tensor *, tensor self);
void atg_indices_copy(tensor *, tensor self);
void atg_indices_copy_out(tensor *, tensor out, tensor self);
/* Takes `grad` and `self` only; no approximation argument, unlike the gelu
 * backward declarations elsewhere in this header. */
void atg_infinitely_differentiable_gelu_backward(tensor *, tensor grad, tensor self);
/* inner: binary on `self` and `other`. */
void atg_inner(tensor *, tensor self, tensor other);
void atg_inner_out(tensor *, tensor out, tensor self, tensor other);
/* instance_norm: five tensors, then int `use_input_stats`, doubles `momentum`
 * and `eps`, and int `cudnn_enabled` (the ints are presumably 0/1 flags).
 * NOTE(review): whether null handles are accepted for weight / bias /
 * running stats is not visible here -- confirm. */
void atg_instance_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int use_input_stats, double momentum, double eps, int cudnn_enabled);
/* int_repr and inverse: unary on `self`, each with an `_out` form and no
 * trailing-underscore declaration. */
void atg_int_repr(tensor *, tensor self);
void atg_int_repr_out(tensor *, tensor out, tensor self);
void atg_inverse(tensor *, tensor self);
void atg_inverse_out(tensor *, tensor out, tensor self);
/* Predicates: each returns an int directly and takes tensor handles by value,
 * with no leading `tensor *`.
 * NOTE(review): the int is presumably 0 for false and non-zero for true; how
 * a failure in the underlying call is reported is not visible here. */
int atg_is_coalesced(tensor self);
int atg_is_complex(tensor self);
int atg_is_conj(tensor self);
int atg_is_distributed(tensor self);
int atg_is_floating_point(tensor self);
int atg_is_inference(tensor self);
int atg_is_leaf(tensor self);
int atg_is_neg(tensor self);
int atg_is_nonzero(tensor self);
/* `device` is a raw int code; its encoding is not visible here. */
int atg_is_pinned(tensor self, int device);
int atg_is_same_size(tensor self, tensor other);
/* The second parameter is named `tensor`, the same identifier used as the
 * type name; it is the last declarator, so the type is not needed after it. */
int atg_is_set_to(tensor self, tensor tensor);
int atg_is_signed(tensor self);
/* Takes no arguments and returns an int (presumably a 0/1 flag for Vulkan
 * backend availability -- confirm in the implementation).
 * Declared with an explicit `(void)`: in C before C23 an empty `()` leaves the
 * parameter list unspecified, so calls with stray arguments are not
 * diagnosed. `(void)` is a real prototype and denotes the same function type
 * when this header is compiled as C++. */
int atg_is_vulkan_available(void);
/* isclose: two tensors, double tolerances `rtol` and `atol`, and an int
 * `equal_nan` (presumably a 0/1 flag -- confirm). */
void atg_isclose(tensor *, tensor self, tensor other, double rtol, double atol, int equal_nan);
void atg_isfinite(tensor *, tensor self);
/* isin: overloads differ in which operand is a `scalar`. The unsuffixed
 * name is tensor/tensor; `_scalar_tensor` takes a scalar `element`;
 * `_tensor_scalar` takes a scalar `test_element`. All end with the ints
 * `assume_unique` and `invert`. */
void atg_isin(tensor *, tensor elements, tensor test_elements, int assume_unique, int invert);
void atg_isin_scalar_tensor(tensor *, scalar element, tensor test_elements, int assume_unique, int invert);
void atg_isin_scalar_tensor_out(tensor *, tensor out, scalar element, tensor test_elements, int assume_unique, int invert);
void atg_isin_tensor_scalar(tensor *, tensor elements, scalar test_element, int assume_unique, int invert);
void atg_isin_tensor_scalar_out(tensor *, tensor out, tensor elements, scalar test_element, int assume_unique, int invert);
void atg_isin_tensor_tensor_out(tensor *, tensor out, tensor elements, tensor test_elements, int assume_unique, int invert);
/* Element-wise classification ops: unlike the `atg_is_*` predicates that
 * return int, these return void and take the leading `tensor *`, so they
 * presumably produce a tensor result -- confirm. Each except isreal has an
 * `_out` form. */
void atg_isinf(tensor *, tensor self);
void atg_isinf_out(tensor *, tensor out, tensor self);
void atg_isnan(tensor *, tensor self);
void atg_isnan_out(tensor *, tensor out, tensor self);
void atg_isneginf(tensor *, tensor self);
void atg_isneginf_out(tensor *, tensor out, tensor self);
void atg_isposinf(tensor *, tensor self);
void atg_isposinf_out(tensor *, tensor out, tensor self);
void atg_isreal(tensor *, tensor self);
/* istft: `hop_length`, `win_length` and `length` are each passed as an
 * int64_t `_v` with a uint8_t `_null` companion (presumably optional integers
 * where a non-zero `_null` means "absent" -- confirm). `n_fft` is a plain
 * int64_t. `center`, `normalized`, `onesided` and `return_complex` are ints.
 * NOTE(review): whether `window` may be a null handle is not visible here. */
void atg_istft(tensor *, tensor self, int64_t n_fft, int64_t hop_length_v, uint8_t hop_length_null, int64_t win_length_v, uint8_t win_length_null, tensor window, int center, int normalized, int onesided, int64_t length_v, uint8_t length_null, int return_complex);
/* kaiser_window: base form takes only the length; `_periodic` adds an int
 * `periodic`; `_beta` adds `periodic` and a double `beta`. Creators end with
 * the `options_kind` / `options_device` int pair; `_out` forms take an `out`
 * tensor instead. */
void atg_kaiser_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_kaiser_window_beta(tensor *, int64_t window_length, int periodic, double beta, int options_kind, int options_device);
void atg_kaiser_window_beta_out(tensor *, tensor out, int64_t window_length, int periodic, double beta);
void atg_kaiser_window_out(tensor *, tensor out, int64_t window_length);
void atg_kaiser_window_periodic(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_kaiser_window_periodic_out(tensor *, tensor out, int64_t window_length, int periodic);
/* kl_div: `reduction` is a raw int64_t code (mapping not visible here);
 * `log_target` is an int. */
void atg_kl_div(tensor *, tensor self, tensor target, int64_t reduction, int log_target);
/* kron: binary on `self` and `other`. */
void atg_kron(tensor *, tensor self, tensor other);
void atg_kron_out(tensor *, tensor out, tensor self, tensor other);
/* kthvalue: the `_values` form takes two destination tensors, `values` and
 * `indices`, ahead of `self`. */
void atg_kthvalue(tensor *, tensor self, int64_t k, int64_t dim, int keepdim);
void atg_kthvalue_values(tensor *, tensor values, tensor indices, tensor self, int64_t k, int64_t dim, int keepdim);
void atg_l1_loss(tensor *, tensor self, tensor target, int64_t reduction);
/* layer_norm: note the final parameter is spelled `cudnn_enable`, whereas
 * group_norm / instance_norm spell theirs `cudnn_enabled`. */
void atg_layer_norm(tensor *, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, tensor bias, double eps, int cudnn_enable);
/* lcm and ldexp: binary on `self` and `other` (both tensors), each declared
 * in plain, trailing-underscore and `_out` spellings. */
void atg_lcm(tensor *, tensor self, tensor other);
void atg_lcm_(tensor *, tensor self, tensor other);
void atg_lcm_out(tensor *, tensor out, tensor self, tensor other);
void atg_ldexp(tensor *, tensor self, tensor other);
void atg_ldexp_(tensor *, tensor self, tensor other);
void atg_ldexp_out(tensor *, tensor out, tensor self, tensor other);
/* le: `other` is a `scalar` in the first three declarations and a `tensor`
 * in the `_tensor` ones. */
void atg_le(tensor *, tensor self, scalar other);
void atg_le_(tensor *, tensor self, scalar other);
void atg_le_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_le_tensor(tensor *, tensor self, tensor other);
void atg_le_tensor_(tensor *, tensor self, tensor other);
void atg_le_tensor_out(tensor *, tensor out, tensor self, tensor other);
/*
 * leaky_relu bindings.
 * The forward declarations (leaky_relu, leaky_relu_, leaky_relu_out) take
 * no negative_slope argument; only the backward declarations accept
 * `scalar negative_slope` (together with `int self_is_result`).
 * NOTE(review): the slope used by the forward variants is not visible in
 * this header -- confirm in the implementation.
 */
void atg_leaky_relu(tensor *, tensor self);
void atg_leaky_relu_(tensor *, tensor self);
void atg_leaky_relu_backward(tensor *, tensor grad_output, tensor self, scalar negative_slope, int self_is_result);
void atg_leaky_relu_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar negative_slope, int self_is_result);
void atg_leaky_relu_out(tensor *, tensor out, tensor self);
/*
 * lerp bindings over (self, end, weight), split by the type of `weight`:
 *   scalar: atg_lerp, atg_lerp_, atg_lerp_scalar_out
 *   tensor: atg_lerp_tensor, atg_lerp_tensor_, atg_lerp_tensor_out
 */
void atg_lerp(tensor *, tensor self, tensor end, scalar weight);
void atg_lerp_(tensor *, tensor self, tensor end, scalar weight);
void atg_lerp_scalar_out(tensor *, tensor out, tensor self, tensor end, scalar weight);
void atg_lerp_tensor(tensor *, tensor self, tensor end, tensor weight);
void atg_lerp_tensor_(tensor *, tensor self, tensor end, tensor weight);
void atg_lerp_tensor_out(tensor *, tensor out, tensor self, tensor end, tensor weight);
/*
 * less and less_equal bindings. Both families follow the same layout:
 * un-suffixed names take `scalar other`, `_tensor` names take
 * `tensor other`, and `_scalar_out` / `_tensor_out` add an explicit `out`
 * tensor. The less_equal declarations are interleaved between the less
 * ones in this listing.
 */
void atg_less(tensor *, tensor self, scalar other);
void atg_less_(tensor *, tensor self, scalar other);
void atg_less_equal(tensor *, tensor self, scalar other);
void atg_less_equal_(tensor *, tensor self, scalar other);
void atg_less_equal_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_less_equal_tensor(tensor *, tensor self, tensor other);
void atg_less_equal_tensor_(tensor *, tensor self, tensor other);
void atg_less_equal_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_less_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_less_tensor(tensor *, tensor self, tensor other);
void atg_less_tensor_(tensor *, tensor self, tensor other);
void atg_less_tensor_out(tensor *, tensor out, tensor self, tensor other);
/*
 * lgamma and lift bindings; all are unary over `self`, with `_out` forms
 * that add an explicit `out` tensor.
 * Note that lift_fresh has no `_out` declaration here, whereas
 * lift_fresh_copy and lift do.
 */
void atg_lgamma(tensor *, tensor self);
void atg_lgamma_(tensor *, tensor self);
void atg_lgamma_out(tensor *, tensor out, tensor self);
void atg_lift(tensor *, tensor self);
void atg_lift_fresh(tensor *, tensor self);
void atg_lift_fresh_copy(tensor *, tensor self);
void atg_lift_fresh_copy_out(tensor *, tensor out, tensor self);
void atg_lift_out(tensor *, tensor out, tensor self);
/*
 * linalg_cholesky and linalg_cond bindings.
 * - cholesky_ex_l is the explicit-output form of cholesky_ex and takes
 *   two destination tensors, `L` and `info`.
 * - cond takes `p` either as a `scalar` or, in the `_p_str` variants, as a
 *   (char *, int) pair; presumably string pointer + length, with
 *   NUL-termination not established by this header -- TODO confirm.
 */
void atg_linalg_cholesky(tensor *, tensor self, int upper);
void atg_linalg_cholesky_ex(tensor *, tensor self, int upper, int check_errors);
void atg_linalg_cholesky_ex_l(tensor *, tensor L, tensor info, tensor self, int upper, int check_errors);
void atg_linalg_cholesky_out(tensor *, tensor out, tensor self, int upper);
void atg_linalg_cond(tensor *, tensor self, scalar p);
void atg_linalg_cond_out(tensor *, tensor out, tensor self, scalar p);
void atg_linalg_cond_p_str(tensor *, tensor self, char* p_ptr, int p_len);
void atg_linalg_cond_p_str_out(tensor *, tensor out, tensor self, char* p_ptr, int p_len);
/*
 * linalg_cross, linalg_det and linalg_diagonal bindings.
 * det and diagonal name their input `A` rather than `self`; diagonal has
 * no `_out` declaration here.
 */
void atg_linalg_cross(tensor *, tensor self, tensor other, int64_t dim);
void atg_linalg_cross_out(tensor *, tensor out, tensor self, tensor other, int64_t dim);
void atg_linalg_det(tensor *, tensor A);
void atg_linalg_det_out(tensor *, tensor out, tensor A);
void atg_linalg_diagonal(tensor *, tensor A, int64_t offset, int64_t dim1, int64_t dim2);
/*
 * linalg eigen-decomposition bindings (eig, eigh, eigvals, eigvalsh).
 * - eig_out and eigh_eigvals take two destination tensors
 *   (eigenvalues/eigenvectors and eigvals/eigvecs respectively).
 * - The eigh/eigvalsh variants take UPLO as a (char *, int) pair;
 *   presumably string pointer + length -- TODO confirm.
 */
void atg_linalg_eig(tensor *, tensor self);
void atg_linalg_eig_out(tensor *, tensor eigenvalues, tensor eigenvectors, tensor self);
void atg_linalg_eigh(tensor *, tensor self, char* UPLO_ptr, int UPLO_len);
void atg_linalg_eigh_eigvals(tensor *, tensor eigvals, tensor eigvecs, tensor self, char* UPLO_ptr, int UPLO_len);
void atg_linalg_eigvals(tensor *, tensor self);
void atg_linalg_eigvals_out(tensor *, tensor out, tensor self);
void atg_linalg_eigvalsh(tensor *, tensor self, char* UPLO_ptr, int UPLO_len);
void atg_linalg_eigvalsh_out(tensor *, tensor out, tensor self, char* UPLO_ptr, int UPLO_len);
/*
 * linalg_householder_product and linalg_inv bindings.
 * inv_ex adds `int check_errors`; its explicit-output form,
 * inv_ex_inverse, takes two destination tensors (`inverse`, `info`).
 */
void atg_linalg_householder_product(tensor *, tensor input, tensor tau);
void atg_linalg_householder_product_out(tensor *, tensor out, tensor input, tensor tau);
void atg_linalg_inv(tensor *, tensor A);
void atg_linalg_inv_ex(tensor *, tensor A, int check_errors);
void atg_linalg_inv_ex_inverse(tensor *, tensor inverse, tensor info, tensor A, int check_errors);
void atg_linalg_inv_out(tensor *, tensor out, tensor A);
/*
 * linalg LDL bindings.
 * - ldl_factor / ldl_factor_ex take `self` and `int hermitian`
 *   (`_ex` adds `int check_errors`); their `_out` forms take destination
 *   tensors (LD, pivots) and, for `_ex_out`, an additional `info`.
 * - ldl_solve takes the (LD, pivots) pair plus right-hand side `B`.
 */
void atg_linalg_ldl_factor(tensor *, tensor self, int hermitian);
void atg_linalg_ldl_factor_ex(tensor *, tensor self, int hermitian, int check_errors);
void atg_linalg_ldl_factor_ex_out(tensor *, tensor LD, tensor pivots, tensor info, tensor self, int hermitian, int check_errors);
void atg_linalg_ldl_factor_out(tensor *, tensor LD, tensor pivots, tensor self, int hermitian);
void atg_linalg_ldl_solve(tensor *, tensor LD, tensor pivots, tensor B, int hermitian);
void atg_linalg_ldl_solve_out(tensor *, tensor out, tensor LD, tensor pivots, tensor B, int hermitian);
/*
 * linalg_lstsq bindings.
 * - rcond is split into `double rcond_v` + `uint8_t rcond_null`;
 *   presumably the flag marks the value as absent -- TODO confirm.
 * - driver is a (char *, int) pair; presumably string pointer + length.
 * - The `_out` form takes four destination tensors: solution, residuals,
 *   rank, singular_values.
 */
void atg_linalg_lstsq(tensor *, tensor self, tensor b, double rcond_v, uint8_t rcond_null, char* driver_ptr, int driver_len);
void atg_linalg_lstsq_out(tensor *, tensor solution, tensor residuals, tensor rank, tensor singular_values, tensor self, tensor b, double rcond_v, uint8_t rcond_null, char* driver_ptr, int driver_len);
/*
 * linalg LU bindings.
 * - lu / lu_factor / lu_factor_ex take `A` and `int pivot`
 *   (`_ex` adds `int check_errors`).
 * - Destination tensors of the `_out` forms: lu_out -> (P, L, U);
 *   lu_factor_out -> (LU, pivots); lu_factor_ex_out -> (LU, pivots, info).
 * - lu_solve takes (LU, pivots, B) plus `int left` and `int adjoint`.
 */
void atg_linalg_lu(tensor *, tensor A, int pivot);
void atg_linalg_lu_factor(tensor *, tensor A, int pivot);
void atg_linalg_lu_factor_ex(tensor *, tensor A, int pivot, int check_errors);
void atg_linalg_lu_factor_ex_out(tensor *, tensor LU, tensor pivots, tensor info, tensor A, int pivot, int check_errors);
void atg_linalg_lu_factor_out(tensor *, tensor LU, tensor pivots, tensor A, int pivot);
void atg_linalg_lu_out(tensor *, tensor P, tensor L, tensor U, tensor A, int pivot);
void atg_linalg_lu_solve(tensor *, tensor LU, tensor pivots, tensor B, int left, int adjoint);
void atg_linalg_lu_solve_out(tensor *, tensor out, tensor LU, tensor pivots, tensor B, int left, int adjoint);
/*
 * linalg_matmul, linalg_matrix_exp and linalg_matrix_power bindings,
 * each with an `_out` form that adds an explicit `out` tensor.
 * matrix_power takes the exponent as `int64_t n`.
 */
void atg_linalg_matmul(tensor *, tensor self, tensor other);
void atg_linalg_matmul_out(tensor *, tensor out, tensor self, tensor other);
void atg_linalg_matrix_exp(tensor *, tensor self);
void atg_linalg_matrix_exp_out(tensor *, tensor out, tensor self);
void atg_linalg_matrix_power(tensor *, tensor self, int64_t n);
void atg_linalg_matrix_power_out(tensor *, tensor out, tensor self, int64_t n);
/*
 * linalg_matrix_rank bindings; four ways of passing the tolerance:
 *   - `double tol`                          (matrix_rank, matrix_rank_out)
 *   - `tensor tol`                          (`_tol_tensor`, `_out_tol_tensor`)
 *   - atol/rtol as double `_v` + uint8_t `_null` pairs (`_atol_rtol_float*`)
 *   - `tensor atol`, `tensor rtol`          (`_atol_rtol_tensor*`)
 * The tensor-tolerance variants name their input `input`; the others use
 * `self`.
 * NOTE(review): `_null` presumably marks the paired value as absent --
 * confirm.
 */
void atg_linalg_matrix_rank(tensor *, tensor self, double tol, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_float(tensor *, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_float_out(tensor *, tensor out, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_tensor(tensor *, tensor input, tensor atol, tensor rtol, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_tensor_out(tensor *, tensor out, tensor input, tensor atol, tensor rtol, int hermitian);
void atg_linalg_matrix_rank_out(tensor *, tensor out, tensor self, double tol, int hermitian);
void atg_linalg_matrix_rank_out_tol_tensor(tensor *, tensor out, tensor input, tensor tol, int hermitian);
void atg_linalg_matrix_rank_tol_tensor(tensor *, tensor input, tensor tol, int hermitian);
/*
 * linalg_multi_dot and linalg_norm bindings.
 * - multi_dot takes its operands as a (tensor *, int) pair; presumably
 *   array pointer + element count -- TODO confirm.
 * - norm takes `ord` either as a `scalar` or, in the `_ord_str` variants,
 *   as a (char *, int) pair. All norm variants take dim as an
 *   (int64_t *, int) pair and `dtype` as a plain int.
 */
void atg_linalg_multi_dot(tensor *, tensor *tensors_data, int tensors_len);
void atg_linalg_multi_dot_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_linalg_norm(tensor *, tensor self, scalar ord, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_norm_ord_str(tensor *, tensor self, char* ord_ptr, int ord_len, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_norm_ord_str_out(tensor *, tensor out, tensor self, char* ord_ptr, int ord_len, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_norm_out(tensor *, tensor out, tensor self, scalar ord, int64_t *dim_data, int dim_len, int keepdim, int dtype);
/*
 * linalg_pinv bindings; four ways of passing the tolerance:
 *   - `double rcond`                        (pinv, pinv_out)
 *   - `tensor rcond`                        (`_rcond_tensor`, `_out_rcond_tensor`)
 *   - atol/rtol as double `_v` + uint8_t `_null` pairs (`_atol_rtol_float*`)
 *   - `tensor atol`, `tensor rtol`          (`_atol_rtol_tensor*`)
 * NOTE(review): `_null` presumably marks the paired value as absent --
 * confirm.
 */
void atg_linalg_pinv(tensor *, tensor self, double rcond, int hermitian);
void atg_linalg_pinv_atol_rtol_float(tensor *, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_pinv_atol_rtol_float_out(tensor *, tensor out, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_pinv_atol_rtol_tensor(tensor *, tensor self, tensor atol, tensor rtol, int hermitian);
void atg_linalg_pinv_atol_rtol_tensor_out(tensor *, tensor out, tensor self, tensor atol, tensor rtol, int hermitian);
void atg_linalg_pinv_out(tensor *, tensor out, tensor self, double rcond, int hermitian);
void atg_linalg_pinv_out_rcond_tensor(tensor *, tensor out, tensor self, tensor rcond, int hermitian);
void atg_linalg_pinv_rcond_tensor(tensor *, tensor self, tensor rcond, int hermitian);
/*
 * linalg_qr and linalg_slogdet bindings.
 * - qr takes `mode` as a (char *, int) pair; presumably string pointer +
 *   length -- TODO confirm. qr_out takes destination tensors (Q, R).
 * - slogdet_out takes destination tensors (sign, logabsdet).
 */
void atg_linalg_qr(tensor *, tensor A, char* mode_ptr, int mode_len);
void atg_linalg_qr_out(tensor *, tensor Q, tensor R, tensor A, char* mode_ptr, int mode_len);
void atg_linalg_slogdet(tensor *, tensor A);
void atg_linalg_slogdet_out(tensor *, tensor sign, tensor logabsdet, tensor A);
/*
 * linalg_solve bindings.
 * - solve / solve_ex take (A, B) and `int left`; `_ex` adds
 *   `int check_errors`, and solve_ex_out takes destination tensors
 *   (result, info).
 * - solve_triangular names its first operand `self` and takes three int
 *   flags: upper, left, unitriangular.
 */
void atg_linalg_solve(tensor *, tensor A, tensor B, int left);
void atg_linalg_solve_ex(tensor *, tensor A, tensor B, int left, int check_errors);
void atg_linalg_solve_ex_out(tensor *, tensor result, tensor info, tensor A, tensor B, int left, int check_errors);
void atg_linalg_solve_out(tensor *, tensor out, tensor A, tensor B, int left);
void atg_linalg_solve_triangular(tensor *, tensor self, tensor B, int upper, int left, int unitriangular);
void atg_linalg_solve_triangular_out(tensor *, tensor out, tensor self, tensor B, int upper, int left, int unitriangular);
/*
 * linalg_svd and linalg_svdvals bindings.
 * All take `driver` as a (char *, int) pair; presumably string pointer +
 * length -- TODO confirm. svd_u is the explicit-output form of svd and
 * takes destination tensors (U, S, Vh).
 */
void atg_linalg_svd(tensor *, tensor A, int full_matrices, char* driver_ptr, int driver_len);
void atg_linalg_svd_u(tensor *, tensor U, tensor S, tensor Vh, tensor A, int full_matrices, char* driver_ptr, int driver_len);
void atg_linalg_svdvals(tensor *, tensor A, char* driver_ptr, int driver_len);
void atg_linalg_svdvals_out(tensor *, tensor out, tensor A, char* driver_ptr, int driver_len);
/*
 * linalg_tensorinv, linalg_tensorsolve, linalg_vander and linalg_vecdot
 * bindings.
 * - tensorsolve takes dims as an (int64_t *, int) pair.
 * - vander takes `n` as int64_t `n_v` + uint8_t `n_null`; presumably the
 *   flag marks the value as absent -- TODO confirm. It has no `_out` form
 *   here.
 */
void atg_linalg_tensorinv(tensor *, tensor self, int64_t ind);
void atg_linalg_tensorinv_out(tensor *, tensor out, tensor self, int64_t ind);
void atg_linalg_tensorsolve(tensor *, tensor self, tensor other, int64_t *dims_data, int dims_len);
void atg_linalg_tensorsolve_out(tensor *, tensor out, tensor self, tensor other, int64_t *dims_data, int dims_len);
void atg_linalg_vander(tensor *, tensor x, int64_t n_v, uint8_t n_null);
void atg_linalg_vecdot(tensor *, tensor x, tensor y, int64_t dim);
void atg_linalg_vecdot_out(tensor *, tensor out, tensor x, tensor y, int64_t dim);
/*
 * linear bindings over (input, weight, bias); the `_out` form adds an
 * explicit `out` tensor.
 * NOTE(review): whether `bias` may be a null handle is not visible in this
 * header -- confirm in the implementation.
 */
void atg_linear(tensor *, tensor input, tensor weight, tensor bias);
void atg_linear_out(tensor *, tensor out, tensor input, tensor weight, tensor bias);
/*
 * linspace bindings; one variant per combination of scalar/tensor for
 * `start` and `end` (the name suffix lists the start type, then the end
 * type; the un-suffixed form is scalar/scalar).
 * Non-`_out` forms take an (options_kind, options_device) int pair;
 * `_out` forms take an explicit `out` tensor instead.
 */
void atg_linspace(tensor *, scalar start, scalar end, int64_t steps, int options_kind, int options_device);
void atg_linspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps);
void atg_linspace_scalar_tensor(tensor *, scalar start, tensor end, int64_t steps, int options_kind, int options_device);
void atg_linspace_scalar_tensor_out(tensor *, tensor out, scalar start, tensor end, int64_t steps);
void atg_linspace_tensor_scalar(tensor *, tensor start, scalar end, int64_t steps, int options_kind, int options_device);
void atg_linspace_tensor_scalar_out(tensor *, tensor out, tensor start, scalar end, int64_t steps);
void atg_linspace_tensor_tensor(tensor *, tensor start, tensor end, int64_t steps, int options_kind, int options_device);
void atg_linspace_tensor_tensor_out(tensor *, tensor out, tensor start, tensor end, int64_t steps);
/*
 * log, log10, log1p, log2 and log_normal bindings.
 * The four logarithm families are unary over `self` and each has plain,
 * trailing-underscore and `_out` forms. log_normal additionally takes
 * `double mean` and `double std`.
 * Note that atg_log_ and atg_log_out are listed after the log10/log1p/log2
 * entries, not next to atg_log.
 */
void atg_log(tensor *, tensor self);
void atg_log10(tensor *, tensor self);
void atg_log10_(tensor *, tensor self);
void atg_log10_out(tensor *, tensor out, tensor self);
void atg_log1p(tensor *, tensor self);
void atg_log1p_(tensor *, tensor self);
void atg_log1p_out(tensor *, tensor out, tensor self);
void atg_log2(tensor *, tensor self);
void atg_log2_(tensor *, tensor self);
void atg_log2_out(tensor *, tensor out, tensor self);
void atg_log_(tensor *, tensor self);
void atg_log_normal(tensor *, tensor self, double mean, double std);
void atg_log_normal_(tensor *, tensor self, double mean, double std);
void atg_log_normal_out(tensor *, tensor out, tensor self, double mean, double std);
void atg_log_out(tensor *, tensor out, tensor self);
/*
 * log_sigmoid and log_softmax bindings.
 * - log_sigmoid_backward takes an extra `buffer` tensor; the `_grad_input`
 *   form adds an explicit `grad_input` destination.
 * - log_softmax takes `dtype` as a plain int; the encoding of that int is
 *   not visible in this header -- TODO confirm.
 */
void atg_log_sigmoid(tensor *, tensor self);
void atg_log_sigmoid_backward(tensor *, tensor grad_output, tensor self, tensor buffer);
void atg_log_sigmoid_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor buffer);
void atg_log_sigmoid_out(tensor *, tensor out, tensor self);
void atg_log_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_log_softmax_int_out(tensor *, tensor out, tensor self, int64_t dim, int dtype);
/*
 * logaddexp, logaddexp2, logcumsumexp and logdet bindings.
 * logaddexp/logaddexp2 are binary over (self, other); logcumsumexp takes a
 * single `int64_t dim`; logdet is unary and has no `_out` form here.
 */
void atg_logaddexp(tensor *, tensor self, tensor other);
void atg_logaddexp2(tensor *, tensor self, tensor other);
void atg_logaddexp2_out(tensor *, tensor out, tensor self, tensor other);
void atg_logaddexp_out(tensor *, tensor out, tensor self, tensor other);
void atg_logcumsumexp(tensor *, tensor self, int64_t dim);
void atg_logcumsumexp_out(tensor *, tensor out, tensor self, int64_t dim);
void atg_logdet(tensor *, tensor self);
/*
 * logical_and / logical_not / logical_or / logical_xor bindings.
 * and/or/xor are binary over (self, other); not is unary over `self`.
 * Each has plain, trailing-underscore and `_out` forms.
 */
void atg_logical_and(tensor *, tensor self, tensor other);
void atg_logical_and_(tensor *, tensor self, tensor other);
void atg_logical_and_out(tensor *, tensor out, tensor self, tensor other);
void atg_logical_not(tensor *, tensor self);
void atg_logical_not_(tensor *, tensor self);
void atg_logical_not_out(tensor *, tensor out, tensor self);
void atg_logical_or(tensor *, tensor self, tensor other);
void atg_logical_or_(tensor *, tensor self, tensor other);
void atg_logical_or_out(tensor *, tensor out, tensor self, tensor other);
void atg_logical_xor(tensor *, tensor self, tensor other);
void atg_logical_xor_(tensor *, tensor self, tensor other);
void atg_logical_xor_out(tensor *, tensor out, tensor self, tensor other);
/*
 * logit bindings. Every variant takes eps as `double eps_v` +
 * `uint8_t eps_null`.
 * NOTE(review): presumably a non-zero `eps_null` means "no eps supplied" --
 * confirm against the implementation.
 */
void atg_logit(tensor *, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_(tensor *, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_backward(tensor *, tensor grad_output, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_out(tensor *, tensor out, tensor self, double eps_v, uint8_t eps_null);
/*
 * logspace bindings; one variant per combination of scalar/tensor for
 * `start` and `end` (the name suffix lists the start type, then the end
 * type; the un-suffixed form is scalar/scalar). All take `int64_t steps`
 * and `double base`.
 * Non-`_out` forms take an (options_kind, options_device) int pair;
 * `_out` forms take an explicit `out` tensor instead.
 */
void atg_logspace(tensor *, scalar start, scalar end, int64_t steps, double base, int options_kind, int options_device);
void atg_logspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps, double base);
void atg_logspace_scalar_tensor(tensor *, scalar start, tensor end, int64_t steps, double base, int options_kind, int options_device);
void atg_logspace_scalar_tensor_out(tensor *, tensor out, scalar start, tensor end, int64_t steps, double base);
void atg_logspace_tensor_scalar(tensor *, tensor start, scalar end, int64_t steps, double base, int options_kind, int options_device);
void atg_logspace_tensor_scalar_out(tensor *, tensor out, tensor start, scalar end, int64_t steps, double base);
void atg_logspace_tensor_tensor(tensor *, tensor start, tensor end, int64_t steps, double base, int options_kind, int options_device);
void atg_logspace_tensor_tensor_out(tensor *, tensor out, tensor start, tensor end, int64_t steps, double base);
/*
 * logsumexp bindings. dim is passed as an (int64_t *, int) pair;
 * presumably array pointer + element count -- TODO confirm.
 */
void atg_logsumexp(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_logsumexp_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
/*
 * lstm bindings. hx and params are passed as (tensor *, int) pairs;
 * presumably array pointer + element count -- TODO confirm.
 * - atg_lstm takes `batch_first`; atg_lstm_data takes `batch_sizes` and has
 *   no `batch_first` parameter.
 * - atg_lstm_mps_backward differs from the other declarations in this
 *   listing: it has no leading unnamed `tensor *` parameter. Instead it
 *   starts with `tensor out0` followed by two (tensor *, int) pairs,
 *   out1 and out2.
 */
void atg_lstm(tensor *, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg_lstm_cell(tensor *, tensor input, tensor *hx_data, int hx_len, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh);
void atg_lstm_data(tensor *, tensor data, tensor batch_sizes, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional);
void atg_lstm_mps_backward(tensor out0, tensor *out1_data, int out1_len, tensor *out2_data, int out2_len, tensor grad_y, tensor grad_hy, tensor grad_cy, tensor z_state, tensor cell_state_fwd, tensor input, tensor layersOutputs, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
/*
 * lt bindings, split by the type of `other`:
 *   scalar: atg_lt, atg_lt_, atg_lt_scalar_out
 *   tensor: atg_lt_tensor, atg_lt_tensor_, atg_lt_tensor_out
 */
void atg_lt(tensor *, tensor self, scalar other);
void atg_lt_(tensor *, tensor self, scalar other);
void atg_lt_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_lt_tensor(tensor *, tensor self, tensor other);
void atg_lt_tensor_(tensor *, tensor self, tensor other);
void atg_lt_tensor_out(tensor *, tensor out, tensor self, tensor other);
/*
 * lu_solve and lu_unpack bindings, both operating on an
 * (LU_data, LU_pivots) tensor pair.
 * lu_unpack takes two int flags (unpack_data, unpack_pivots); its `_out`
 * form takes destination tensors (P, L, U).
 */
void atg_lu_solve(tensor *, tensor self, tensor LU_data, tensor LU_pivots);
void atg_lu_solve_out(tensor *, tensor out, tensor self, tensor LU_data, tensor LU_pivots);
void atg_lu_unpack(tensor *, tensor LU_data, tensor LU_pivots, int unpack_data, int unpack_pivots);
void atg_lu_unpack_out(tensor *, tensor P, tensor L, tensor U, tensor LU_data, tensor LU_pivots, int unpack_data, int unpack_pivots);
/*
 * margin_ranking_loss binding over (input1, input2, target) with a
 * `double margin`. `reduction` is passed as a raw int64_t; its encoding is
 * not visible in this header -- TODO confirm.
 */
void atg_margin_ranking_loss(tensor *, tensor input1, tensor input2, tensor target, double margin, int64_t reduction);
/*
 * masked_fill, masked_scatter and masked_select bindings; all take a
 * `mask` tensor.
 * - masked_fill takes `value` as a `scalar` (un-suffixed and `_scalar_out`
 *   forms) or as a `tensor` (`_tensor*` forms).
 * - masked_scatter_backward takes sizes as an (int64_t *, int) pair;
 *   presumably array pointer + element count -- TODO confirm.
 * - masked_select_backward takes (grad, input, mask).
 */
void atg_masked_fill(tensor *, tensor self, tensor mask, scalar value);
void atg_masked_fill_(tensor *, tensor self, tensor mask, scalar value);
void atg_masked_fill_scalar_out(tensor *, tensor out, tensor self, tensor mask, scalar value);
void atg_masked_fill_tensor(tensor *, tensor self, tensor mask, tensor value);
void atg_masked_fill_tensor_(tensor *, tensor self, tensor mask, tensor value);
void atg_masked_fill_tensor_out(tensor *, tensor out, tensor self, tensor mask, tensor value);
void atg_masked_scatter(tensor *, tensor self, tensor mask, tensor source);
void atg_masked_scatter_(tensor *, tensor self, tensor mask, tensor source);
void atg_masked_scatter_backward(tensor *, tensor grad_output, tensor mask, int64_t *sizes_data, int sizes_len);
void atg_masked_scatter_out(tensor *, tensor out, tensor self, tensor mask, tensor source);
void atg_masked_select(tensor *, tensor self, tensor mask);
void atg_masked_select_backward(tensor *, tensor grad, tensor input, tensor mask);
void atg_masked_select_out(tensor *, tensor out, tensor self, tensor mask);
/*
 * matmul, matrix_exp, matrix_h and matrix_power bindings.
 * matmul and matrix_power have `_out` forms; matrix_exp and matrix_h do
 * not in this listing. matrix_exp_backward takes (self, grad).
 */
void atg_matmul(tensor *, tensor self, tensor other);
void atg_matmul_out(tensor *, tensor out, tensor self, tensor other);
void atg_matrix_exp(tensor *, tensor self);
void atg_matrix_exp_backward(tensor *, tensor self, tensor grad);
void atg_matrix_h(tensor *, tensor self);
void atg_matrix_power(tensor *, tensor self, int64_t n);
void atg_matrix_power_out(tensor *, tensor out, tensor self, int64_t n);
/*
 * max bindings.
 * - atg_max: unary over `self`.
 * - atg_max_dim: takes `dim` and `keepdim`; atg_max_dim_max is its
 *   explicit-output form with destination tensors named `max` and
 *   `max_values`.
 * - atg_max_other: binary over (self, other); atg_max_out is the binary
 *   form with an explicit `out` tensor.
 */
void atg_max(tensor *, tensor self);
void atg_max_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_max_dim_max(tensor *, tensor max, tensor max_values, tensor self, int64_t dim, int keepdim);
void atg_max_other(tensor *, tensor self, tensor other);
void atg_max_out(tensor *, tensor out, tensor self, tensor other);
/*
 * max_pool1d / max_pool2d / max_pool3d bindings.
 * Every declaration takes four (int64_t *, int) pairs in the order
 * kernel_size, stride, padding, dilation, followed by `int ceil_mode`.
 * NOTE(review): each pair is presumably array pointer + element count --
 * confirm.
 * - `_with_indices_backward*` forms append a trailing `tensor indices`
 *   after ceil_mode.
 * - `_with_indices_out` forms take destination tensors (out, indices)
 *   ahead of `self`.
 * - max_pool2d_backward (without indices) has an `_out` form; there is no
 *   1d or 3d counterpart in this listing.
 */
void atg_max_pool1d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool1d_with_indices(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool2d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool2d_backward_out(tensor *, tensor out, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool2d_with_indices(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool2d_with_indices_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode, tensor indices);
void atg_max_pool2d_with_indices_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode, tensor indices);
void atg_max_pool2d_with_indices_out(tensor *, tensor out, tensor indices, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool3d_with_indices(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool3d_with_indices_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode, tensor indices);
void atg_max_pool3d_with_indices_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode, tensor indices);
void atg_max_pool3d_with_indices_out(tensor *, tensor out, tensor indices, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
/*
 * max_unary_out and max_unpool bindings.
 * - atg_max_unary_out: unary max over `self` with an explicit `out` tensor.
 * - max_unpool2d takes (self, indices) and output_size as an
 *   (int64_t *, int) pair; max_unpool3d additionally takes stride and
 *   padding pairs.
 */
void atg_max_unary_out(tensor *, tensor out, tensor self);
void atg_max_unpool2d(tensor *, tensor self, tensor indices, int64_t *output_size_data, int output_size_len);
void atg_max_unpool2d_out(tensor *, tensor out, tensor self, tensor indices, int64_t *output_size_data, int output_size_len);
void atg_max_unpool3d(tensor *, tensor self, tensor indices, int64_t *output_size_data, int output_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_max_unpool3d_out(tensor *, tensor out, tensor self, tensor indices, int64_t *output_size_data, int output_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
/* maximum bindings: binary over (self, other); `_out` adds an explicit `out` tensor. */
void atg_maximum(tensor *, tensor self, tensor other);
void atg_maximum_out(tensor *, tensor out, tensor self, tensor other);
/*
 * mean bindings.
 * - atg_mean / atg_mean_dtype_out: take only `self` and an int `dtype`.
 * - atg_mean_dim / atg_mean_out: additionally take dim as an
 *   (int64_t *, int) pair plus `keepdim`. Note that atg_mean_out is the
 *   explicit-output form of the dim variant, not of atg_mean.
 * NOTE(review): the int encoding of `dtype` is not visible in this header
 * -- confirm.
 */
void atg_mean(tensor *, tensor self, int dtype);
void atg_mean_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_mean_dtype_out(tensor *, tensor out, tensor self, int dtype);
void atg_mean_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
/*
 * median bindings.
 * - atg_median / atg_median_out: unary over `self`.
 * - atg_median_dim: takes `dim` and `keepdim`; atg_median_dim_values is its
 *   explicit-output form with destination tensors (values, indices).
 */
void atg_median(tensor *, tensor self);
void atg_median_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_median_dim_values(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int keepdim);
void atg_median_out(tensor *, tensor out, tensor self);
/*
 * meshgrid bindings.
 * Unlike the `void` declarations around them, these return a `tensor *`
 * and take no leading unnamed `tensor *` parameter. Inputs are passed as a
 * (tensor *, int) pair; the `_indexing` variant adds `indexing` as a
 * (char *, int) pair.
 * NOTE(review): the length/termination of the returned array and who is
 * responsible for releasing it are not visible in this header -- confirm
 * against the implementation before use.
 */
tensor *atg_meshgrid(tensor *tensors_data, int tensors_len);
tensor *atg_meshgrid_indexing(tensor *tensors_data, int tensors_len, char* indexing_ptr, int indexing_len);
/* mh binding: unary over `self`; no `_out` form in this listing. */
void atg_mh(tensor *, tensor self);
/*
 * min bindings.
 * - atg_min / atg_min_unary_out: unary over `self`.
 * - atg_min_dim: takes `dim` and `keepdim`; atg_min_dim_min is its
 *   explicit-output form with destination tensors named `min` and
 *   `min_indices`.
 * - atg_min_other: binary over (self, other); atg_min_out is the binary
 *   form with an explicit `out` tensor.
 */
void atg_min(tensor *, tensor self);
void atg_min_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_min_dim_min(tensor *, tensor min, tensor min_indices, tensor self, int64_t dim, int keepdim);
void atg_min_other(tensor *, tensor self, tensor other);
void atg_min_out(tensor *, tensor out, tensor self, tensor other);
void atg_min_unary_out(tensor *, tensor out, tensor self);
/* minimum bindings: binary over (self, other); `_out` adds an explicit `out` tensor. */
void atg_minimum(tensor *, tensor self, tensor other);
void atg_minimum_out(tensor *, tensor out, tensor self, tensor other);
/*
 * miopen_* bindings: batch_norm, convolution (plain, relu, add_relu,
 * transpose, depthwise), ctc_loss and rnn.
 *
 * Caution on argument order -- the (int64_t *, int) pairs are NOT ordered
 * uniformly across the convolution variants:
 *   - miopen_convolution, miopen_depthwise_convolution:
 *         padding, stride, dilation
 *   - miopen_convolution_transpose:
 *         padding, output_padding, stride, dilation
 *   - miopen_convolution_relu, miopen_convolution_add_relu:
 *         stride, padding, dilation
 * The relu/add_relu variants also lack the trailing `benchmark` and
 * `deterministic` ints taken by the others.
 *
 * Other notes:
 *   - batch_norm_out / batch_norm_backward_out take three destination
 *     tensors (out0..out2); rnn_out takes five (out0..out4).
 *   - ctc_loss takes input_lengths/target_lengths as (int64_t *, int)
 *     pairs; ctc_loss_tensor takes them as tensors.
 *   - rnn takes weights as a (tensor *, int) pair plus `weight_stride0`,
 *     and batch_sizes as an (int64_t *, int) pair.
 * NOTE(review): each pointer/int pair is presumably array pointer +
 * element count -- confirm.
 */
void atg_miopen_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double exponential_average_factor, double epsilon);
void atg_miopen_batch_norm_backward(tensor *, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon);
void atg_miopen_batch_norm_backward_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon);
void atg_miopen_batch_norm_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double exponential_average_factor, double epsilon);
void atg_miopen_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_add_relu(tensor *, tensor self, tensor weight, tensor z, scalar alpha, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_miopen_convolution_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_relu(tensor *, tensor self, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_miopen_convolution_transpose(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_transpose_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int deterministic, int zero_infinity);
void atg_miopen_ctc_loss_out(tensor *, tensor out0, tensor out1, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int deterministic, int zero_infinity);
void atg_miopen_ctc_loss_tensor(tensor *, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int deterministic, int zero_infinity);
void atg_miopen_depthwise_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_depthwise_convolution_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_rnn(tensor *, tensor input, tensor *weight_data, int weight_len, int64_t weight_stride0, tensor hx, tensor cx, int64_t mode, int64_t hidden_size, int64_t num_layers, int batch_first, double dropout, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, tensor dropout_state);
void atg_miopen_rnn_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor out4, tensor input, tensor *weight_data, int weight_len, int64_t weight_stride0, tensor hx, tensor cx, int64_t mode, int64_t hidden_size, int64_t num_layers, int batch_first, double dropout, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, tensor dropout_state);
/*
 * mish bindings: unary over `self` with plain, trailing-underscore and
 * `_out` forms, plus mish_backward over (grad_output, self).
 */
void atg_mish(tensor *, tensor self);
void atg_mish_(tensor *, tensor self);
void atg_mish_backward(tensor *, tensor grad_output, tensor self);
void atg_mish_out(tensor *, tensor out, tensor self);
/*
 * mkldnn_* bindings: adaptive_avg_pool2d, convolution, linear,
 * max_pool2d/3d and reorder_conv2d/3d_weight.
 *
 * Argument-order notes for the (int64_t *, int) pairs:
 *   - mkldnn_convolution:        padding, stride, dilation, then groups
 *   - mkldnn_max_pool2d/3d:      kernel_size, stride, padding, dilation,
 *                                then ceil_mode
 *   - mkldnn_reorder_conv*_weight: padding, stride, dilation, groups,
 *                                then input_size
 * Unlike the max_pool*_backward declarations that take (grad_output, self),
 * mkldnn_max_pool*_backward takes (grad_output, output, input).
 *
 * Other notes:
 *   - linear_backward_input takes input_size as an (int64_t *, int) pair
 *     ahead of the tensors.
 *   - linear_backward_weights takes `int bias_defined`; its `_out` form
 *     takes two destination tensors (out0, out1).
 * NOTE(review): each pointer/int pair is presumably array pointer +
 * element count -- confirm.
 */
void atg_mkldnn_adaptive_avg_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_mkldnn_adaptive_avg_pool2d_backward(tensor *, tensor grad_output, tensor self);
void atg_mkldnn_adaptive_avg_pool2d_backward_out(tensor *, tensor out, tensor grad_output, tensor self);
void atg_mkldnn_adaptive_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg_mkldnn_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_mkldnn_convolution_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_mkldnn_linear(tensor *, tensor self, tensor weight, tensor bias);
void atg_mkldnn_linear_backward_input(tensor *, int64_t *input_size_data, int input_size_len, tensor grad_output, tensor weight);
void atg_mkldnn_linear_backward_input_out(tensor *, tensor out, int64_t *input_size_data, int input_size_len, tensor grad_output, tensor weight);
void atg_mkldnn_linear_backward_weights(tensor *, tensor grad_output, tensor input, tensor weight, int bias_defined);
void atg_mkldnn_linear_backward_weights_out(tensor *, tensor out0, tensor out1, tensor grad_output, tensor input, tensor weight, int bias_defined);
void atg_mkldnn_linear_out(tensor *, tensor out, tensor self, tensor weight, tensor bias);
void atg_mkldnn_max_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool2d_backward(tensor *, tensor grad_output, tensor output, tensor input, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool2d_backward_out(tensor *, tensor out, tensor grad_output, tensor output, tensor input, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool2d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool3d_backward(tensor *, tensor grad_output, tensor output, tensor input, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool3d_backward_out(tensor *, tensor out, tensor grad_output, tensor output, tensor input, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool3d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_reorder_conv2d_weight(tensor *, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int64_t *input_size_data, int input_size_len);
void atg_mkldnn_reorder_conv2d_weight_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int64_t *input_size_data, int input_size_len);
void atg_mkldnn_reorder_conv3d_weight(tensor *, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int64_t *input_size_data, int input_size_len);
void atg_mkldnn_reorder_conv3d_weight_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int64_t *input_size_data, int input_size_len);
void atg_mkldnn_rnn_layer(tensor *, tensor input, tensor weight0, tensor weight1, tensor weight2, tensor weight3, tensor hx_, tensor cx_, int reverse, int64_t *batch_sizes_data, int batch_sizes_len, int64_t mode, int64_t hidden_size, int64_t num_layers, int has_biases, int bidirectional, int batch_first, int train);
void atg_mkldnn_rnn_layer_backward(tensor *, tensor input, tensor weight1, tensor weight2, tensor weight3, tensor weight4, tensor hx_, tensor cx_tmp, tensor output, tensor hy_, tensor cy_, tensor grad_output, tensor grad_hy, tensor grad_cy, int reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, int has_biases, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, int batch_first, tensor workspace);
void atg_mkldnn_rnn_layer_backward_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor out4, tensor out5, tensor out6, tensor input, tensor weight1, tensor weight2, tensor weight3, tensor weight4, tensor hx_, tensor cx_tmp, tensor output, tensor hy_, tensor cy_, tensor grad_output, tensor grad_hy, tensor grad_cy, int reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, int has_biases, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, int batch_first, tensor workspace);
void atg_mkldnn_rnn_layer_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor input, tensor weight0, tensor weight1, tensor weight2, tensor weight3, tensor hx_, tensor cx_, int reverse, int64_t *batch_sizes_data, int batch_sizes_len, int64_t mode, int64_t hidden_size, int64_t num_layers, int has_biases, int bidirectional, int batch_first, int train);
// mm, mode, moveaxis/movedim, mse_loss, msort and mt wrappers.
// All return void and take an unnamed leading `tensor *`. The `*_out`,
// `*_values` and `*_grad_input` variants add explicit tensor parameters
// (out / values, indices / grad_input) ahead of the operator's inputs.
// `out_dtype` is passed as an `int` (NOTE(review): presumably a scalar-type code; confirm).
void atg_mm(tensor *, tensor self, tensor mat2);
void atg_mm_dtype(tensor *, tensor self, tensor mat2, int out_dtype);
void atg_mm_dtype_out(tensor *, tensor out, tensor self, tensor mat2, int out_dtype);
void atg_mm_out(tensor *, tensor out, tensor self, tensor mat2);
void atg_mode(tensor *, tensor self, int64_t dim, int keepdim);
void atg_mode_values(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int keepdim);
void atg_moveaxis(tensor *, tensor self, int64_t *source_data, int source_len, int64_t *destination_data, int destination_len);
void atg_moveaxis_int(tensor *, tensor self, int64_t source, int64_t destination);
void atg_movedim(tensor *, tensor self, int64_t *source_data, int source_len, int64_t *destination_data, int destination_len);
void atg_movedim_int(tensor *, tensor self, int64_t source, int64_t destination);
void atg_mse_loss(tensor *, tensor self, tensor target, int64_t reduction);
void atg_mse_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_mse_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_mse_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
void atg_msort(tensor *, tensor self);
void atg_msort_out(tensor *, tensor out, tensor self);
void atg_mt(tensor *, tensor self);
// mul/multiply, multi_margin_loss, multilabel_margin_loss, multinomial, mv and mvlgamma wrappers.
// All return void and take an unnamed leading `tensor *`. Names ending in `_`
// (e.g. atg_mul_, atg_mvlgamma_) have the same parameter list as their
// non-underscore counterpart (presumably the in-place form; confirm against the
// implementation). The `*_scalar*` variants take a `scalar` where the plain
// variant takes a `tensor`.
void atg_mul(tensor *, tensor self, tensor other);
void atg_mul_(tensor *, tensor self, tensor other);
void atg_mul_out(tensor *, tensor out, tensor self, tensor other);
void atg_mul_scalar(tensor *, tensor self, scalar other);
void atg_mul_scalar_(tensor *, tensor self, scalar other);
void atg_mul_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_multi_margin_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, scalar p, scalar margin, tensor weight, int64_t reduction);
void atg_multi_margin_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, scalar p, scalar margin, tensor weight, int64_t reduction);
void atg_multilabel_margin_loss(tensor *, tensor self, tensor target, int64_t reduction);
void atg_multilabel_margin_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction, tensor is_target);
void atg_multilabel_margin_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction, tensor is_target);
void atg_multilabel_margin_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
void atg_multinomial(tensor *, tensor self, int64_t num_samples, int replacement);
void atg_multinomial_out(tensor *, tensor out, tensor self, int64_t num_samples, int replacement);
void atg_multiply(tensor *, tensor self, tensor other);
void atg_multiply_(tensor *, tensor self, tensor other);
void atg_multiply_out(tensor *, tensor out, tensor self, tensor other);
void atg_multiply_scalar(tensor *, tensor self, scalar other);
void atg_multiply_scalar_(tensor *, tensor self, scalar other);
void atg_mv(tensor *, tensor self, tensor vec);
void atg_mv_out(tensor *, tensor out, tensor self, tensor vec);
void atg_mvlgamma(tensor *, tensor self, int64_t p);
void atg_mvlgamma_(tensor *, tensor self, int64_t p);
void atg_mvlgamma_out(tensor *, tensor out, tensor self, int64_t p);
// nan_to_num, nanmean, nanmedian, nanquantile, nansum and narrow wrappers.
// All return void and take an unnamed leading `tensor *`.
// Several parameters come as a `<name>_v` value paired with a `uint8_t <name>_null`
// (nan, posinf, neginf, dim). NOTE(review): this looks like an optional-value
// encoding where `_null` marks "absent"; confirm against the implementation.
// Strings are passed as `char* <name>_ptr` plus `int <name>_len`; no NUL terminator
// is implied by the signature.
void atg_nan_to_num(tensor *, tensor self, double nan_v, uint8_t nan_null, double posinf_v, uint8_t posinf_null, double neginf_v, uint8_t neginf_null);
void atg_nan_to_num_(tensor *, tensor self, double nan_v, uint8_t nan_null, double posinf_v, uint8_t posinf_null, double neginf_v, uint8_t neginf_null);
void atg_nan_to_num_out(tensor *, tensor out, tensor self, double nan_v, uint8_t nan_null, double posinf_v, uint8_t posinf_null, double neginf_v, uint8_t neginf_null);
void atg_nanmean(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_nanmean_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_nanmedian(tensor *, tensor self);
void atg_nanmedian_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_nanmedian_dim_values(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int keepdim);
void atg_nanmedian_out(tensor *, tensor out, tensor self);
void atg_nanquantile(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_out(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_scalar(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_scalar_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nansum(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_nansum_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_narrow(tensor *, tensor self, int64_t dim, int64_t start, int64_t length);
void atg_narrow_copy(tensor *, tensor self, int64_t dim, int64_t start, int64_t length);
void atg_narrow_copy_out(tensor *, tensor out, tensor self, int64_t dim, int64_t start, int64_t length);
void atg_narrow_tensor(tensor *, tensor self, int64_t dim, tensor start, int64_t length);
// native_* wrappers (batch_norm, channel_shuffle, dropout, group_norm, layer_norm, norm).
// All return void and take an unnamed leading `tensor *`. The `*_out` variants
// of the multi-result operators take several explicit output tensors
// (out/save_mean/save_invstd, or out0..out2) before the inputs.
void atg_native_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg_native_batch_norm_out(tensor *, tensor out, tensor save_mean, tensor save_invstd, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg_native_channel_shuffle(tensor *, tensor self, int64_t groups);
void atg_native_dropout(tensor *, tensor input, double p, int train);
void atg_native_dropout_backward(tensor *, tensor grad_output, tensor mask, double scale);
void atg_native_dropout_backward_out(tensor *, tensor out, tensor grad_output, tensor mask, double scale);
void atg_native_dropout_out(tensor *, tensor out0, tensor out1, tensor input, double p, int train);
void atg_native_group_norm(tensor *, tensor input, tensor weight, tensor bias, int64_t n, int64_t C, int64_t HxW, int64_t group, double eps);
void atg_native_group_norm_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, tensor weight, tensor bias, int64_t n, int64_t C, int64_t HxW, int64_t group, double eps);
void atg_native_layer_norm(tensor *, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, tensor bias, double eps);
void atg_native_layer_norm_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, tensor bias, double eps);
void atg_native_norm(tensor *, tensor self);
void atg_native_norm_out(tensor *, tensor out, tensor self);
void atg_native_norm_scalaropt_dim_dtype(tensor *, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_native_norm_scalaropt_dim_dtype_out(tensor *, tensor out, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim, int dtype);
// ne, neg/negative and nested_to_padded_tensor wrappers.
// All return void and take an unnamed leading `tensor *`.
// Note the naming: the unsuffixed atg_ne / atg_ne_ take a `scalar other`,
// while the `*_tensor*` variants take a `tensor other`.
void atg_ne(tensor *, tensor self, scalar other);
void atg_ne_(tensor *, tensor self, scalar other);
void atg_ne_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_ne_tensor(tensor *, tensor self, tensor other);
void atg_ne_tensor_(tensor *, tensor self, tensor other);
void atg_ne_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_neg(tensor *, tensor self);
void atg_neg_(tensor *, tensor self);
void atg_neg_out(tensor *, tensor out, tensor self);
void atg_negative(tensor *, tensor self);
void atg_negative_(tensor *, tensor self);
void atg_negative_out(tensor *, tensor out, tensor self);
void atg_nested_to_padded_tensor(tensor *, tensor self, double padding, int64_t *output_size_data, int output_size_len);
// new_* factory wrappers and nextafter.
// All return void and take an unnamed leading `tensor *`. The non-`_out`
// new_* functions end with `int options_kind, int options_device`
// (NOTE(review): presumably dtype and device codes; confirm the encoding
// against the implementation). The `*_out` variants take an explicit `out`
// tensor and have no options parameters.
void atg_new_empty(tensor *, tensor self, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_new_empty_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg_new_empty_strided(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int options_kind, int options_device);
void atg_new_empty_strided_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
void atg_new_full(tensor *, tensor self, int64_t *size_data, int size_len, scalar fill_value, int options_kind, int options_device);
void atg_new_full_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, scalar fill_value);
void atg_new_ones(tensor *, tensor self, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_new_ones_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg_new_zeros(tensor *, tensor self, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_new_zeros_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg_nextafter(tensor *, tensor self, tensor other);
void atg_nextafter_(tensor *, tensor self, tensor other);
void atg_nextafter_out(tensor *, tensor out, tensor self, tensor other);
// nll_loss / nll_loss2d / nll_loss_nd wrappers, forward and backward.
// All return void and take an unnamed leading `tensor *`. Forward variants
// share the (self, target, weight, reduction, ignore_index) parameter list;
// backward variants add grad_output in front and total_weight at the end.
void atg_nll_loss(tensor *, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nll_loss2d(tensor *, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nll_loss2d_backward(tensor *, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, tensor total_weight);
void atg_nll_loss2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, tensor total_weight);
void atg_nll_loss2d_out(tensor *, tensor out, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nll_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, tensor total_weight);
void atg_nll_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, tensor total_weight);
void atg_nll_loss_nd(tensor *, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nll_loss_out(tensor *, tensor out, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
// nonzero and norm wrappers.
// Most return void and take an unnamed leading `tensor *`.
// Exception: atg_nonzero_numpy has no such parameter and instead RETURNS a
// `tensor *`. NOTE(review): how the returned array is sized/terminated and who
// frees it is not visible in this header; check the implementation before use.
void atg_nonzero(tensor *, tensor self);
tensor *atg_nonzero_numpy(tensor self);
void atg_nonzero_out(tensor *, tensor out, tensor self);
void atg_nonzero_static(tensor *, tensor self, int64_t size, int64_t fill_value);
void atg_nonzero_static_out(tensor *, tensor out, tensor self, int64_t size, int64_t fill_value);
void atg_norm(tensor *, tensor self);
void atg_norm_dtype_out(tensor *, tensor out, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_norm_except_dim(tensor *, tensor v, int64_t pow, int64_t dim);
void atg_norm_out(tensor *, tensor out, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim);
void atg_norm_scalar_out(tensor *, tensor out, tensor self);
void atg_norm_scalaropt_dim(tensor *, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim);
void atg_norm_scalaropt_dim_dtype(tensor *, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_norm_scalaropt_dtype(tensor *, tensor self, scalar p, int dtype);
void atg_norm_scalaropt_dtype_out(tensor *, tensor out, tensor self, scalar p, int dtype);
// normal, not_equal, nuclear_norm, numpy_t and one_hot wrappers.
// All return void and take an unnamed leading `tensor *`.
// As with atg_ne, the unsuffixed atg_not_equal / atg_not_equal_ take a
// `scalar other`; the `*_tensor*` variants take a `tensor other`.
void atg_normal_(tensor *, tensor self, double mean, double std);
void atg_normal_functional(tensor *, tensor self, double mean, double std);
void atg_not_equal(tensor *, tensor self, scalar other);
void atg_not_equal_(tensor *, tensor self, scalar other);
void atg_not_equal_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_not_equal_tensor(tensor *, tensor self, tensor other);
void atg_not_equal_tensor_(tensor *, tensor self, tensor other);
void atg_not_equal_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_nuclear_norm(tensor *, tensor self, int keepdim);
void atg_nuclear_norm_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_nuclear_norm_dim_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_nuclear_norm_out(tensor *, tensor out, tensor self, int keepdim);
void atg_numpy_t(tensor *, tensor self);
void atg_one_hot(tensor *, tensor self, int64_t num_classes);
// ones, orgqr, ormqr, outer and output_nr wrappers.
// All but the last return void and take an unnamed leading `tensor *`.
// atg_output_nr instead returns an `int64_t` by value and takes only `self`.
void atg_ones(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_ones_like(tensor *, tensor self);
void atg_ones_like_out(tensor *, tensor out, tensor self);
void atg_ones_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_orgqr(tensor *, tensor self, tensor input2);
void atg_orgqr_out(tensor *, tensor out, tensor self, tensor input2);
void atg_ormqr(tensor *, tensor self, tensor input2, tensor input3, int left, int transpose);
void atg_ormqr_out(tensor *, tensor out, tensor self, tensor input2, tensor input3, int left, int transpose);
void atg_outer(tensor *, tensor self, tensor vec2);
void atg_outer_out(tensor *, tensor out, tensor self, tensor vec2);
int64_t atg_output_nr(tensor self);
// pad, pad_sequence, pairwise_distance, pdist, permute, pin_memory, pinverse,
// pixel_(un)shuffle, poisson, polar, polygamma and positive wrappers.
// All return void and take an unnamed leading `tensor *`.
// atg_pad_sequence takes its tensor list as `tensor *sequences_data` plus
// `int sequences_len`. Strings (mode, padding_side) are `char* _ptr` + `int _len`.
// Parameter order differs within the polygamma family: atg_polygamma and
// atg_polygamma_out take (n, self), whereas atg_polygamma_ takes (self, n).
void atg_pad(tensor *, tensor self, int64_t *pad_data, int pad_len, char* mode_ptr, int mode_len, double value_v, uint8_t value_null);
void atg_pad_sequence(tensor *, tensor *sequences_data, int sequences_len, int batch_first, double padding_value, char* padding_side_ptr, int padding_side_len);
void atg_pairwise_distance(tensor *, tensor x1, tensor x2, double p, double eps, int keepdim);
void atg_pdist(tensor *, tensor self, double p);
void atg_permute(tensor *, tensor self, int64_t *dims_data, int dims_len);
void atg_permute_copy(tensor *, tensor self, int64_t *dims_data, int dims_len);
void atg_permute_copy_out(tensor *, tensor out, tensor self, int64_t *dims_data, int dims_len);
void atg_pin_memory(tensor *, tensor self, int device);
void atg_pinverse(tensor *, tensor self, double rcond);
void atg_pixel_shuffle(tensor *, tensor self, int64_t upscale_factor);
void atg_pixel_shuffle_out(tensor *, tensor out, tensor self, int64_t upscale_factor);
void atg_pixel_unshuffle(tensor *, tensor self, int64_t downscale_factor);
void atg_pixel_unshuffle_out(tensor *, tensor out, tensor self, int64_t downscale_factor);
void atg_poisson(tensor *, tensor self);
void atg_poisson_nll_loss(tensor *, tensor input, tensor target, int log_input, int full, double eps, int64_t reduction);
void atg_poisson_out(tensor *, tensor out, tensor self);
void atg_polar(tensor *, tensor abs, tensor angle);
void atg_polar_out(tensor *, tensor out, tensor abs, tensor angle);
void atg_polygamma(tensor *, int64_t n, tensor self);
void atg_polygamma_(tensor *, tensor self, int64_t n);
void atg_polygamma_out(tensor *, tensor out, int64_t n, tensor self);
void atg_positive(tensor *, tensor self);
// pow, prelu, prod and put wrappers.
// All return void and take an unnamed leading `tensor *`.
// The pow family mixes tensor and scalar operands, so check the exact variant:
//   atg_pow / atg_pow_tensor_ / atg_pow_tensor_tensor_out : tensor self, tensor exponent
//   atg_pow_ / atg_pow_tensor_scalar(_out)                : tensor self, scalar exponent
//   atg_pow_scalar(_out)                                  : scalar self_scalar, tensor exponent
void atg_pow(tensor *, tensor self, tensor exponent);
void atg_pow_(tensor *, tensor self, scalar exponent);
void atg_pow_scalar(tensor *, scalar self_scalar, tensor exponent);
void atg_pow_scalar_out(tensor *, tensor out, scalar self_scalar, tensor exponent);
void atg_pow_tensor_(tensor *, tensor self, tensor exponent);
void atg_pow_tensor_scalar(tensor *, tensor self, scalar exponent);
void atg_pow_tensor_scalar_out(tensor *, tensor out, tensor self, scalar exponent);
void atg_pow_tensor_tensor_out(tensor *, tensor out, tensor self, tensor exponent);
void atg_prelu(tensor *, tensor self, tensor weight);
void atg_prod(tensor *, tensor self, int dtype);
void atg_prod_dim_int(tensor *, tensor self, int64_t dim, int keepdim, int dtype);
void atg_prod_int_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim, int dtype);
void atg_prod_out(tensor *, tensor out, tensor self, int dtype);
void atg_put(tensor *, tensor self, tensor index, tensor source, int accumulate);
void atg_put_(tensor *, tensor self, tensor index, tensor source, int accumulate);
void atg_put_out(tensor *, tensor out, tensor self, tensor index, tensor source, int accumulate);
// q_* accessors, qr and quantile wrappers.
// Three accessors return their result by value and take only `self`:
// atg_q_per_channel_axis (int64_t), atg_q_scale (double) and
// atg_q_zero_point (int64_t). Everything else returns void and takes an
// unnamed leading `tensor *`.
// The quantile functions pass `dim` as a `dim_v`/`dim_null` pair and the
// interpolation string as `char* interpolation_ptr` + `int interpolation_len`.
int64_t atg_q_per_channel_axis(tensor self);
void atg_q_per_channel_scales(tensor *, tensor self);
void atg_q_per_channel_scales_out(tensor *, tensor out, tensor self);
void atg_q_per_channel_zero_points(tensor *, tensor self);
void atg_q_per_channel_zero_points_out(tensor *, tensor out, tensor self);
double atg_q_scale(tensor self);
int64_t atg_q_zero_point(tensor self);
void atg_qr(tensor *, tensor self, int some);
void atg_qr_q(tensor *, tensor Q, tensor R, tensor self, int some);
void atg_quantile(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_out(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_scalar(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_scalar_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
// quantize_* and quantized_* wrappers (per-channel/per-tensor quantize,
// quantized batch_norm, RNN cells and max_pool1d/2d/3d).
// Most return void and take an unnamed leading `tensor *`. Two differ:
//   - atg_quantize_per_tensor_tensors RETURNS a `tensor *` and takes its input
//     list as `tensors_data`/`tensors_len`. NOTE(review): the length and
//     ownership of the returned array are not visible here; check the implementation.
//   - atg_quantize_per_tensor_tensors_out has a NAMED `tensor *out_data` with
//     `int out_len` as its first parameters instead of the unnamed pointer.
// atg_quantized_lstm_cell takes `hx` as a tensor list (`hx_data`/`hx_len`);
// the gru/rnn cell variants take a single `tensor hx`.
void atg_quantize_per_channel(tensor *, tensor self, tensor scales, tensor zero_points, int64_t axis, int dtype);
void atg_quantize_per_channel_out(tensor *, tensor out, tensor self, tensor scales, tensor zero_points, int64_t axis, int dtype);
void atg_quantize_per_tensor(tensor *, tensor self, double scale, int64_t zero_point, int dtype);
void atg_quantize_per_tensor_dynamic(tensor *, tensor self, int dtype, int reduce_range);
void atg_quantize_per_tensor_dynamic_out(tensor *, tensor out, tensor self, int dtype, int reduce_range);
void atg_quantize_per_tensor_out(tensor *, tensor out, tensor self, double scale, int64_t zero_point, int dtype);
void atg_quantize_per_tensor_tensor_qparams(tensor *, tensor self, tensor scale, tensor zero_point, int dtype);
void atg_quantize_per_tensor_tensor_qparams_out(tensor *, tensor out, tensor self, tensor scale, tensor zero_point, int dtype);
tensor *atg_quantize_per_tensor_tensors(tensor *tensors_data, int tensors_len, tensor scales, tensor zero_points, int dtype);
void atg_quantize_per_tensor_tensors_out(tensor *out_data, int out_len, tensor *tensors_data, int tensors_len, tensor scales, tensor zero_points, int dtype);
void atg_quantized_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor mean, tensor var, double eps, double output_scale, int64_t output_zero_point);
void atg_quantized_batch_norm_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, tensor mean, tensor var, double eps, double output_scale, int64_t output_zero_point);
void atg_quantized_gru_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_quantized_lstm_cell(tensor *, tensor input, tensor *hx_data, int hx_len, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_quantized_max_pool1d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool1d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool2d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool3d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_rnn_relu_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_quantized_rnn_tanh_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
// rad2deg, rand/randint/randn, random, randperm and range wrappers.
// All return void and take an unnamed leading `tensor *`.
// Factory-style functions that take no input tensor (atg_rand, atg_randint,
// atg_randint_low, atg_randn, atg_randperm, atg_range, atg_range_step) end
// with `int options_kind, int options_device`; their `*_out` and `*_like`
// counterparts do not. atg_random_from* pass the upper bound as a
// `to_v`/`to_null` pair.
// NOTE(review): atg_range and atg_range_step have identical parameter lists in
// this header (no step argument is visible on the latter); confirm against the
// implementation which overload each one binds to.
void atg_rad2deg(tensor *, tensor self);
void atg_rad2deg_(tensor *, tensor self);
void atg_rad2deg_out(tensor *, tensor out, tensor self);
void atg_rand(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_rand_like(tensor *, tensor self);
void atg_rand_like_out(tensor *, tensor out, tensor self);
void atg_rand_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_randint(tensor *, int64_t high, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_randint_like(tensor *, tensor self, int64_t high);
void atg_randint_like_low_dtype(tensor *, tensor self, int64_t low, int64_t high);
void atg_randint_like_low_dtype_out(tensor *, tensor out, tensor self, int64_t low, int64_t high);
void atg_randint_like_out(tensor *, tensor out, tensor self, int64_t high);
void atg_randint_like_tensor(tensor *, tensor self, tensor high);
void atg_randint_like_tensor_out(tensor *, tensor out, tensor self, tensor high);
void atg_randint_low(tensor *, int64_t low, int64_t high, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_randint_low_out(tensor *, tensor out, int64_t low, int64_t high, int64_t *size_data, int size_len);
void atg_randint_out(tensor *, tensor out, int64_t high, int64_t *size_data, int size_len);
void atg_randn(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_randn_like(tensor *, tensor self);
void atg_randn_like_out(tensor *, tensor out, tensor self);
void atg_randn_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_random(tensor *, tensor self);
void atg_random_(tensor *, tensor self);
void atg_random_from(tensor *, tensor self, int64_t from, int64_t to_v, uint8_t to_null);
void atg_random_from_(tensor *, tensor self, int64_t from, int64_t to_v, uint8_t to_null);
void atg_random_from_out(tensor *, tensor out, tensor self, int64_t from, int64_t to_v, uint8_t to_null);
void atg_random_out(tensor *, tensor out, tensor self);
void atg_random_to(tensor *, tensor self, int64_t to);
void atg_random_to_(tensor *, tensor self, int64_t to);
void atg_random_to_out(tensor *, tensor out, tensor self, int64_t to);
void atg_randperm(tensor *, int64_t n, int options_kind, int options_device);
void atg_randperm_out(tensor *, tensor out, int64_t n);
void atg_range(tensor *, scalar start, scalar end, int options_kind, int options_device);
void atg_range_out(tensor *, tensor out, scalar start, scalar end);
void atg_range_out_(tensor *, tensor out, scalar start, scalar end);
void atg_range_step(tensor *, scalar start, scalar end, int options_kind, int options_device);
// ravel, real, reciprocal, reflection_pad1d/2d/3d and relu/relu6 wrappers.
// All return void and take an unnamed leading `tensor *`.
// Each reflection_pad{1,2,3}d family has the same four shapes: forward,
// `_backward` (adds grad_output), `_backward_grad_input` (adds grad_input)
// and `_out`; padding is always an `int64_t *padding_data` + `int padding_len` pair.
void atg_ravel(tensor *, tensor self);
void atg_real(tensor *, tensor self);
void atg_reciprocal(tensor *, tensor self);
void atg_reciprocal_(tensor *, tensor self);
void atg_reciprocal_out(tensor *, tensor out, tensor self);
void atg_reflection_pad1d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad1d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad1d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad2d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad2d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad2d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad3d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad3d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad3d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_relu(tensor *, tensor self);
void atg_relu6(tensor *, tensor self);
void atg_relu6_(tensor *, tensor self);
void atg_relu_(tensor *, tensor self);
void atg_relu_out(tensor *, tensor out, tensor self);
// remainder, renorm and repeat/repeat_interleave wrappers.
// All return void and take an unnamed leading `tensor *`.
// The unsuffixed atg_remainder / atg_remainder_ take a `scalar other`; the
// `*_tensor*` variants take a `tensor other`, and `*_scalar_tensor*` take a
// `scalar self_scalar` with a `tensor other`.
// repeat_interleave passes `dim` and `output_size` as `_v`/`_null` pairs.
void atg_remainder(tensor *, tensor self, scalar other);
void atg_remainder_(tensor *, tensor self, scalar other);
void atg_remainder_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_remainder_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_remainder_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_remainder_tensor(tensor *, tensor self, tensor other);
void atg_remainder_tensor_(tensor *, tensor self, tensor other);
void atg_remainder_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_renorm(tensor *, tensor self, scalar p, int64_t dim, scalar maxnorm);
void atg_renorm_(tensor *, tensor self, scalar p, int64_t dim, scalar maxnorm);
void atg_renorm_out(tensor *, tensor out, tensor self, scalar p, int64_t dim, scalar maxnorm);
void atg_repeat(tensor *, tensor self, int64_t *repeats_data, int repeats_len);
void atg_repeat_interleave(tensor *, tensor repeats, int64_t output_size_v, uint8_t output_size_null);
void atg_repeat_interleave_self_int(tensor *, tensor self, int64_t repeats, int64_t dim_v, uint8_t dim_null, int64_t output_size_v, uint8_t output_size_null);
void atg_repeat_interleave_self_tensor(tensor *, tensor self, tensor repeats, int64_t dim_v, uint8_t dim_null, int64_t output_size_v, uint8_t output_size_null);
void atg_repeat_interleave_tensor_out(tensor *, tensor out, tensor repeats, int64_t output_size_v, uint8_t output_size_null);
void atg_repeat_out(tensor *, tensor out, tensor self, int64_t *repeats_data, int repeats_len);
// replication_pad1d/2d/3d wrappers.
// All return void and take an unnamed leading `tensor *`. Each dimensionality
// has the same four shapes: forward, `_backward` (adds grad_output),
// `_backward_grad_input` (adds grad_input) and `_out`; padding is always an
// `int64_t *padding_data` + `int padding_len` pair.
void atg_replication_pad1d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad1d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad1d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad2d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad2d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad2d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad3d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad3d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad3d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
// FFI declarations for `requires_grad_`, `reshape`, and `reshape_as`.
// `requires_grad` is passed as an `int` (presumably a 0/1 flag). The target
// shape for `reshape` is passed as `shape_data` / `shape_len`, while
// `reshape_as` takes a second tensor `other` instead.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_requires_grad_(tensor *, tensor self, int requires_grad);
void atg_reshape(tensor *, tensor self, int64_t *shape_data, int shape_len);
void atg_reshape_as(tensor *, tensor self, tensor other);
// FFI declarations for the `resize`, `resize_as`, and `resize_as_sparse`
// operators, each in plain, trailing-underscore, and `_out` variants.
// `resize*` takes the new size as `size_data` / `size_len`; the `resize_as*`
// forms take a tensor named `the_template` instead.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_resize(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_resize_(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_resize_as(tensor *, tensor self, tensor the_template);
void atg_resize_as_(tensor *, tensor self, tensor the_template);
void atg_resize_as_out(tensor *, tensor out, tensor self, tensor the_template);
void atg_resize_as_sparse(tensor *, tensor self, tensor the_template);
void atg_resize_as_sparse_(tensor *, tensor self, tensor the_template);
void atg_resize_as_sparse_out(tensor *, tensor out, tensor self, tensor the_template);
void atg_resize_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
// FFI declarations for `resolve_conj`, `resolve_neg`, and `retains_grad`.
// Unlike its neighbours, `atg_retains_grad` has no `tensor *` parameter and
// returns its answer directly as an `int` (presumably a 0/1 flag -- confirm).
void atg_resolve_conj(tensor *, tensor self);
void atg_resolve_neg(tensor *, tensor self);
int atg_retains_grad(tensor self);
// FFI declarations for `rms_norm` and the `rnn_relu` / `rnn_tanh` operators
// (sequence, `_cell`, and `_data` variants).
// - `normalized_shape_data` / `normalized_shape_len`: int64_t array + count.
// - `eps_v` / `eps_null`: presumably an optional double encoded as a value
//   plus an is-null flag -- TODO confirm.
// - `params_data` / `params_len`: array of `tensor` handles + count.
// - `has_biases`, `train`, `bidirectional`, `batch_first` are passed as `int`
//   (presumably 0/1 flags). The `_data` variants take `batch_sizes` and have
//   no `batch_first` parameter.
// The unnamed leading `tensor *` is presumably the result slot; how many
// handles each function writes there is not visible in this header.
void atg_rms_norm(tensor *, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, double eps_v, uint8_t eps_null);
void atg_rnn_relu(tensor *, tensor input, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg_rnn_relu_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh);
void atg_rnn_relu_data(tensor *, tensor data, tensor batch_sizes, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional);
void atg_rnn_tanh(tensor *, tensor input, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg_rnn_tanh_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh);
void atg_rnn_tanh_data(tensor *, tensor data, tensor batch_sizes, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional);
// FFI declarations for `roll` and `rot90`, each with an `_out` variant.
// `shifts_data` / `shifts_len` and `dims_data` / `dims_len` appear to be
// int64_t arrays with their element counts; `rot90` takes a single `k`.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_roll(tensor *, tensor self, int64_t *shifts_data, int shifts_len, int64_t *dims_data, int dims_len);
void atg_roll_out(tensor *, tensor out, tensor self, int64_t *shifts_data, int shifts_len, int64_t *dims_data, int dims_len);
void atg_rot90(tensor *, tensor self, int64_t k, int64_t *dims_data, int dims_len);
void atg_rot90_out(tensor *, tensor out, tensor self, int64_t k, int64_t *dims_data, int dims_len);
// FFI declarations for `round`. The `round_decimals*` variants take an extra
// `int64_t decimals` argument; both families have plain, trailing-underscore,
// and `_out` forms.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_round(tensor *, tensor self);
void atg_round_(tensor *, tensor self);
void atg_round_decimals(tensor *, tensor self, int64_t decimals);
void atg_round_decimals_(tensor *, tensor self, int64_t decimals);
void atg_round_decimals_out(tensor *, tensor out, tensor self, int64_t decimals);
void atg_round_out(tensor *, tensor out, tensor self);
// FFI declarations for `row_indices` (plus `_copy` / `_copy_out`) and
// `row_stack` (plus `_out`).
// `row_stack` takes its inputs as `tensors_data` / `tensors_len`, an array of
// `tensor` handles with its element count.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_row_indices(tensor *, tensor self);
void atg_row_indices_copy(tensor *, tensor self);
void atg_row_indices_copy_out(tensor *, tensor out, tensor self);
void atg_row_stack(tensor *, tensor *tensors_data, int tensors_len);
void atg_row_stack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
// FFI declarations for `rrelu` and `rrelu_with_noise` (forward, in-place
// style `_`, `_functional`, `_out`, and backward variants).
// `training` and `self_is_result` are passed as `int` (presumably 0/1 flags).
// Only the backward variants expose `lower` / `upper` scalars; the forward
// declarations here take none.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_rrelu(tensor *, tensor self, int training);
void atg_rrelu_(tensor *, tensor self, int training);
void atg_rrelu_with_noise(tensor *, tensor self, tensor noise, int training);
void atg_rrelu_with_noise_(tensor *, tensor self, tensor noise, int training);
void atg_rrelu_with_noise_backward(tensor *, tensor grad_output, tensor self, tensor noise, scalar lower, scalar upper, int training, int self_is_result);
void atg_rrelu_with_noise_backward_out(tensor *, tensor out, tensor grad_output, tensor self, tensor noise, scalar lower, scalar upper, int training, int self_is_result);
void atg_rrelu_with_noise_functional(tensor *, tensor self, tensor noise, int training);
void atg_rrelu_with_noise_out(tensor *, tensor out, tensor self, tensor noise, int training);
// FFI declarations for `rsqrt` (plain, `_`, `_out`) and `rsub`.
// `rsub` comes in tensor-operand and scalar-operand forms, each with a
// matching `_out` variant.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_rsqrt(tensor *, tensor self);
void atg_rsqrt_(tensor *, tensor self);
void atg_rsqrt_out(tensor *, tensor out, tensor self);
void atg_rsub(tensor *, tensor self, tensor other);
void atg_rsub_scalar(tensor *, tensor self, scalar other);
void atg_rsub_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_rsub_tensor_out(tensor *, tensor out, tensor self, tensor other);
// FFI declarations for `scalar_tensor` and `scaled_dot_product_attention`.
// - `options_kind` / `options_device` are plain `int`s; presumably they select
//   the element type and device of the new tensor -- TODO confirm encoding.
// - `scale_v` / `scale_null` presumably encode an optional double as a value
//   plus an is-null flag.
// - `is_causal` and `enable_gqa` are passed as `int` (presumably 0/1 flags).
// The unnamed leading `tensor *` is presumably the result slot.
void atg_scalar_tensor(tensor *, scalar s, int options_kind, int options_device);
void atg_scalar_tensor_out(tensor *, tensor out, scalar s);
void atg_scaled_dot_product_attention(tensor *, tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, double scale_v, uint8_t scale_null, int enable_gqa);
// FFI declarations for the `scatter` operator family.
// Variants are distinguished by the source operand (`tensor src` versus
// `scalar value`), by an optional reduction (`*_reduce*`), and by the usual
// plain / trailing-underscore / `_out` forms; `scatter_add*` is declared
// alongside.
// The reduction is passed as `reduce_ptr` / `reduce_len`, a `char` pointer
// with an explicit length. Whether the buffer must also be NUL-terminated and
// which reduction names are accepted is not visible here -- confirm against
// the implementation.
// The unnamed leading `tensor *` is presumably the result slot.
void atg_scatter(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_add(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_add_(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_add_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_reduce(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len);
void atg_scatter_reduce_(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len);
void atg_scatter_reduce_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len);
void atg_scatter_src_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_value(tensor *, tensor self, int64_t dim, tensor index, scalar value);
void atg_scatter_value_(tensor *, tensor self, int64_t dim, tensor index, scalar value);
void atg_scatter_value_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, scalar value);
void atg_scatter_value_reduce(tensor *, tensor self, int64_t dim, tensor index, scalar value, char* reduce_ptr, int reduce_len);
void atg_scatter_value_reduce_(tensor *, tensor self, int64_t dim, tensor index, scalar value, char* reduce_ptr, int reduce_len);
void atg_scatter_value_reduce_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, scalar value, char* reduce_ptr, int reduce_len);
// FFI declarations for `searchsorted` (tensor and scalar query forms, each
// with an `_out` variant) and `segment_reduce` (plus `_out`).
// - `side_ptr` / `side_len` and `reduce_ptr` / `reduce_len` are `char`
//   pointers with explicit lengths; accepted values are not visible here.
// - `out_int32`, `right`, and `unsafe` are passed as `int` (presumably 0/1
//   flags).
// - `sorter`, `lengths`, `indices`, `offsets` are ordinary `tensor`
//   parameters in this header; whether a null handle is accepted for any of
//   them must be checked against the implementation.
// The unnamed leading `tensor *` is presumably the result slot.
void atg_searchsorted(tensor *, tensor sorted_sequence, tensor self, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_searchsorted_scalar(tensor *, tensor sorted_sequence, scalar self_scalar, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_searchsorted_scalar_out(tensor *, tensor out, tensor sorted_sequence, scalar self_scalar, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_searchsorted_tensor_out(tensor *, tensor out, tensor sorted_sequence, tensor self, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_segment_reduce(tensor *, tensor data, char* reduce_ptr, int reduce_len, tensor lengths, tensor indices, tensor offsets, int64_t axis, int unsafe, scalar initial);
void atg_segment_reduce_out(tensor *, tensor out, tensor data, char* reduce_ptr, int reduce_len, tensor lengths, tensor indices, tensor offsets, int64_t axis, int unsafe, scalar initial);
// FFI declarations for `select` and its `_backward`, `_copy`, and `_scatter`
// relatives (with `_out` variants where declared).
// All take an `int64_t dim` and `int64_t index`; the backward forms also take
// the original input sizes as `input_sizes_data` / `input_sizes_len`.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_select(tensor *, tensor self, int64_t dim, int64_t index);
void atg_select_backward(tensor *, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t index);
void atg_select_backward_out(tensor *, tensor out, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t index);
void atg_select_copy(tensor *, tensor self, int64_t dim, int64_t index);
void atg_select_copy_int_out(tensor *, tensor out, tensor self, int64_t dim, int64_t index);
void atg_select_scatter(tensor *, tensor self, tensor src, int64_t dim, int64_t index);
void atg_select_scatter_out(tensor *, tensor out, tensor self, tensor src, int64_t dim, int64_t index);
// FFI declarations for `selu` and the `set*` operators.
// `atg_set_data` is the one declaration here without a leading `tensor *`:
// it takes only `self` and `new_data` and returns void.
// `atg_set_requires_grad` takes its flag as `int r` (presumably 0/1).
// `atg_set_source_tensor_storage_offset_` additionally takes a storage offset
// plus `size_data` / `size_len` and `stride_data` / `stride_len` arrays.
// For the remaining functions the unnamed leading `tensor *` is presumably
// the result slot -- TODO confirm.
void atg_selu(tensor *, tensor self);
void atg_selu_(tensor *, tensor self);
void atg_set(tensor *, tensor self);
void atg_set_(tensor *, tensor self);
void atg_set_data(tensor self, tensor new_data);
void atg_set_out(tensor *, tensor out, tensor self);
void atg_set_requires_grad(tensor *, tensor self, int r);
void atg_set_source_tensor(tensor *, tensor self, tensor source);
void atg_set_source_tensor_(tensor *, tensor self, tensor source);
void atg_set_source_tensor_out(tensor *, tensor out, tensor self, tensor source);
void atg_set_source_tensor_storage_offset_(tensor *, tensor self, tensor source, int64_t storage_offset, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
// FFI declarations for the unary operators `sgn`, `sigmoid`, `sign`, and
// `signbit`, in plain / trailing-underscore / `_out` forms where declared
// (`signbit` has no `_` form here).
// `sigmoid` additionally declares backward entry points that take
// `grad_output` and the forward `output`.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_sgn(tensor *, tensor self);
void atg_sgn_(tensor *, tensor self);
void atg_sgn_out(tensor *, tensor out, tensor self);
void atg_sigmoid(tensor *, tensor self);
void atg_sigmoid_(tensor *, tensor self);
void atg_sigmoid_backward(tensor *, tensor grad_output, tensor output);
void atg_sigmoid_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor output);
void atg_sigmoid_out(tensor *, tensor out, tensor self);
void atg_sign(tensor *, tensor self);
void atg_sign_(tensor *, tensor self);
void atg_sign_out(tensor *, tensor out, tensor self);
void atg_signbit(tensor *, tensor self);
void atg_signbit_out(tensor *, tensor out, tensor self);
// FFI declarations for the unary operators `silu`, `sin`, `sinc`, and `sinh`,
// each in plain / trailing-underscore / `_out` forms.
// `silu` additionally declares backward entry points that take `grad_output`
// and the forward input `self`.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_silu(tensor *, tensor self);
void atg_silu_(tensor *, tensor self);
void atg_silu_backward(tensor *, tensor grad_output, tensor self);
void atg_silu_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self);
void atg_silu_out(tensor *, tensor out, tensor self);
void atg_sin(tensor *, tensor self);
void atg_sin_(tensor *, tensor self);
void atg_sin_out(tensor *, tensor out, tensor self);
void atg_sinc(tensor *, tensor self);
void atg_sinc_(tensor *, tensor self);
void atg_sinc_out(tensor *, tensor out, tensor self);
void atg_sinh(tensor *, tensor self);
void atg_sinh_(tensor *, tensor self);
void atg_sinh_out(tensor *, tensor out, tensor self);
// FFI declarations for the `slice` operator family (`slice`, `_backward`,
// `_copy`, `_inverse`, `_scatter`, and their `_out` variants).
// Most variants pass the bounds as `start_v` / `start_null` and
// `end_v` / `end_null`, presumably optional integers encoded as a value plus
// an is-null flag -- TODO confirm. The backward variants instead take plain
// `int64_t start` / `end` and the original sizes as
// `input_sizes_data` / `input_sizes_len`.
// The unnamed leading `tensor *` is presumably the result slot.
void atg_slice(tensor *, tensor self, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_backward(tensor *, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t start, int64_t end, int64_t step);
void atg_slice_backward_out(tensor *, tensor out, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t start, int64_t end, int64_t step);
void atg_slice_copy(tensor *, tensor self, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_copy_tensor_out(tensor *, tensor out, tensor self, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_inverse(tensor *, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_scatter(tensor *, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_scatter_out(tensor *, tensor out, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
// FFI declarations for `slogdet`. The `_out` form names two destination
// tensors, `sign` and `logabsdet`, which suggests the operator yields two
// results; how many handles are written through the unnamed leading
// `tensor *` is not visible here -- confirm against the implementation.
void atg_slogdet(tensor *, tensor self);
void atg_slogdet_out(tensor *, tensor sign, tensor logabsdet, tensor self);
// FFI declarations for the `slow_conv*` convolution operators
// (`slow_conv3d`, `slow_conv_dilated2d/3d`, `slow_conv_transpose2d/3d`),
// each with an `_out` variant.
// Geometry arguments are passed as int64_t `<name>_data` / `<name>_len`
// pairs: `kernel_size`, `stride`, `padding`, plus `dilation` for the dilated
// and transpose forms and `output_padding` for the transpose forms. The
// expected array lengths are not visible in this header.
// `bias` is an ordinary `tensor` parameter here; whether a null handle is
// accepted must be checked against the implementation.
// The unnamed leading `tensor *` is presumably the result slot.
void atg_slow_conv3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_slow_conv3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_slow_conv_dilated2d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_dilated2d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_dilated3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_dilated3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_transpose2d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_transpose2d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_transpose3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_transpose3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *dilation_data, int dilation_len);
// FFI declarations for `smm` and the `smooth_l1_loss` / `soft_margin_loss`
// loss operators (forward, `_out`, `_backward`, `_backward_grad_input`).
// `reduction` is passed as a raw `int64_t`; the meaning of each value is not
// visible here -- confirm against the implementation. `smooth_l1_loss*`
// additionally takes a `double beta`.
// The unnamed leading `tensor *` is presumably the result slot.
void atg_smm(tensor *, tensor self, tensor mat2);
void atg_smooth_l1_loss(tensor *, tensor self, tensor target, int64_t reduction, double beta);
void atg_smooth_l1_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction, double beta);
void atg_smooth_l1_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction, double beta);
void atg_smooth_l1_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction, double beta);
void atg_soft_margin_loss(tensor *, tensor self, tensor target, int64_t reduction);
void atg_soft_margin_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_soft_margin_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_soft_margin_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
// FFI declarations for `softmax`, `softplus`, and `softshrink`.
// - `softmax*` takes `int dtype`; the integer encoding of element types is
//   not visible here -- confirm against the implementation.
// - The forward `softplus` / `softshrink` declarations take only `self`
//   (no `beta`, `threshold`, or `lambd`), whereas the backward variants take
//   those values as `scalar` arguments.
// The unnamed leading `tensor *` is presumably the result slot.
void atg_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_softmax_int_out(tensor *, tensor out, tensor self, int64_t dim, int dtype);
void atg_softplus(tensor *, tensor self);
void atg_softplus_backward(tensor *, tensor grad_output, tensor self, scalar beta, scalar threshold);
void atg_softplus_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar beta, scalar threshold);
void atg_softplus_out(tensor *, tensor out, tensor self);
void atg_softshrink(tensor *, tensor self);
void atg_softshrink_backward(tensor *, tensor grad_output, tensor self, scalar lambd);
void atg_softshrink_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar lambd);
void atg_softshrink_out(tensor *, tensor out, tensor self);
// FFI declarations for `sort`. The `_stable` variants add an `int stable`
// argument; the `_values*` variants take explicit `values` and `indices`
// destination tensors, which suggests the operator yields two results.
// `descending` and `stable` are passed as `int` (presumably 0/1 flags).
// How many handles are written through the unnamed leading `tensor *` is not
// visible here -- confirm against the implementation.
void atg_sort(tensor *, tensor self, int64_t dim, int descending);
void atg_sort_stable(tensor *, tensor self, int stable, int64_t dim, int descending);
void atg_sort_values(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int descending);
void atg_sort_values_stable(tensor *, tensor values, tensor indices, tensor self, int stable, int64_t dim, int descending);
// FFI declarations for the sparse tensor constructors (`sparse_bsc`,
// `sparse_bsr`, `sparse_compressed`, `sparse_coo`, `sparse_csc`,
// `sparse_csr`).
// Each layout has a form without an explicit size and a `*_size` form that
// takes `size_data` / `size_len`. `options_kind` / `options_device` are plain
// `int`s, presumably selecting the element type and device of the result --
// TODO confirm encoding. The `sparse_coo_tensor_indices*` forms also take
// `int is_coalesced` (presumably a 0/1 flag).
// The unnamed leading `tensor *` is presumably the result slot.
void atg_sparse_bsc_tensor(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int options_kind, int options_device);
void atg_sparse_bsc_tensor_ccol_row_value_size(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_sparse_bsr_tensor(tensor *, tensor crow_indices, tensor col_indices, tensor values, int options_kind, int options_device);
void atg_sparse_bsr_tensor_crow_col_value_size(tensor *, tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_sparse_compressed_tensor(tensor *, tensor compressed_indices, tensor plain_indices, tensor values, int options_kind, int options_device);
void atg_sparse_compressed_tensor_comp_plain_value_size(tensor *, tensor compressed_indices, tensor plain_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_sparse_coo_tensor(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_sparse_coo_tensor_indices(tensor *, tensor indices, tensor values, int options_kind, int options_device, int is_coalesced);
void atg_sparse_coo_tensor_indices_size(tensor *, tensor indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device, int is_coalesced);
void atg_sparse_coo_tensor_size_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_sparse_csc_tensor(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int options_kind, int options_device);
void atg_sparse_csc_tensor_ccol_row_value_size(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_sparse_csr_tensor(tensor *, tensor crow_indices, tensor col_indices, tensor values, int options_kind, int options_device);
void atg_sparse_csr_tensor_crow_col_value_size(tensor *, tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
// FFI declarations for `sparse_dim`, `sparse_mask`, `sparse_resize`,
// `sparse_resize_and_clear`, and `sparse_sampled_addmm`.
// `atg_sparse_dim` has no `tensor *` parameter and returns its result
// directly as `int64_t`. The resize forms take the new size as
// `size_data` / `size_len` together with `sparse_dim` and `dense_dim`.
// `sparse_sampled_addmm*` as declared here takes only the three tensors
// (no scaling scalars).
// For the void functions the unnamed leading `tensor *` is presumably the
// result slot -- TODO confirm.
int64_t atg_sparse_dim(tensor self);
void atg_sparse_mask(tensor *, tensor self, tensor mask);
void atg_sparse_mask_out(tensor *, tensor out, tensor self, tensor mask);
void atg_sparse_resize(tensor *, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_and_clear(tensor *, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_and_clear_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_and_clear_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_sampled_addmm(tensor *, tensor self, tensor mat1, tensor mat2);
void atg_sparse_sampled_addmm_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2);
// FFI declarations for the unary `special_airy_ai` and
// `special_bessel_{j0,j1,y0,y1}` operators, each with an `_out` variant.
// Note the input is named `x` for `airy_ai` and `self` for the Bessel forms.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_airy_ai(tensor *, tensor x);
void atg_special_airy_ai_out(tensor *, tensor out, tensor x);
void atg_special_bessel_j0(tensor *, tensor self);
void atg_special_bessel_j0_out(tensor *, tensor out, tensor self);
void atg_special_bessel_j1(tensor *, tensor self);
void atg_special_bessel_j1_out(tensor *, tensor out, tensor self);
void atg_special_bessel_y0(tensor *, tensor self);
void atg_special_bessel_y0_out(tensor *, tensor out, tensor self);
void atg_special_bessel_y1(tensor *, tensor self);
void atg_special_bessel_y1_out(tensor *, tensor out, tensor self);
// FFI declarations for `special_chebyshev_polynomial_{t,u,v,w}`.
// Each of the four kinds has six parallel entry points: both operands as
// tensors, `n` as a scalar (`_n_scalar`), or `x` as a scalar (`_x_scalar`),
// each with a matching `_out` variant.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_chebyshev_polynomial_t(tensor *, tensor x, tensor n);
void atg_special_chebyshev_polynomial_t_n_scalar(tensor *, tensor x, scalar n);
void atg_special_chebyshev_polynomial_t_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_chebyshev_polynomial_t_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_chebyshev_polynomial_t_x_scalar(tensor *, scalar x, tensor n);
void atg_special_chebyshev_polynomial_t_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_chebyshev_polynomial_u(tensor *, tensor x, tensor n);
void atg_special_chebyshev_polynomial_u_n_scalar(tensor *, tensor x, scalar n);
void atg_special_chebyshev_polynomial_u_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_chebyshev_polynomial_u_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_chebyshev_polynomial_u_x_scalar(tensor *, scalar x, tensor n);
void atg_special_chebyshev_polynomial_u_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_chebyshev_polynomial_v(tensor *, tensor x, tensor n);
void atg_special_chebyshev_polynomial_v_n_scalar(tensor *, tensor x, scalar n);
void atg_special_chebyshev_polynomial_v_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_chebyshev_polynomial_v_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_chebyshev_polynomial_v_x_scalar(tensor *, scalar x, tensor n);
void atg_special_chebyshev_polynomial_v_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_chebyshev_polynomial_w(tensor *, tensor x, tensor n);
void atg_special_chebyshev_polynomial_w_n_scalar(tensor *, tensor x, scalar n);
void atg_special_chebyshev_polynomial_w_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_chebyshev_polynomial_w_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_chebyshev_polynomial_w_x_scalar(tensor *, scalar x, tensor n);
void atg_special_chebyshev_polynomial_w_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
// FFI declarations for a run of `special_*` operators, each paired with an
// `_out` variant: `digamma`, `entr`, `erf`, `erfc`, `erfcx`, `erfinv`,
// `exp2`, `expit`, `expm1`, `gammainc`, `gammaincc`, and `gammaln`.
// All are unary on `self` except `gammainc` / `gammaincc`, which take a
// second tensor `other`.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_digamma(tensor *, tensor self);
void atg_special_digamma_out(tensor *, tensor out, tensor self);
void atg_special_entr(tensor *, tensor self);
void atg_special_entr_out(tensor *, tensor out, tensor self);
void atg_special_erf(tensor *, tensor self);
void atg_special_erf_out(tensor *, tensor out, tensor self);
void atg_special_erfc(tensor *, tensor self);
void atg_special_erfc_out(tensor *, tensor out, tensor self);
void atg_special_erfcx(tensor *, tensor self);
void atg_special_erfcx_out(tensor *, tensor out, tensor self);
void atg_special_erfinv(tensor *, tensor self);
void atg_special_erfinv_out(tensor *, tensor out, tensor self);
void atg_special_exp2(tensor *, tensor self);
void atg_special_exp2_out(tensor *, tensor out, tensor self);
void atg_special_expit(tensor *, tensor self);
void atg_special_expit_out(tensor *, tensor out, tensor self);
void atg_special_expm1(tensor *, tensor self);
void atg_special_expm1_out(tensor *, tensor out, tensor self);
void atg_special_gammainc(tensor *, tensor self, tensor other);
void atg_special_gammainc_out(tensor *, tensor out, tensor self, tensor other);
void atg_special_gammaincc(tensor *, tensor self, tensor other);
void atg_special_gammaincc_out(tensor *, tensor out, tensor self, tensor other);
void atg_special_gammaln(tensor *, tensor self);
void atg_special_gammaln_out(tensor *, tensor out, tensor self);
// FFI declarations for `special_hermite_polynomial_h` and
// `special_hermite_polynomial_he`.
// Each has six parallel entry points: both operands as tensors, `n` as a
// scalar (`_n_scalar`), or `x` as a scalar (`_x_scalar`), each with a
// matching `_out` variant.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_hermite_polynomial_h(tensor *, tensor x, tensor n);
void atg_special_hermite_polynomial_h_n_scalar(tensor *, tensor x, scalar n);
void atg_special_hermite_polynomial_h_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_hermite_polynomial_h_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_hermite_polynomial_h_x_scalar(tensor *, scalar x, tensor n);
void atg_special_hermite_polynomial_h_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_hermite_polynomial_he(tensor *, tensor x, tensor n);
void atg_special_hermite_polynomial_he_n_scalar(tensor *, tensor x, scalar n);
void atg_special_hermite_polynomial_he_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_hermite_polynomial_he_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_hermite_polynomial_he_x_scalar(tensor *, scalar x, tensor n);
void atg_special_hermite_polynomial_he_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
// FFI declarations for the unary `special_i0`, `special_i0e`, `special_i1`,
// and `special_i1e` operators, each with an `_out` variant.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_i0(tensor *, tensor self);
void atg_special_i0_out(tensor *, tensor out, tensor self);
void atg_special_i0e(tensor *, tensor self);
void atg_special_i0e_out(tensor *, tensor out, tensor self);
void atg_special_i1(tensor *, tensor self);
void atg_special_i1_out(tensor *, tensor out, tensor self);
void atg_special_i1e(tensor *, tensor self);
void atg_special_i1e_out(tensor *, tensor out, tensor self);
// FFI declarations for `special_laguerre_polynomial_l` and
// `special_legendre_polynomial_p`.
// Each has six parallel entry points: both operands as tensors, `n` as a
// scalar (`_n_scalar`), or `x` as a scalar (`_x_scalar`), each with a
// matching `_out` variant.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_laguerre_polynomial_l(tensor *, tensor x, tensor n);
void atg_special_laguerre_polynomial_l_n_scalar(tensor *, tensor x, scalar n);
void atg_special_laguerre_polynomial_l_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_laguerre_polynomial_l_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_laguerre_polynomial_l_x_scalar(tensor *, scalar x, tensor n);
void atg_special_laguerre_polynomial_l_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_legendre_polynomial_p(tensor *, tensor x, tensor n);
void atg_special_legendre_polynomial_p_n_scalar(tensor *, tensor x, scalar n);
void atg_special_legendre_polynomial_p_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_legendre_polynomial_p_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_legendre_polynomial_p_x_scalar(tensor *, scalar x, tensor n);
void atg_special_legendre_polynomial_p_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
// FFI declarations for `special_log1p`, `special_log_ndtr`,
// `special_log_softmax`, `special_logit`, and `special_logsumexp`.
// - `special_log_softmax` takes `int dtype`; the integer encoding is not
//   visible here, and no `_out` variant is declared for it.
// - `eps_v` / `eps_null` presumably encode an optional double as a value
//   plus an is-null flag -- TODO confirm.
// - `dim_data` / `dim_len` appear to be an int64_t array plus its count;
//   `keepdim` is passed as `int` (presumably a 0/1 flag).
// The unnamed leading `tensor *` is presumably the result slot.
void atg_special_log1p(tensor *, tensor self);
void atg_special_log1p_out(tensor *, tensor out, tensor self);
void atg_special_log_ndtr(tensor *, tensor self);
void atg_special_log_ndtr_out(tensor *, tensor out, tensor self);
void atg_special_log_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_special_logit(tensor *, tensor self, double eps_v, uint8_t eps_null);
void atg_special_logit_out(tensor *, tensor out, tensor self, double eps_v, uint8_t eps_null);
void atg_special_logsumexp(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_special_logsumexp_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
// FFI declarations for the unary `special_modified_bessel_{i0,i1,k0,k1}`
// operators, each with an `_out` variant.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_modified_bessel_i0(tensor *, tensor self);
void atg_special_modified_bessel_i0_out(tensor *, tensor out, tensor self);
void atg_special_modified_bessel_i1(tensor *, tensor self);
void atg_special_modified_bessel_i1_out(tensor *, tensor out, tensor self);
void atg_special_modified_bessel_k0(tensor *, tensor self);
void atg_special_modified_bessel_k0_out(tensor *, tensor out, tensor self);
void atg_special_modified_bessel_k1(tensor *, tensor self);
void atg_special_modified_bessel_k1_out(tensor *, tensor out, tensor self);
// FFI declarations for `special_multigammaln`, `special_ndtr`,
// `special_ndtri`, `special_polygamma`, `special_psi`, and `special_round`,
// each with an `_out` variant.
// Note the argument order of `special_polygamma*`: the integer `n` comes
// before the tensor `self`, unlike the neighbouring functions where `self`
// precedes the extra integer argument (`p`, `decimals`).
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_multigammaln(tensor *, tensor self, int64_t p);
void atg_special_multigammaln_out(tensor *, tensor out, tensor self, int64_t p);
void atg_special_ndtr(tensor *, tensor self);
void atg_special_ndtr_out(tensor *, tensor out, tensor self);
void atg_special_ndtri(tensor *, tensor self);
void atg_special_ndtri_out(tensor *, tensor out, tensor self);
void atg_special_polygamma(tensor *, int64_t n, tensor self);
void atg_special_polygamma_out(tensor *, tensor out, int64_t n, tensor self);
void atg_special_psi(tensor *, tensor self);
void atg_special_psi_out(tensor *, tensor out, tensor self);
void atg_special_round(tensor *, tensor self, int64_t decimals);
void atg_special_round_out(tensor *, tensor out, tensor self, int64_t decimals);
// FFI declarations for the unary `special_scaled_modified_bessel_k0` and
// `special_scaled_modified_bessel_k1` operators, each with an `_out` variant.
// The input tensor is named `x` here.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_scaled_modified_bessel_k0(tensor *, tensor x);
void atg_special_scaled_modified_bessel_k0_out(tensor *, tensor out, tensor x);
void atg_special_scaled_modified_bessel_k1(tensor *, tensor x);
void atg_special_scaled_modified_bessel_k1_out(tensor *, tensor out, tensor x);
// FFI declarations for `special_shifted_chebyshev_polynomial_{t,u,v,w}`.
// Each of the four kinds has six parallel entry points: both operands as
// tensors, `n` as a scalar (`_n_scalar`), or `x` as a scalar (`_x_scalar`),
// each with a matching `_out` variant.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_shifted_chebyshev_polynomial_t(tensor *, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_t_n_scalar(tensor *, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_t_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_t_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_t_x_scalar(tensor *, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_t_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_u(tensor *, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_u_n_scalar(tensor *, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_u_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_u_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_u_x_scalar(tensor *, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_u_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_v(tensor *, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_v_n_scalar(tensor *, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_v_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_v_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_v_x_scalar(tensor *, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_v_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_w(tensor *, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_w_n_scalar(tensor *, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_w_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_w_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_w_x_scalar(tensor *, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_w_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
// FFI declarations for `special_sinc`, `special_softmax`, and
// `special_spherical_bessel_j0`.
// `special_softmax` takes `int dtype` (integer encoding not visible here) and
// has no `_out` variant declared; the other two each have one.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_sinc(tensor *, tensor self);
void atg_special_sinc_out(tensor *, tensor out, tensor self);
void atg_special_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_special_spherical_bessel_j0(tensor *, tensor x);
void atg_special_spherical_bessel_j0_out(tensor *, tensor out, tensor x);
// FFI declarations for the binary `special_xlog1py`, `special_xlogy`, and
// `special_zeta` operators.
// Each has six parallel entry points: both operands as tensors, the second
// operand as a scalar (`_other_scalar`), or the first operand as a scalar
// (`_self_scalar`, parameter named `self_scalar`), each with a matching
// `_out` variant.
// The unnamed leading `tensor *` is presumably the result slot -- TODO confirm.
void atg_special_xlog1py(tensor *, tensor self, tensor other);
void atg_special_xlog1py_other_scalar(tensor *, tensor self, scalar other);
void atg_special_xlog1py_other_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_special_xlog1py_out(tensor *, tensor out, tensor self, tensor other);
void atg_special_xlog1py_self_scalar(tensor *, scalar self_scalar, tensor other);
void atg_special_xlog1py_self_scalar_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_special_xlogy(tensor *, tensor self, tensor other);
void atg_special_xlogy_other_scalar(tensor *, tensor self, scalar other);
void atg_special_xlogy_other_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_special_xlogy_out(tensor *, tensor out, tensor self, tensor other);
void atg_special_xlogy_self_scalar(tensor *, scalar self_scalar, tensor other);
void atg_special_xlogy_self_scalar_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_special_zeta(tensor *, tensor self, tensor other);
void atg_special_zeta_other_scalar(tensor *, tensor self, scalar other);
void atg_special_zeta_other_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_special_zeta_out(tensor *, tensor out, tensor self, tensor other);
void atg_special_zeta_self_scalar(tensor *, scalar self_scalar, tensor other);
void atg_special_zeta_self_scalar_out(tensor *, tensor out, scalar self_scalar, tensor other);
/*
 * Prototypes for the atg_split family. The non-`_out` forms return
 * `tensor *` and take no leading output pointer; the `_out` forms return
 * void and take an `out_data`/`out_len` pair instead. Size lists are passed
 * as an `int64_t *..._data` pointer plus an `int ..._len` count.
 * NOTE(review): the returned `tensor *` presumably refers to an array of
 * tensor handles; how its length is conveyed and who releases it is not
 * visible in this header -- confirm against the implementation file.
 */
tensor *atg_split(tensor self, int64_t split_size, int64_t dim);
tensor *atg_split_copy(tensor self, int64_t split_size, int64_t dim);
void atg_split_copy_tensor_out(tensor *out_data, int out_len, tensor self, int64_t split_size, int64_t dim);
tensor *atg_split_sizes(tensor self, int64_t *split_size_data, int split_size_len, int64_t dim);
tensor *atg_split_with_sizes(tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
tensor *atg_split_with_sizes_copy(tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
void atg_split_with_sizes_copy_out(tensor *out_data, int out_len, tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
/*
 * Prototypes for atg_sqrt and atg_square. Each comes in three forms: the
 * plain name, a trailing-underscore name with the same parameters, and an
 * `_out` form that takes an additional `tensor out` argument.
 * NOTE(review): the trailing-underscore names presumably denote in-place
 * variants -- confirm against the implementation file.
 */
void atg_sqrt(tensor *, tensor self);
void atg_sqrt_(tensor *, tensor self);
void atg_sqrt_out(tensor *, tensor out, tensor self);
void atg_square(tensor *, tensor self);
void atg_square_(tensor *, tensor self);
void atg_square_out(tensor *, tensor out, tensor self);
/*
 * Prototypes for the atg_squeeze family. Overloads differ in how the
 * dimension is specified: none, a single `int64_t dim` (`_dim`), or a list
 * passed as `dim_data`/`dim_len` (`_dims`). `_copy` names and their `_out`
 * forms (extra `tensor out` argument) mirror the same three shapes.
 * NOTE(review): the trailing-underscore names presumably denote in-place
 * variants -- confirm against the implementation file.
 */
void atg_squeeze(tensor *, tensor self);
void atg_squeeze_(tensor *, tensor self);
void atg_squeeze_copy(tensor *, tensor self);
void atg_squeeze_copy_dim(tensor *, tensor self, int64_t dim);
void atg_squeeze_copy_dim_out(tensor *, tensor out, tensor self, int64_t dim);
void atg_squeeze_copy_dims(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_squeeze_copy_dims_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len);
void atg_squeeze_copy_out(tensor *, tensor out, tensor self);
void atg_squeeze_dim(tensor *, tensor self, int64_t dim);
void atg_squeeze_dim_(tensor *, tensor self, int64_t dim);
void atg_squeeze_dims(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_squeeze_dims_(tensor *, tensor self, int64_t *dim_data, int dim_len);
/*
 * Prototypes for atg_sspaddmm and atg_stack, each with an `_out` form that
 * takes an additional `tensor out` argument. atg_stack receives its inputs
 * as a `tensor *tensors_data` pointer plus an `int tensors_len` count.
 */
void atg_sspaddmm(tensor *, tensor self, tensor mat1, tensor mat2);
void atg_sspaddmm_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2);
void atg_stack(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_stack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
/*
 * Prototypes for the atg_std and atg_std_mean families. Overloads take
 * either an `int unbiased` flag or a `scalar correction`; the `_dim` and
 * `_correction` forms also take a dimension list (`dim_data`/`dim_len`) and
 * an `int keepdim` flag.
 * NOTE(review): atg_std_mean_correction_out takes two output tensors
 * (`out0`, `out1`), so the atg_std_mean entry points presumably write two
 * handles through the leading `tensor *` -- confirm the required capacity
 * against the implementation file.
 */
void atg_std(tensor *, tensor self, int unbiased);
void atg_std_correction(tensor *, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_std_correction_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_std_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_std_mean(tensor *, tensor self, int unbiased);
void atg_std_mean_correction(tensor *, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_std_mean_correction_out(tensor *, tensor out0, tensor out1, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_std_mean_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_std_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
/*
 * Prototypes for atg_stft and atg_stft_center. `hop_length` and `win_length`
 * are each split into an `int64_t ..._v` value and a `uint8_t ..._null`
 * flag; atg_stft_center additionally takes `int center` and a pad mode
 * string passed as `pad_mode_ptr`/`pad_mode_len`.
 * NOTE(review): a non-zero `_null` flag presumably marks the value as
 * absent, and the pad mode string is presumably length-delimited rather
 * than NUL-terminated -- confirm both against the implementation file.
 */
void atg_stft(tensor *, tensor self, int64_t n_fft, int64_t hop_length_v, uint8_t hop_length_null, int64_t win_length_v, uint8_t win_length_null, tensor window, int normalized, int onesided, int return_complex, int align_to_window);
void atg_stft_center(tensor *, tensor self, int64_t n_fft, int64_t hop_length_v, uint8_t hop_length_null, int64_t win_length_v, uint8_t win_length_null, tensor window, int center, char* pad_mode_ptr, int pad_mode_len, int normalized, int onesided, int return_complex, int align_to_window);
/*
 * Prototypes for the atg_sub and atg_subtract families. Each has
 * tensor/tensor and `_scalar` (`other` is a `scalar`) shapes, with
 * trailing-underscore counterparts; `_out` forms take an extra `tensor out`
 * argument.
 * NOTE(review): the trailing-underscore names presumably denote in-place
 * variants -- confirm against the implementation file.
 */
void atg_sub(tensor *, tensor self, tensor other);
void atg_sub_(tensor *, tensor self, tensor other);
void atg_sub_out(tensor *, tensor out, tensor self, tensor other);
void atg_sub_scalar(tensor *, tensor self, scalar other);
void atg_sub_scalar_(tensor *, tensor self, scalar other);
void atg_sub_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_subtract(tensor *, tensor self, tensor other);
void atg_subtract_(tensor *, tensor self, tensor other);
void atg_subtract_out(tensor *, tensor out, tensor self, tensor other);
void atg_subtract_scalar(tensor *, tensor self, scalar other);
void atg_subtract_scalar_(tensor *, tensor self, scalar other);
/*
 * Prototypes for the atg_sum family and atg_sum_to_size. The
 * `_dim_intlist`/`_intlist_out` forms take a dimension list
 * (`dim_data`/`dim_len`) and an `int keepdim` flag; atg_sum_to_size takes a
 * size list (`size_data`/`size_len`).
 * NOTE(review): `int dtype` is presumably an integer scalar-type code; the
 * encoding is not visible in this header -- confirm.
 */
void atg_sum(tensor *, tensor self, int dtype);
void atg_sum_dim_intlist(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_sum_intlist_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_sum_out(tensor *, tensor out, tensor self, int dtype);
void atg_sum_to_size(tensor *, tensor self, int64_t *size_data, int size_len);
/*
 * Prototypes for atg_svd, atg_swapaxes and atg_swapdims. atg_svd_u takes
 * three destination tensors (`U`, `S`, `V`) ahead of `self`.
 * NOTE(review): given the three destinations of atg_svd_u, atg_svd
 * presumably writes three handles through the leading `tensor *`; the
 * trailing-underscore names presumably denote in-place variants -- confirm
 * both against the implementation file.
 */
void atg_svd(tensor *, tensor self, int some, int compute_uv);
void atg_svd_u(tensor *, tensor U, tensor S, tensor V, tensor self, int some, int compute_uv);
void atg_swapaxes(tensor *, tensor self, int64_t axis0, int64_t axis1);
void atg_swapaxes_(tensor *, tensor self, int64_t axis0, int64_t axis1);
void atg_swapdims(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_swapdims_(tensor *, tensor self, int64_t dim0, int64_t dim1);
/*
 * Prototypes for the atg_t and atg_take families. atg_take_along_dim passes
 * its dimension as an `int64_t dim_v` value plus a `uint8_t dim_null` flag.
 * `_out` forms take an extra `tensor out` argument.
 * NOTE(review): a non-zero `dim_null` presumably means "no dimension
 * given" -- confirm against the implementation file.
 */
void atg_t(tensor *, tensor self);
void atg_t_(tensor *, tensor self);
void atg_t_copy(tensor *, tensor self);
void atg_t_copy_out(tensor *, tensor out, tensor self);
void atg_take(tensor *, tensor self, tensor index);
void atg_take_along_dim(tensor *, tensor self, tensor indices, int64_t dim_v, uint8_t dim_null);
void atg_take_along_dim_out(tensor *, tensor out, tensor self, tensor indices, int64_t dim_v, uint8_t dim_null);
void atg_take_out(tensor *, tensor out, tensor self, tensor index);
/*
 * Prototypes for the atg_tan and atg_tanh families. Besides the plain,
 * trailing-underscore and `_out` forms, atg_tanh has a `_backward` entry
 * taking `grad_output` and `output`, and a `_backward_grad_input` entry that
 * additionally takes a `tensor grad_input`.
 */
void atg_tan(tensor *, tensor self);
void atg_tan_(tensor *, tensor self);
void atg_tan_out(tensor *, tensor out, tensor self);
void atg_tanh(tensor *, tensor self);
void atg_tanh_(tensor *, tensor self);
void atg_tanh_backward(tensor *, tensor grad_output, tensor output);
void atg_tanh_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor output);
void atg_tanh_out(tensor *, tensor out, tensor self);
/*
 * Prototypes for the atg_tensor_split family and atg_tensordot. The
 * atg_tensor_split entries return `tensor *` and take no leading output
 * pointer; the split points are given as a section count, an index list
 * (`indices_data`/`indices_len`), or a tensor. atg_tensordot takes two
 * dimension lists, one for each operand.
 * NOTE(review): the returned `tensor *` presumably refers to an array of
 * tensor handles; its length convention and ownership are not visible in
 * this header -- confirm against the implementation file.
 */
tensor *atg_tensor_split(tensor self, int64_t sections, int64_t dim);
tensor *atg_tensor_split_indices(tensor self, int64_t *indices_data, int indices_len, int64_t dim);
tensor *atg_tensor_split_tensor_indices_or_sections(tensor self, tensor tensor_indices_or_sections, int64_t dim);
void atg_tensordot(tensor *, tensor self, tensor other, int64_t *dims_self_data, int dims_self_len, int64_t *dims_other_data, int dims_other_len);
void atg_tensordot_out(tensor *, tensor out, tensor self, tensor other, int64_t *dims_self_data, int dims_self_len, int64_t *dims_other_data, int dims_other_len);
/*
 * Prototypes for the atg_threshold family and atg_tile. The forward
 * atg_threshold entries take `scalar threshold` and `scalar value`; the
 * `_backward` entries take `grad_output`, `self` and `threshold` only.
 * atg_tile takes its repeat counts as `dims_data`/`dims_len`.
 */
void atg_threshold(tensor *, tensor self, scalar threshold, scalar value);
void atg_threshold_(tensor *, tensor self, scalar threshold, scalar value);
void atg_threshold_backward(tensor *, tensor grad_output, tensor self, scalar threshold);
void atg_threshold_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar threshold);
void atg_threshold_out(tensor *, tensor out, tensor self, scalar threshold, scalar value);
void atg_tile(tensor *, tensor self, int64_t *dims_data, int dims_len);
/*
 * Prototypes for the atg_to conversion family (device, dtype, dense,
 * mkldnn, other-tensor and padded-tensor targets).
 * NOTE(review): `int device`, `int dtype` and the `options_kind` /
 * `options_device` pair are plain integers here; they are presumably
 * encoded device and scalar-type codes, but the encoding is not visible in
 * this header -- confirm against the implementation file.
 */
void atg_to(tensor *, tensor self, int device);
void atg_to_dense(tensor *, tensor self, int dtype, int masked_grad);
void atg_to_dense_backward(tensor *, tensor grad, tensor input, int masked_grad);
void atg_to_device(tensor *, tensor self, int device, int dtype, int non_blocking, int copy);
void atg_to_dtype(tensor *, tensor self, int dtype, int non_blocking, int copy);
void atg_to_dtype_layout(tensor *, tensor self, int options_kind, int options_device, int non_blocking, int copy);
void atg_to_mkldnn(tensor *, tensor self, int dtype);
void atg_to_mkldnn_backward(tensor *, tensor grad, tensor input);
void atg_to_mkldnn_out(tensor *, tensor out, tensor self, int dtype);
void atg_to_other(tensor *, tensor self, tensor other, int non_blocking, int copy);
void atg_to_padded_tensor(tensor *, tensor self, double padding, int64_t *output_size_data, int output_size_len);
void atg_to_padded_tensor_out(tensor *, tensor out, tensor self, double padding, int64_t *output_size_data, int output_size_len);
/*
 * Prototypes for the atg_to_sparse family. Block sizes are passed as
 * `blocksize_data`/`blocksize_len`; `dense_dim` is split into an
 * `int64_t dense_dim_v` value and a `uint8_t dense_dim_null` flag.
 * NOTE(review): `int8_t layout` is presumably an encoded layout code and a
 * non-zero `dense_dim_null` presumably means the value is absent -- confirm
 * both against the implementation file.
 */
void atg_to_sparse(tensor *, tensor self, int8_t layout, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg_to_sparse_bsc(tensor *, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg_to_sparse_bsr(tensor *, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg_to_sparse_csc(tensor *, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg_to_sparse_csr(tensor *, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg_to_sparse_sparse_dim(tensor *, tensor self, int64_t sparse_dim);
/*
 * Prototypes for atg_topk and atg_totype. atg_topk_values takes two
 * destination tensors (`values`, `indices`) ahead of `self`.
 * NOTE(review): atg_topk presumably writes two handles through the leading
 * `tensor *`, matching those two destinations -- confirm against the
 * implementation file.
 */
void atg_topk(tensor *, tensor self, int64_t k, int64_t dim, int largest, int sorted);
void atg_topk_values(tensor *, tensor values, tensor indices, tensor self, int64_t k, int64_t dim, int largest, int sorted);
void atg_totype(tensor *, tensor self, int scalar_type);
/*
 * Prototypes for the atg_trace and atg_transpose families.
 * atg_trace_backward takes the gradient plus a size list
 * (`sizes_data`/`sizes_len`); the atg_transpose entries take the two
 * dimensions as `dim0` and `dim1`.
 */
void atg_trace(tensor *, tensor self);
void atg_trace_backward(tensor *, tensor grad, int64_t *sizes_data, int sizes_len);
void atg_trace_out(tensor *, tensor out, tensor self);
void atg_transpose(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_transpose_(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_transpose_copy(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_transpose_copy_int_out(tensor *, tensor out, tensor self, int64_t dim0, int64_t dim1);
/*
 * Prototypes for atg_trapezoid, atg_trapz and atg_triangular_solve. The
 * integration entries take `y` with either a tensor `x` or (atg_trapz_dx) a
 * `double dx`. atg_triangular_solve_x takes two destination tensors (`X`,
 * `M`) ahead of `self`.
 * NOTE(review): atg_triangular_solve presumably writes two handles through
 * the leading `tensor *` -- confirm against the implementation file.
 */
void atg_trapezoid(tensor *, tensor y, int64_t dim);
void atg_trapezoid_x(tensor *, tensor y, tensor x, int64_t dim);
void atg_trapz(tensor *, tensor y, tensor x, int64_t dim);
void atg_trapz_dx(tensor *, tensor y, double dx, int64_t dim);
void atg_triangular_solve(tensor *, tensor self, tensor A, int upper, int transpose, int unitriangular);
void atg_triangular_solve_x(tensor *, tensor X, tensor M, tensor self, tensor A, int upper, int transpose, int unitriangular);
/*
 * Prototypes for the atg_tril and atg_triu families, with
 * atg_triplet_margin_loss between them. The `_indices` entries take no
 * input tensor: they take `row`, `col`, `offset` and an
 * `options_kind`/`options_device` pair.
 * NOTE(review): `options_kind`/`options_device` are presumably encoded
 * scalar-type and device codes, and `int64_t reduction` presumably an
 * encoded reduction mode -- confirm against the implementation file.
 */
void atg_tril(tensor *, tensor self, int64_t diagonal);
void atg_tril_(tensor *, tensor self, int64_t diagonal);
void atg_tril_indices(tensor *, int64_t row, int64_t col, int64_t offset, int options_kind, int options_device);
void atg_tril_indices_out(tensor *, tensor out, int64_t row, int64_t col, int64_t offset);
void atg_tril_out(tensor *, tensor out, tensor self, int64_t diagonal);
void atg_triplet_margin_loss(tensor *, tensor anchor, tensor positive, tensor negative, double margin, double p, double eps, int swap, int64_t reduction);
void atg_triu(tensor *, tensor self, int64_t diagonal);
void atg_triu_(tensor *, tensor self, int64_t diagonal);
void atg_triu_indices(tensor *, int64_t row, int64_t col, int64_t offset, int options_kind, int options_device);
void atg_triu_indices_out(tensor *, tensor out, int64_t row, int64_t col, int64_t offset);
void atg_triu_out(tensor *, tensor out, tensor self, int64_t diagonal);
/*
 * Prototypes for the atg_true_divide and atg_trunc families and
 * atg_type_as. atg_true_divide has tensor/tensor and `_scalar` (`other` is
 * a `scalar`) shapes; `_out` forms take an extra `tensor out` argument.
 * NOTE(review): the trailing-underscore names presumably denote in-place
 * variants -- confirm against the implementation file.
 */
void atg_true_divide(tensor *, tensor self, tensor other);
void atg_true_divide_(tensor *, tensor self, tensor other);
void atg_true_divide_out(tensor *, tensor out, tensor self, tensor other);
void atg_true_divide_scalar(tensor *, tensor self, scalar other);
void atg_true_divide_scalar_(tensor *, tensor self, scalar other);
void atg_trunc(tensor *, tensor self);
void atg_trunc_(tensor *, tensor self);
void atg_trunc_out(tensor *, tensor out, tensor self);
void atg_type_as(tensor *, tensor self, tensor other);
/*
 * Prototypes for the atg_unbind family, atg_unflatten and
 * atg_unflatten_dense_tensors. atg_unbind, atg_unbind_copy and
 * atg_unflatten_dense_tensors return `tensor *` and take no leading output
 * pointer; atg_unbind_copy_int_out takes `out_data`/`out_len` instead.
 * NOTE(review): the returned `tensor *` presumably refers to an array of
 * tensor handles; its length convention and ownership are not visible in
 * this header -- confirm against the implementation file.
 */
tensor *atg_unbind(tensor self, int64_t dim);
tensor *atg_unbind_copy(tensor self, int64_t dim);
void atg_unbind_copy_int_out(tensor *out_data, int out_len, tensor self, int64_t dim);
void atg_unflatten(tensor *, tensor self, int64_t dim, int64_t *sizes_data, int sizes_len);
tensor *atg_unflatten_dense_tensors(tensor flat, tensor *tensors_data, int tensors_len);
/*
 * Prototypes for the atg_unfold and atg_uniform families. The forward
 * atg_unfold entries name the axis `dimension`, while the `_backward`
 * entries name it `dim` and also take the input sizes as
 * `input_sizes_data`/`input_sizes_len`. The atg_uniform entries take the
 * range bounds as `double from` and `double to`.
 */
void atg_unfold(tensor *, tensor self, int64_t dimension, int64_t size, int64_t step);
void atg_unfold_backward(tensor *, tensor grad_in, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t size, int64_t step);
void atg_unfold_backward_out(tensor *, tensor out, tensor grad_in, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t size, int64_t step);
void atg_unfold_copy(tensor *, tensor self, int64_t dimension, int64_t size, int64_t step);
void atg_unfold_copy_out(tensor *, tensor out, tensor self, int64_t dimension, int64_t size, int64_t step);
void atg_uniform(tensor *, tensor self, double from, double to);
void atg_uniform_(tensor *, tensor self, double from, double to);
void atg_uniform_out(tensor *, tensor out, tensor self, double from, double to);
/*
 * Prototypes for the atg_unique family. Every `_out` form takes three
 * destination tensors (`out0`, `out1`, `out2`). atg_unique_consecutive
 * passes its dimension as `dim_v`/`dim_null`, whereas the `_dim` forms take
 * a plain `int64_t dim`.
 * NOTE(review): the non-`_out` entries presumably write three handles
 * through the leading `tensor *`, and a non-zero `dim_null` presumably
 * means "no dimension given" -- confirm against the implementation file.
 */
void atg_unique_consecutive(tensor *, tensor self, int return_inverse, int return_counts, int64_t dim_v, uint8_t dim_null);
void atg_unique_consecutive_out(tensor *, tensor out0, tensor out1, tensor out2, tensor self, int return_inverse, int return_counts, int64_t dim_v, uint8_t dim_null);
void atg_unique_dim(tensor *, tensor self, int64_t dim, int sorted, int return_inverse, int return_counts);
void atg_unique_dim_consecutive(tensor *, tensor self, int64_t dim, int return_inverse, int return_counts);
void atg_unique_dim_consecutive_out(tensor *, tensor out0, tensor out1, tensor out2, tensor self, int64_t dim, int return_inverse, int return_counts);
void atg_unique_dim_out(tensor *, tensor out0, tensor out1, tensor out2, tensor self, int64_t dim, int sorted, int return_inverse, int return_counts);
/*
 * Prototypes for atg_unsafe_chunk and the atg_unsafe_split family. The
 * non-`_out` entries return `tensor *` and take no leading output pointer;
 * the `_out` entries return void and take `out_data`/`out_len` instead.
 * NOTE(review): the returned `tensor *` presumably refers to an array of
 * tensor handles; its length convention and ownership are not visible in
 * this header -- confirm against the implementation file.
 */
tensor *atg_unsafe_chunk(tensor self, int64_t chunks, int64_t dim);
tensor *atg_unsafe_split(tensor self, int64_t split_size, int64_t dim);
void atg_unsafe_split_tensor_out(tensor *out_data, int out_len, tensor self, int64_t split_size, int64_t dim);
tensor *atg_unsafe_split_with_sizes(tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
void atg_unsafe_split_with_sizes_out(tensor *out_data, int out_len, tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
/*
 * Prototypes for the atg_unsqueeze family: plain, trailing-underscore,
 * `_copy`, and `_copy_out` (extra `tensor out` argument). All take a single
 * `int64_t dim`.
 */
void atg_unsqueeze(tensor *, tensor self, int64_t dim);
void atg_unsqueeze_(tensor *, tensor self, int64_t dim);
void atg_unsqueeze_copy(tensor *, tensor self, int64_t dim);
void atg_unsqueeze_copy_out(tensor *, tensor out, tensor self, int64_t dim);
/*
 * Prototypes for the atg_upsample_bicubic2d family. The target size is a
 * list (`output_size_data`/`output_size_len`); per-axis scales are passed as
 * `double scales_{h,w}_v` values with `uint8_t scales_{h,w}_null` flags. The
 * `_backward` entries also take `input_size_data`/`input_size_len`, and the
 * `_vec` entry takes a `double` array `scale_factors_data`/`scale_factors_len`
 * in place of the per-axis scales.
 * NOTE(review): a non-zero `_null` flag presumably marks the scale as
 * absent -- confirm against the implementation file.
 */
void atg_upsample_bicubic2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
/*
 * Prototypes for the atg_upsample_bilinear2d family. The target size is a
 * list (`output_size_data`/`output_size_len`); per-axis scales are passed as
 * `double scales_{h,w}_v` values with `uint8_t scales_{h,w}_null` flags. The
 * `_backward` entries also take `input_size_data`/`input_size_len`; the
 * `_vec` and `_vec_out` entries take a `double` array
 * `scale_factors_data`/`scale_factors_len` in place of the per-axis scales.
 * NOTE(review): a non-zero `_null` flag presumably marks the scale as
 * absent -- confirm against the implementation file.
 */
void atg_upsample_bilinear2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
void atg_upsample_bilinear2d_vec_out(tensor *, tensor out, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
/*
 * Prototypes for the atg_upsample_linear1d family. The target size is a
 * list (`output_size_data`/`output_size_len`) and the single scale is passed
 * as `double scales_v` with a `uint8_t scales_null` flag. The `_backward`
 * entries also take `input_size_data`/`input_size_len`; the `_vec` entry
 * takes a `double` array `scale_factors_data`/`scale_factors_len` instead of
 * the scale.
 * NOTE(review): a non-zero `scales_null` presumably marks the scale as
 * absent -- confirm against the implementation file.
 */
void atg_upsample_linear1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
/*
 * Prototypes for the atg_upsample_nearest1d family. These take no
 * `align_corners` argument. The target size is a list
 * (`output_size_data`/`output_size_len`) and the single scale is passed as
 * `double scales_v` with a `uint8_t scales_null` flag; the `_vec` entry takes
 * a `double` array `scale_factors_data`/`scale_factors_len` instead.
 * NOTE(review): a non-zero `scales_null` presumably marks the scale as
 * absent -- confirm against the implementation file.
 */
void atg_upsample_nearest1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
/*
 * Prototypes for the atg_upsample_nearest2d family. These take no
 * `align_corners` argument. The target size is a list
 * (`output_size_data`/`output_size_len`); per-axis scales are passed as
 * `double scales_{h,w}_v` values with `uint8_t scales_{h,w}_null` flags. The
 * `_vec` and `_vec_out` entries take a `double` array
 * `scale_factors_data`/`scale_factors_len` instead.
 * NOTE(review): a non-zero `_null` flag presumably marks the scale as
 * absent -- confirm against the implementation file.
 */
void atg_upsample_nearest2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
void atg_upsample_nearest2d_vec_out(tensor *, tensor out, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
/*
 * Prototypes for the atg_upsample_nearest3d family. These take no
 * `align_corners` argument. The target size is a list
 * (`output_size_data`/`output_size_len`); per-axis scales are passed as
 * `double scales_{d,h,w}_v` values with `uint8_t scales_{d,h,w}_null` flags.
 * The `_vec` entry takes a `double` array
 * `scale_factors_data`/`scale_factors_len` instead.
 * NOTE(review): a non-zero `_null` flag presumably marks the scale as
 * absent -- confirm against the implementation file.
 */
void atg_upsample_nearest3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
/*
 * Prototypes for the atg_upsample_trilinear3d family. The target size is a
 * list (`output_size_data`/`output_size_len`), followed by
 * `int align_corners`; per-axis scales are passed as
 * `double scales_{d,h,w}_v` values with `uint8_t scales_{d,h,w}_null` flags.
 * The `_vec` entry takes a `double` array
 * `scale_factors_data`/`scale_factors_len` instead.
 * NOTE(review): a non-zero `_null` flag presumably marks the scale as
 * absent -- confirm against the implementation file.
 */
void atg_upsample_trilinear3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
/*
 * Prototypes for atg_value_selecting_reduction_backward, the atg_values
 * family and atg_vander. atg_vander passes `n` as an `int64_t n_v` value
 * plus a `uint8_t n_null` flag.
 * NOTE(review): a non-zero `n_null` presumably means `n` is absent --
 * confirm against the implementation file.
 */
void atg_value_selecting_reduction_backward(tensor *, tensor grad, int64_t dim, tensor indices, int64_t *sizes_data, int sizes_len, int keepdim);
void atg_values(tensor *, tensor self);
void atg_values_copy(tensor *, tensor self);
void atg_values_copy_out(tensor *, tensor out, tensor self);
void atg_vander(tensor *, tensor x, int64_t n_v, uint8_t n_null, int increasing);
/*
 * Prototypes for the atg_var and atg_var_mean families. Overloads take
 * either an `int unbiased` flag or a `scalar correction`; the `_dim` and
 * `_correction` forms also take a dimension list (`dim_data`/`dim_len`) and
 * an `int keepdim` flag.
 * NOTE(review): atg_var_mean_correction_out takes two output tensors
 * (`out0`, `out1`), so the atg_var_mean entry points presumably write two
 * handles through the leading `tensor *` -- confirm the required capacity
 * against the implementation file.
 */
void atg_var(tensor *, tensor self, int unbiased);
void atg_var_correction(tensor *, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_var_correction_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_var_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_var_mean(tensor *, tensor self, int unbiased);
void atg_var_mean_correction(tensor *, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_var_mean_correction_out(tensor *, tensor out0, tensor out1, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_var_mean_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_var_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
/*
 * Prototypes for atg_vdot and the atg_view family. The view entries select
 * the target either by a size list (`size_data`/`size_len`), by another
 * tensor (atg_view_as), by complex/real reinterpretation names, or by an
 * `int dtype` (`_dtype` forms). `_copy` entries have matching `_out` forms
 * taking an extra `tensor out` argument.
 * NOTE(review): `int dtype` is presumably an integer scalar-type code; the
 * encoding is not visible in this header -- confirm.
 */
void atg_vdot(tensor *, tensor self, tensor other);
void atg_vdot_out(tensor *, tensor out, tensor self, tensor other);
void atg_view(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_view_as(tensor *, tensor self, tensor other);
void atg_view_as_complex(tensor *, tensor self);
void atg_view_as_complex_copy(tensor *, tensor self);
void atg_view_as_complex_copy_out(tensor *, tensor out, tensor self);
void atg_view_as_real(tensor *, tensor self);
void atg_view_as_real_copy(tensor *, tensor self);
void atg_view_as_real_copy_out(tensor *, tensor out, tensor self);
void atg_view_copy(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_view_copy_dtype(tensor *, tensor self, int dtype);
void atg_view_copy_dtype_out(tensor *, tensor out, tensor self, int dtype);
void atg_view_copy_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg_view_dtype(tensor *, tensor self, int dtype);
/*
 * Prototypes for atg_vsplit and atg_vstack. The atg_vsplit entries return
 * `tensor *` and take no leading output pointer; the atg_vstack entries
 * return void and take their inputs as `tensors_data`/`tensors_len`.
 * NOTE(review): the returned `tensor *` presumably refers to an array of
 * tensor handles; its length convention and ownership are not visible in
 * this header -- confirm against the implementation file.
 */
tensor *atg_vsplit(tensor self, int64_t sections);
tensor *atg_vsplit_array(tensor self, int64_t *indices_data, int indices_len);
void atg_vstack(tensor *, tensor *tensors_data, int tensors_len);
void atg_vstack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
/*
 * Prototypes for the atg_where family. The condition-only atg_where returns
 * `tensor *` and takes no leading output pointer; the remaining entries
 * return void and choose between `self`/`other` operands that are each
 * either a `tensor` or a `scalar`, as encoded in the name suffix.
 * NOTE(review): the `tensor *` returned by atg_where presumably refers to
 * an array of tensor handles; its length convention and ownership are not
 * visible in this header -- confirm against the implementation file.
 */
tensor *atg_where(tensor condition);
void atg_where_scalar(tensor *, tensor condition, scalar self_scalar, scalar other);
void atg_where_scalarother(tensor *, tensor condition, tensor self, scalar other);
void atg_where_scalarself(tensor *, tensor condition, scalar self_scalar, tensor other);
void atg_where_self(tensor *, tensor condition, tensor self, tensor other);
void atg_where_self_out(tensor *, tensor out, tensor condition, tensor self, tensor other);
/*
 * Prototypes for the atg_xlogy family. Operand shapes are tensor/tensor,
 * `_scalar_other` (`other` is a `scalar`) and `_scalar_self` (the first
 * operand is a `scalar` named `self_scalar`). The `_out...` entries take an
 * extra `tensor out` argument and use the suffixes `outtensor`,
 * `outscalar_other` and `outscalar_self` for the same three shapes.
 */
void atg_xlogy(tensor *, tensor self, tensor other);
void atg_xlogy_(tensor *, tensor self, tensor other);
void atg_xlogy_outscalar_other(tensor *, tensor out, tensor self, scalar other);
void atg_xlogy_outscalar_self(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_xlogy_outtensor(tensor *, tensor out, tensor self, tensor other);
void atg_xlogy_scalar_other(tensor *, tensor self, scalar other);
void atg_xlogy_scalar_other_(tensor *, tensor self, scalar other);
void atg_xlogy_scalar_self(tensor *, scalar self_scalar, tensor other);
/*
 * Prototypes for the atg_zero and atg_zeros families. atg_zeros takes no
 * input tensor: it takes a size list (`size_data`/`size_len`) and an
 * `options_kind`/`options_device` pair, while atg_zeros_out takes only the
 * destination tensor and the size list.
 * NOTE(review): `options_kind`/`options_device` are presumably encoded
 * scalar-type and device codes; the encoding is not visible in this
 * header -- confirm against the implementation file.
 */
void atg_zero(tensor *, tensor self);
void atg_zero_(tensor *, tensor self);
void atg_zero_out(tensor *, tensor out, tensor self);
void atg_zeros(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_zeros_like(tensor *, tensor self);
void atg_zeros_like_out(tensor *, tensor out, tensor self);
void atg_zeros_out(tensor *, tensor out, int64_t *size_data, int size_len);
}