tlparse 0.4.8 - Docs.rs

V1206 15:24:45.403000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "d89a3716f835eda9884b375881c8c021"}
	{
	"name": "dynamo",
	"ts": 1733527485402945.8,
	"args": {
	"compile_id": "0/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:45.403000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/convert_frame.py", 0]}
V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", 1]}
V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_inductor/test_case.py", 2]}
V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/_dynamo/test_case.py", 3]}
V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/testing/_internal/common_utils.py", 4]}
V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/main.py", 5]}
V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/runner.py", 6]}
V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/suite.py", 7]}
V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/unittest/case.py", 8]}
V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/home/xmfan/local/a/pytorch-env/lib/python3.11/contextlib.py", 9]}
V1206 15:24:45.404000 1667746 torch/_logging/structured.py:22] {"str": ["/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", 10]}
V1206 15:24:45.405000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 475, "name": "test_flex_attention_caching", "filename": 1}, {"line": 460, "name": "fn", "filename": 1}, {"line": 1316, "name": "flex_attention", "filename": 10}, {"line": 1303, "name": "_flex_attention_hop_wrapper", "filename": 10}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.405000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "e415c7d5114d113beb39e5b78fc4c96d"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527485405153.2,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "0/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:45.409000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 0, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.409000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.409000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 0, "source": "L['args'][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.519000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 0, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.520000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 4, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.520000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 4, "source": "L['args'][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.520000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 0, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.521000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.521000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 5, "source": "L['args'][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.521000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.521000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.522000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 6, "source": "L['args'][4][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.522000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.522000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.522000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 7, "source": "L['args'][4][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.523000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.523000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.523000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 8, "source": "L['args'][4][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.524000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.524000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.524000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 9, "source": "L['args'][4][3]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.525000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.525000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.525000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 10, "source": "L['args'][4][4]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.526000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.526000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.526000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 11, "source": "L['args'][4][5]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.526000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 0, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.526000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.527000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 12, "source": "L['args'][4][6]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.527000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 0, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.527000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.527000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 0, "id": 13, "source": "L['args'][4][7]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.534000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_args_0_": [1, 4, 512, 64], "l_args_1_": [1, 4, 512, 64], "l_args_2_": [1, 4, 512, 64], "l_args_4_0_": [1, 1, 16], "l_args_4_1_": [1, 1, 16, 16], "l_args_4_2_": [1, 1, 16], "l_args_4_3_": [1, 1, 16, 16], "l_args_4_4_": [1, 1, 16], "l_args_4_5_": [1, 1, 16, 16], "l_args_4_6_": [1, 1, 16], "l_args_4_7_": [1, 1, 16, 16], "getitem": [1, 4, 512, 64], "getitem_1": [1, 4, 512]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "52fbb526c9a1f1575361cb8ed8e7794e"}
	class GraphModule(torch.nn.Module):
	    def forward(self, L_args_0_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_1_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_2_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_4_0_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_1_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_2_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_3_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_4_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_5_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_6_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_7_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	        l_args_0_ = L_args_0_
	        l_args_1_ = L_args_1_
	        l_args_2_ = L_args_2_
	        l_args_4_0_ = L_args_4_0_
	        l_args_4_1_ = L_args_4_1_
	        l_args_4_2_ = L_args_4_2_
	        l_args_4_3_ = L_args_4_3_
	        l_args_4_4_ = L_args_4_4_
	        l_args_4_5_ = L_args_4_5_
	        l_args_4_6_ = L_args_4_6_
	        l_args_4_7_ = L_args_4_7_
	        
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1304 in _flex_attention_hop_wrapper, code: return flex_attention_hop(*args, **kwargs)
	        score_mod_0 = self.score_mod_0
	        mask_fn_0 = self.mask_fn_0
	        flex_attention = torch.ops.higher_order.flex_attention(l_args_0_, l_args_1_, l_args_2_, score_mod_0, (l_args_4_0_, l_args_4_1_, l_args_4_2_, l_args_4_3_, l_args_4_4_, l_args_4_5_, l_args_4_6_, l_args_4_7_, 128, 128, mask_fn_0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  l_args_0_ = l_args_1_ = l_args_2_ = score_mod_0 = l_args_4_0_ = l_args_4_1_ = l_args_4_2_ = l_args_4_3_ = l_args_4_4_ = l_args_4_5_ = l_args_4_6_ = l_args_4_7_ = mask_fn_0 = None
	        getitem: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0]
	        getitem_1: "f32[1, 4, 512][2048, 512, 1]cuda:0" = flex_attention[1];  flex_attention = None
	        return (getitem, getitem_1)
	        
	    class score_mod_0(torch.nn.Module):
	        def forward(self, child: "f32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0", child_4: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:457 in score_mod, code: return score + (q - kv)
	            sub: "i32[][]cuda:0" = child_3 - child_4;  child_3 = child_4 = None
	            add: "f32[][]cuda:0" = child + sub;  child = sub = None
	            return add
	            
	    class mask_fn_0(torch.nn.Module):
	        def forward(self, child: "i32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
	            return ge
	            
V1206 15:24:45.534000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "ca2a774ae001e87e57bd31e3fb982793"}
	{
	"name": "backend_compile",
	"ts": 1733527485534550.5,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "0/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:45.534000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "dbd3ef3cad5d244dd2996256c488f716"}
	{
	"name": "backend_compile",
	"ts": 1733527485534930.2,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "0/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:45.547000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c168856da88c1195cba9758754bcd1a1"}
	
	TREE_GUARD_MANAGER:
	+- RootGuardManager
	| +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None                           # _dynamo/output_graph.py:484 in init_ambient_guards
	| +- GLOBAL_STATE: ___check_global_state()
	| +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack()
	| +- GuardManager: source=L['args'], accessed_by=DictGetItemGuardAccessor('args')
	| | +- TYPE_MATCH: ___check_type_id(L['args'], 8812224)                        
	| | +- LENGTH_CHECK: len(L['args']) == 7                                         
	| | +- GuardManager: source=L['args'][0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | +- TENSOR_MATCH: check_tensor(L['args'][0], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | | +- NO_HASATTR: hasattr(L['args'][0], '_dynamo_dynamic_indices') == False   
	| | | +- NO_TENSOR_ALIASING: check_no_aliasing(L['args'][0], L['args'][1], L['args'][2], L['args'][4][0], L['args'][4][1], L['args'][4][2], L['args'][4][3], L['args'][4][4], L['args'][4][5], L['args'][4][6], L['args'][4][7])
	| | +- GuardManager: source=L['args'][1], accessed_by=TupleGetItemGuardAccessor(1)
	| | | +- TENSOR_MATCH: check_tensor(L['args'][1], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | | +- NO_HASATTR: hasattr(L['args'][1], '_dynamo_dynamic_indices') == False   
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['args'][2], accessed_by=TupleGetItemGuardAccessor(2)
	| | | +- TENSOR_MATCH: check_tensor(L['args'][2], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | | +- NO_HASATTR: hasattr(L['args'][2], '_dynamo_dynamic_indices') == False   
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['args'][3], accessed_by=TupleGetItemGuardAccessor(3)
	| | | +- GuardManager: source=L['args'][3].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][3].__code__, 140062268556144)     
	| | +- GuardManager: source=L['args'][4], accessed_by=TupleGetItemGuardAccessor(4)
	| | | +- TYPE_MATCH: ___check_type_id(L['args'][4], 8812224)                     
	| | | +- LENGTH_CHECK: len(L['args'][4]) == 11                                     
	| | | +- GuardManager: source=L['args'][4][0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][0], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][0], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][1], accessed_by=TupleGetItemGuardAccessor(1)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][1], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][1], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][2], accessed_by=TupleGetItemGuardAccessor(2)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][2], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][2], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][3], accessed_by=TupleGetItemGuardAccessor(3)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][3], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][3], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][4], accessed_by=TupleGetItemGuardAccessor(4)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][4], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][4], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][5], accessed_by=TupleGetItemGuardAccessor(5)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][5], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][5], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][6], accessed_by=TupleGetItemGuardAccessor(6)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][6], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][6], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][7], accessed_by=TupleGetItemGuardAccessor(7)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][7], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][7], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][8], accessed_by=TupleGetItemGuardAccessor(8)
	| | | | +- EQUALS_MATCH: L['args'][4][8] == 128                                      
	| | | +- GuardManager: source=L['args'][4][9], accessed_by=TupleGetItemGuardAccessor(9)
	| | | | +- EQUALS_MATCH: L['args'][4][9] == 128                                      
	| | | +- GuardManager: source=L['args'][4][10], accessed_by=TupleGetItemGuardAccessor(10)
	| | | | +- GuardManager: source=L['args'][4][10].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(L['args'][4][10].__code__, 140062268556768) 
	| | +- GuardManager: source=L['args'][5], accessed_by=TupleGetItemGuardAccessor(5)
	| | | +- EQUALS_MATCH: L['args'][5] == 0.125                                       
	| | +- GuardManager: source=L['args'][6], accessed_by=TupleGetItemGuardAccessor(6)
	| | | +- DICT_LENGTH: len(L['args'][6]) == 4                                      
	| | | +- GuardManager: source=L['args'][6]['PRESCALE_QK'], accessed_by=DictGetItemGuardAccessor('PRESCALE_QK')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['PRESCALE_QK'], 8907584)       
	| | | +- GuardManager: source=L['args'][6]['ROWS_GUARANTEED_SAFE'], accessed_by=DictGetItemGuardAccessor('ROWS_GUARANTEED_SAFE')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['ROWS_GUARANTEED_SAFE'], 8907584)
	| | | +- GuardManager: source=L['args'][6]['BLOCKS_ARE_CONTIGUOUS'], accessed_by=DictGetItemGuardAccessor('BLOCKS_ARE_CONTIGUOUS')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['BLOCKS_ARE_CONTIGUOUS'], 8907584)
	| | | +- GuardManager: source=L['args'][6]['OUTPUT_LOGSUMEXP'], accessed_by=DictGetItemGuardAccessor('OUTPUT_LOGSUMEXP')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['OUTPUT_LOGSUMEXP'], 8908032)  
	| +- GuardManager: source=L['kwargs'], accessed_by=DictGetItemGuardAccessor('kwargs')
	| | +- DICT_LENGTH: not L['kwargs']                                             
	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
	| | +- GuardManager: source=G['flex_attention_hop'], accessed_by=DictGetItemGuardAccessor('flex_attention_hop')
	| | | +- TYPE_MATCH: ___check_type_id(G['flex_attention_hop'], 96230624)         
	| | | +- GuardManager: source=G['flex_attention_hop'].__name__, accessed_by=GetAttrGuardAccessor(__name__)
	| | | | +- EQUALS_MATCH: G['flex_attention_hop'].__name__ == 'flex_attention'        
	| | +- GuardManager: source=G['_140062263790704_c0'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c0')
	| | | +- GuardManager: source=G['_140062263790704_c0'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c0'].Tensor, 82181376)  
	| | | | +- GuardManager: source=G['_140062263790704_c0'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
	| | | | | +- GuardManager: source=G['_140062263790704_c0'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c0'].Tensor.__bases__[0], 140062119703136)
	| | +- GuardManager: source=G['__builtins_dict___0'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___0')
	| | | +- GuardManager: source=G['__builtins_dict___0']['len'], accessed_by=DictGetItemGuardAccessor('len')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['len'], 140062269592480)
	| | | +- GuardManager: source=G['__builtins_dict___0']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['sum'], 140062269593600)
	| | | +- GuardManager: source=G['__builtins_dict___0']['list'], accessed_by=DictGetItemGuardAccessor('list')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['list'], 8841312)  
	| | | +- GuardManager: source=G['__builtins_dict___0']['type'], accessed_by=DictGetItemGuardAccessor('type')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['type'], 8810240)  
	| | | +- GuardManager: source=G['__builtins_dict___0']['tuple'], accessed_by=DictGetItemGuardAccessor('tuple')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['tuple'], 8812224) 
	| | | +- GuardManager: source=G['__builtins_dict___0']['object'], accessed_by=DictGetItemGuardAccessor('object')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['object'], 8810976)
	| | | +- GuardManager: source=G['__builtins_dict___0']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___0']['isinstance'], 140062269592160)
	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes, accessed_by=GetAttrGuardAccessor(num_nodes)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves, accessed_by=GetAttrGuardAccessor(num_leaves)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children, accessed_by=GetAttrGuardAccessor(num_children)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children == 0
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, accessed_by=GetAttrGuardAccessor(children_specs)
	| | | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, 8841312)
	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
	| | | | +- KeyValueManager pair at index=1
	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
	
V1206 15:24:45.547000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9f584d7434070bbe2c589c1cc5c9fa1c"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527485547627.0,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "0/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:45.550000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 379, "dynamo_cumulative_compile_time_us": 142473, "frame_key": "1", "co_name": "_flex_attention_hop_wrapper", "co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", "co_firstlineno": 1303, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 58, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 17, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.550000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "4808e1bc6347d31bf5778999c536de01"}
	{
	"name": "dynamo",
	"ts": 1733527485550622.8,
	"args": {
	"compile_id": "0/0",
	"frame_key": "1",
	"co_name": "_flex_attention_hop_wrapper",
	"co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py",
	"co_firstlineno": 1303,
	"cache_size": 0,
	"accumulated_cache_size": 0,
	"guard_count": 58,
	"shape_env_guard_count": 0,
	"graph_op_count": 3,
	"graph_node_count": 17,
	"graph_input_count": 11,
	"fail_type": null,
	"fail_reason": null,
	"fail_user_frame_filename": null,
	"fail_user_frame_lineno": null,
	"non_compliant_ops": [],
	"compliant_custom_ops": [],
	"restart_reasons": [],
	"dynamo_time_before_restart_s": 0.0,
	"has_guarded_code": true,
	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:45.976000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b144b519a5591ad277125a12c84b389b"}
	{
	"name": "dynamo",
	"ts": 1733527485976303.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:45.976000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 475, "name": "test_flex_attention_caching", "filename": 1}, {"line": 459, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.977000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b2b1c355b023de0af140013ba14024ed"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527485977092.0,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:45.979000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 9, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.979000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.979000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 0, "source": "L['q']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.985000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 9, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.986000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 1, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.986000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 1, "source": "L['k']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.987000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 9, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.987000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.987000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 2, "source": "L['v']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.998000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.998000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:45.998000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 3, "source": "L['block_mask'].kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.071000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.071000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.071000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 7, "source": "L['block_mask'].kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.072000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.072000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.072000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 8, "source": "L['block_mask'].full_kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.073000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.073000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.073000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 9, "source": "L['block_mask'].full_kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.074000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.074000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.074000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 10, "source": "L['block_mask'].q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.075000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.075000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.075000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 11, "source": "L['block_mask'].q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.076000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 9, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.076000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.076000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 12, "source": "L['block_mask'].full_q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.077000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 9, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.077000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 9}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.077000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 9, "id": 13, "source": "L['block_mask'].full_q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:46.082000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_q_": [1, 4, 512, 64], "l_k_": [1, 4, 512, 64], "l_v_": [1, 4, 512, 64], "l_block_mask_kv_num_blocks": [1, 1, 16], "l_block_mask_kv_indices": [1, 1, 16, 16], "l_block_mask_full_kv_num_blocks": [1, 1, 16], "l_block_mask_full_kv_indices": [1, 1, 16, 16], "l_block_mask_q_num_blocks": [1, 1, 16], "l_block_mask_q_indices": [1, 1, 16, 16], "l_block_mask_full_q_num_blocks": [1, 1, 16], "l_block_mask_full_q_indices": [1, 1, 16, 16], "out": [1, 4, 512, 64]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e6aa2ecb5d533ab181a6215ce16f359c"}
	class GraphModule(torch.nn.Module):
	    def forward(self, L_q_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_k_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_v_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_block_mask_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	        l_q_ = L_q_
	        l_k_ = L_k_
	        l_v_ = L_v_
	        l_block_mask_kv_num_blocks = L_block_mask_kv_num_blocks
	        l_block_mask_kv_indices = L_block_mask_kv_indices
	        l_block_mask_full_kv_num_blocks = L_block_mask_full_kv_num_blocks
	        l_block_mask_full_kv_indices = L_block_mask_full_kv_indices
	        l_block_mask_q_num_blocks = L_block_mask_q_num_blocks
	        l_block_mask_q_indices = L_block_mask_q_indices
	        l_block_mask_full_q_num_blocks = L_block_mask_full_q_num_blocks
	        l_block_mask_full_q_indices = L_block_mask_full_q_indices
	        
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	        score_mod_0 = self.score_mod_0
	        mask_fn_0 = self.mask_fn_0
	        flex_attention = torch.ops.higher_order.flex_attention(l_q_, l_k_, l_v_, score_mod_0, (l_block_mask_kv_num_blocks, l_block_mask_kv_indices, l_block_mask_full_kv_num_blocks, l_block_mask_full_kv_indices, l_block_mask_q_num_blocks, l_block_mask_q_indices, l_block_mask_full_q_num_blocks, l_block_mask_full_q_indices, 128, 128, mask_fn_0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  l_q_ = l_k_ = l_v_ = score_mod_0 = l_block_mask_kv_num_blocks = l_block_mask_kv_indices = l_block_mask_full_kv_num_blocks = l_block_mask_full_kv_indices = l_block_mask_q_num_blocks = l_block_mask_q_indices = l_block_mask_full_q_num_blocks = l_block_mask_full_q_indices = mask_fn_0 = None
	        out: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0];  flex_attention = None
	        return (out,)
	        
	    class score_mod_0(torch.nn.Module):
	        def forward(self, child: "f32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0", child_4: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:457 in score_mod, code: return score + (q - kv)
	            sub: "i32[][]cuda:0" = child_3 - child_4;  child_3 = child_4 = None
	            add: "f32[][]cuda:0" = child + sub;  child = sub = None
	            return add
	            
	    class mask_fn_0(torch.nn.Module):
	        def forward(self, child: "i32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
	            return ge
	            
V1206 15:24:46.082000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "37b24b3f85c3e5d89e19d2ed089a024a"}
	{
	"name": "backend_compile",
	"ts": 1733527486082618.8,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.082000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1cb1b51c68c2aa91138743ed176863dc"}
	{
	"name": "_recursive_pre_grad_passes",
	"ts": 1733527486082898.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.085000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "fe96e7264be7dc8710649f24088a12a7"}
	{
	"name": "_recursive_pre_grad_passes",
	"ts": 1733527486085403.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.087000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f3b0a502ac862b43ef8e578cbbbedaef"}
	{
	"name": "create_aot_dispatcher_function",
	"ts": 1733527486087844.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.137000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d952b3071dd9ba606a04d644841d9aba"}
	ViewAndMutationMeta(input_info=[InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True)],
	                    output_info=[OutputAliasInfo(output_type=<OutputType.non_alias: 1>,
	                                                raw_type=<class 'torch._subclasses.functional_tensor.FunctionalTensor'>,
	                                                base_idx=None,
	                                                dynamic_dims=set(),
	                                                requires_grad=False,
	                                                functional_tensor=None)],
	                    num_intermediate_bases=0,
	                    keep_input_mutations=True,
	                    traced_tangents=[],
	                    subclass_inp_meta=[PlainTensorMeta(unwrapped_idx=0,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=1,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=2,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=3,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=4,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=5,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=6,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=7,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=8,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=9,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=10,
	                                                      memory_format=None)],
	                    subclass_fw_graph_out_meta=[PlainTensorMeta(unwrapped_idx=0,
	                                                               memory_format=None)],
	                    subclass_tangent_meta=[],
	                    is_train=False,
	                    traced_tangent_metas=None,
	                    num_symints_saved_for_bw=None,
	                    grad_enabled_mutation=None,
	                    deterministic=None,
	                    static_input_indices=[],
	                    tokens={},
	                    indices_of_inputs_that_requires_grad_with_mutations_in_bw=[],
	                    bw_donated_idxs=None,
	                    num_backward_tokens=0)
V1206 15:24:46.138000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02884732a6f03bac463b0e1e5f9b1cf4"}
	class <lambda>(torch.nn.Module):
	    def forward(self, arg0_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg1_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg2_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg3_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg4_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg5_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg6_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg7_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg8_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg9_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg10_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	        sdpa_score0 = self.sdpa_score0
	        sdpa_mask0 = self.sdpa_mask0
	        flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None
	        getitem: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0];  flex_attention = None
	        return (getitem,)
	        
	    class sdpa_score0(torch.nn.Module):
	        def forward(self, arg0_1: "f32[][]cuda:0", arg1_1: "i32[][]cuda:0", arg2_1: "i32[][]cuda:0", arg3_1: "i32[][]cuda:0", arg4_1: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	            sub: "i32[][]cuda:0" = torch.ops.aten.sub.Tensor(arg3_1, arg4_1);  arg3_1 = arg4_1 = None
	            add: "f32[][]cuda:0" = torch.ops.aten.add.Tensor(arg0_1, sub);  arg0_1 = sub = None
	            return add
	            
	    class sdpa_mask0(torch.nn.Module):
	        def forward(self, arg0_1: "i32[][]cuda:0", arg1_1: "i32[][]cuda:0", arg2_1: "i32[][]cuda:0", arg3_1: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = torch.ops.aten.ge.Tensor(arg2_1, arg3_1);  arg2_1 = arg3_1 = None
	            return ge
	            
V1206 15:24:46.139000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "41345436bd100a92ac2e1c11dbcfde73"}
	{
	"name": "compile_fx.<locals>.fw_compiler_base",
	"ts": 1733527486139665.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.140000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ff1f544bdf98bb91144ed9d00ccfd2c4"}
	{
	"name": "_recursive_joint_graph_passes",
	"ts": 1733527486140025.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.240000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b7986424633a93e63eaef105f6813984"}
	{
	"name": "_recursive_joint_graph_passes",
	"ts": 1733527486240539.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.241000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "71eb226a27256d687d98919bf6379b6e"}
	{
	"name": "inductor_compile",
	"ts": 1733527486240972.5,
	"args": {
	"fn_name": "compile_fx_inner",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.241000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e3aa25638cfc1bbeacc1a3b1146ee001"}
	{
	"name": "inductor_codecache_torch_key",
	"ts": 1733527486241893.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.497000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2357634e283643a8bf96005629e2a966"}
	{
	"name": "inductor_codecache_torch_key",
	"ts": 1733527486497765.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.953000 1667746 torch/_inductor/compile_fx.py:835] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2685e2fe8ae1c10315e3657921ebac7a"}
	
	import torch
	from torch import tensor, device
	import torch.fx as fx
	from torch._dynamo.testing import rand_strided
	from math import inf
	import torch._inductor.inductor_prims
	
	import torch._dynamo.config
	import torch._inductor.config
	import torch._functorch.config
	import torch.fx.experimental._config
	torch._dynamo.config.cache_size_limit = 8
	torch._dynamo.config.accumulated_cache_size_limit = 256
	torch._dynamo.config.traceable_tensor_subclasses = set()
	torch._dynamo.config.suppress_errors = False
	torch._dynamo.config.allowed_functions_module_string_ignorelist = {'torch._decomp', 'torch.testing', 'torch._prims', 'torch._refs', 'torch.distributions'}
	torch._dynamo.config._ddp_optimization_mode = ['ddp_optimizer', 'python_reducer', 'python_reducer_without_compiled_forward', 'no_optimization']
	torch._dynamo.config.raise_on_ctx_manager_usage = True
	torch._dynamo.config._save_config_ignore = {'repro_after', 'skipfiles_inline_module_allowlist', 'repro_level', 'constant_functions'}
	torch._dynamo.config.log_compilation_metrics = False
	torch._dynamo.config.reorderable_logging_functions = set()
	torch._dynamo.config._autograd_backward_strict_mode_banned_ops = ['stride', 'requires_grad', 'storage_offset', 'layout', 'data', 'is_coalesced', 'is_complex', 'is_conj', 'is_contiguous', 'is_cpu', 'is_cuda', 'is_distributed', 'is_floating_point', 'is_inference', 'is_ipu', 'is_leaf', 'is_maia', 'is_meta', 'is_mkldnn', 'is_mps', 'is_mtia', 'is_neg', 'is_nested', 'is_nonzero', 'is_pinned', 'is_quantized', 'is_same_size', 'is_set_to', 'is_shared', 'is_signed', 'is_sparse', 'is_sparse_csr', 'is_vulkan', 'is_xla', 'is_xpu']
	torch._dynamo.config.fake_tensor_cache_enabled = True
	torch._dynamo.config.fake_tensor_cache_crosscheck_enabled = True
	torch._dynamo.config.compiled_autograd_kwargs_override = {}
	torch._inductor.config.fx_graph_cache = True
	torch._inductor.config.fx_graph_remote_cache = False
	torch._inductor.config.autotune_local_cache = False
	torch._inductor.config.autotune_remote_cache = False
	torch._inductor.config.bundled_autotune_remote_cache = False
	torch._inductor.config.pre_grad_fusion_options = {}
	torch._inductor.config.post_grad_fusion_options = {}
	torch._inductor.config.fx_passes_numeric_check = {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}
	torch._inductor.config.reorder_for_compute_comm_overlap_passes = ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']
	torch._inductor.config._fuse_ddp_communication_passes = ['fuse_ddp_with_concat_op', 'schedule_comm_wait']
	torch._inductor.config.aot_inductor.metadata = {}
	torch._inductor.config.aot_inductor.presets = {}
	torch._inductor.config.rocm.arch = []
	torch._inductor.config.rocm.ck_supported_arch = ['gfx90a', 'gfx940', 'gfx941', 'gfx942']
	torch._inductor.config._save_config_ignore = ['trace.upload_tar', 'joint_custom_pre_pass', 'joint_custom_post_pass', 'pre_grad_custom_pass']
	torch._inductor.config._cache_config_ignore_prefix = ['trace', 'cuda.cutlass_dir', 'worker_start_method', 'compile_threads', 'post_grad_custom_post_pass', 'post_grad_custom_pre_pass', 'always_complex_memory_overlap_TESTING_ONLY']
	torch._inductor.config.external_matmul = []
	torch._functorch.config.functionalize_rng_ops = False
	torch._functorch.config.enable_autograd_cache = True
	torch._functorch.config.fake_tensor_allow_unsafe_data_ptr_access = True
	torch._functorch.config.unlift_effect_tokens = True
	
	
	
	isolate_fails_code_str = None
	
	
	
	
	# torch version: 2.6.0a0+giteece9ec
	# torch cuda version: 12.2
	# torch git version: eece9ecd62cae84bc2f915fc48cffe43e30256aa
	
	
	# CUDA Info: 
	# nvcc: NVIDIA (R) Cuda compiler driver 
	# Copyright (c) 2005-2023 NVIDIA Corporation 
	# Built on Tue_Aug_15_22:02:13_PDT_2023 
	# Cuda compilation tools, release 12.2, V12.2.140 
	# Build cuda_12.2.r12.2/compiler.33191640_0 
	
	# GPU Hardware Info: 
	# NVIDIA H100 : 8 
	
	
	from torch.nn import *
	class Repro(torch.nn.Module):
	    def __init__(self) -> None:
	        super().__init__()
	        self.sdpa_score0 = <lambda>()
	        self.sdpa_mask0 = <lambda>()
	
	    
	    
	    def forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):
	        sdpa_score0 = self.sdpa_score0
	        sdpa_mask0 = self.sdpa_mask0
	        flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None
	        getitem = flex_attention[0];  flex_attention = None
	        return (getitem,)
	        
	def load_args(reader):
	    buf0 = reader.storage(None, 524288, device=device(type='cuda', index=0))
	    reader.tensor(buf0, (1, 4, 512, 64), is_leaf=True)  # arg0_1
	    buf1 = reader.storage(None, 524288, device=device(type='cuda', index=0))
	    reader.tensor(buf1, (1, 4, 512, 64), is_leaf=True)  # arg1_1
	    buf2 = reader.storage(None, 524288, device=device(type='cuda', index=0))
	    reader.tensor(buf2, (1, 4, 512, 64), is_leaf=True)  # arg2_1
	    buf3 = reader.storage(None, 64, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf3, (1, 1, 16), dtype=torch.int32, is_leaf=True)  # arg3_1
	    buf4 = reader.storage(None, 1024, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf4, (1, 1, 16, 16), dtype=torch.int32, is_leaf=True)  # arg4_1
	    buf5 = reader.storage(None, 64, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf5, (1, 1, 16), dtype=torch.int32, is_leaf=True)  # arg5_1
	    buf6 = reader.storage(None, 1024, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf6, (1, 1, 16, 16), dtype=torch.int32, is_leaf=True)  # arg6_1
	    buf7 = reader.storage(None, 64, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf7, (1, 1, 16), dtype=torch.int32, is_leaf=True)  # arg7_1
	    buf8 = reader.storage(None, 1024, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf8, (1, 1, 16, 16), dtype=torch.int32, is_leaf=True)  # arg8_1
	    buf9 = reader.storage(None, 64, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf9, (1, 1, 16), dtype=torch.int32, is_leaf=True)  # arg9_1
	    buf10 = reader.storage(None, 1024, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf10, (1, 1, 16, 16), dtype=torch.int32, is_leaf=True)  # arg10_1
	load_args._version = 0
	mod = Repro()
	if __name__ == '__main__':
	    from torch._dynamo.repro.after_aot import run_repro
	    with torch.no_grad():
	        run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None)
	        # To run it separately, do 
	        # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None)
	        # mod(*args)
V1206 15:24:46.957000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "59eead8c2c99ba4c19b6f2e1672cfb1c"}
	{
	"name": "_recursive_post_grad_passes",
	"ts": 1733527486957768.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.966000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "65cd5e088aa4752db9445bb36e57e3aa"}
	{
	"name": "_recursive_post_grad_passes",
	"ts": 1733527486966834.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:46.968000 1667746 torch/_inductor/compile_fx.py:898] {"inductor_post_grad_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02884732a6f03bac463b0e1e5f9b1cf4"}
	class <lambda>(torch.nn.Module):
	    def forward(self, arg0_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg1_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg2_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg3_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg4_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg5_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg6_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg7_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg8_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg9_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg10_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	        sdpa_score0 = self.sdpa_score0
	        sdpa_mask0 = self.sdpa_mask0
	        flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None
	        getitem: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0];  flex_attention = None
	        return (getitem,)
	        
	    class sdpa_score0(torch.nn.Module):
	        def forward(self, arg0_1: "f32[][]cuda:0", arg1_1: "i32[][]cuda:0", arg2_1: "i32[][]cuda:0", arg3_1: "i32[][]cuda:0", arg4_1: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	            sub: "i32[][]cuda:0" = torch.ops.aten.sub.Tensor(arg3_1, arg4_1);  arg3_1 = arg4_1 = None
	            add: "f32[][]cuda:0" = torch.ops.aten.add.Tensor(arg0_1, sub);  arg0_1 = sub = None
	            return add
	            
	    class sdpa_mask0(torch.nn.Module):
	        def forward(self, arg0_1: "i32[][]cuda:0", arg1_1: "i32[][]cuda:0", arg2_1: "i32[][]cuda:0", arg3_1: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = torch.ops.aten.ge.Tensor(arg2_1, arg3_1);  arg2_1 = arg3_1 = None
	            return ge
	            
V1206 15:24:46.972000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "48d0779166f135a77ff5cde83cf71d3e"}
	{
	"name": "GraphLowering.run",
	"ts": 1733527486972460.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.138000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "6627ba3ae1644229670ab4d3fb0d4169"}
	{
	"name": "GraphLowering.run",
	"ts": 1733527487138160.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.138000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1cd377482ee4a0ca5764fecedf20d954"}
	{
	"name": "GraphLowering.compile_to_fn",
	"ts": 1733527487138712.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.139000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f4590e34449b7bb09aed2c93c625760d"}
	{
	"name": "code_gen",
	"ts": 1733527487139024.8,
	"args": {
	"fn_name": "GraphLowering.compile_to_module",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.139000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7b65eb21df5478dbf8fba3b59850be45"}
	{
	"name": "GraphLowering.codegen",
	"ts": 1733527487139268.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.140000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9f9bd4fb5f640d8a4af811e82da8f2b7"}
	{
	"name": "Scheduler.__init__",
	"ts": 1733527487140643.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.144000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "36fb5520981a68e9cd0a7455ec7e42d1"}
	{
	"name": "Scheduler.fused_nodes",
	"ts": 1733527487144035.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.144000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ff4c0585a764bffbe15620787ef7984e"}
	{
	"name": "Scheduler.fused_nodes",
	"ts": 1733527487144372.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.146000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b2127891028545727d5538178e766dfd"}
	{
	"name": "Scheduler.__init__",
	"ts": 1733527487146903.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.147000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7d89af7c7d58849f171d9e78f71d86a2"}
	{
	"name": "Scheduler.codegen",
	"ts": 1733527487147125.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.153000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "3b4150db66839c4586992ffee613eae4"}
	{
	"name": "Scheduler.codegen",
	"ts": 1733527487153011.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.153000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "bf81162402f2ae6984f652a3f44baa84"}
	{
	"name": "PythonWrapperCodegen.generate",
	"ts": 1733527487153239.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.154000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "dcff0d1fc8b779e008cb24ec2e74e7f7"}
	{
	"name": "PythonWrapperCodegen.generate",
	"ts": 1733527487154470.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.154000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9be98be603274f80673ac2f6d241cec4"}
	{
	"name": "GraphLowering.codegen",
	"ts": 1733527487154692.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.155000 1667746 torch/_inductor/graph.py:2030] {"inductor_output_code": {"filename": "/tmp/tmpsn77tlsm/sm/csmb7f23sz2oa7mngdcoprpgrrbziqzirwhkjffdmxv6qhdf3e57.py"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9ced76d2b85dd84790793e85fcadcc28"}
	# AOT ID: ['0_inference']
	from ctypes import c_void_p, c_long, c_int
	import torch
	import math
	import random
	import os
	import tempfile
	from math import inf, nan
	from torch._inductor.hooks import run_intermediate_hooks
	from torch._inductor.utils import maybe_profile
	from torch._inductor.codegen.memory_planning import _align as align
	from torch import device, empty_strided
	from torch._inductor.async_compile import AsyncCompile
	from torch._inductor.select_algorithm import extern_kernels
	from torch._inductor.codegen.multi_kernel import MultiKernelCall
	import torch._inductor.kernel.flex_attention
	from torch._C import _cuda_getCurrentRawStream as get_raw_stream
	import triton
	import triton.language as tl
	from torch._inductor.runtime.triton_heuristics import (
	    grid,
	    split_scan_grid,
	    grid_combo_kernels,
	    start_graph,
	    end_graph,
	    cooperative_reduction_grid,
	)
	
	aten = torch.ops.aten
	inductor_ops = torch.ops.inductor
	_quantized = torch.ops._quantized
	assert_size_stride = torch._C._dynamo.guards.assert_size_stride
	empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu
	empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
	empty_strided_xpu = torch._C._dynamo.guards._empty_strided_xpu
	reinterpret_tensor = torch._C._dynamo.guards._reinterpret_tensor
	alloc_from_pool = torch.ops.inductor._alloc_from_pool
	async_compile = AsyncCompile()
	empty_strided_p2p = torch._C._distributed_c10d._SymmetricMemory.empty_strided_p2p
	
	
	# kernel path: /tmp/tmpsn77tlsm/xo/cxokkamvqulf4elvqs25iez7wbmqqfbp3i4qy7g2hwvher2zrziz.py
	# Topologically Sorted Source Nodes: [flex_attention], Original ATen: []
	# Source node to ATen node mapping:
	#   flex_attention => flex_attention
	# Graph fragment:
	#   %flex_attention : [num_users=1] = call_function[target=torch.ops.higher_order.flex_attention](args = (%arg0_1, %arg1_1, %arg2_1, %sdpa_score0, (%arg3_1, %arg4_1, %arg5_1, %arg6_1, %arg7_1, %arg8_1, %arg9_1, %arg10_1, 128, 128, %sdpa_mask0), 0.125, {PRESCALE_QK: False, ROWS_GUARANTEED_SAFE: False, BLOCKS_ARE_CONTIGUOUS: False, OUTPUT_LOGSUMEXP: True}, (), ()), kwargs = {})
	triton_tem_fused_0 = async_compile.triton('triton_tem_fused_0', '''
	import triton
	import triton.language as tl
	from triton.compiler.compiler import AttrsDescriptor
	
	from torch._inductor.runtime import triton_helpers, triton_heuristics
	from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
	from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
	
	@triton_heuristics.template(
	    num_stages=3,
	    num_warps=4,
	    triton_meta={'signature': {'arg_Q': '*fp32', 'arg_K': '*fp32', 'arg_V': '*fp32', 'arg_LSE': '*fp32', 'arg_KV_NUM_BLKS': '*i32', 'arg_KV_IDX': '*i32', 'arg_FULL_KV_NUM_BLKS': '*i32', 'arg_FULL_KV_IDX': '*i32', 'out_ptr0': '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8), equal_to_1=())]},
	    inductor_meta={'kernel_name': 'triton_tem_fused_0', 'backend_hash': '562E840D41CEB1D8E51DE726EA7592B0C37A0C6FBD72CF4E958863CEC11D41A7', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': False, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
	)
	@triton.jit
	def triton_tem_fused_0(arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0):
	    PRESCALE_QK : tl.constexpr = False
	    ROWS_GUARANTEED_SAFE : tl.constexpr = False
	    BLOCKS_ARE_CONTIGUOUS : tl.constexpr = False
	    OUTPUT_LOGSUMEXP : tl.constexpr = True
	    FLOAT32_PRECISION : tl.constexpr = 'ieee'
	    IS_DIVISIBLE : tl.constexpr = True
	    SM_SCALE : tl.constexpr = 0.125
	    GQA_SHARED_HEADS : tl.constexpr = 1
	    HAS_FULL_BLOCKS : tl.constexpr = True
	    QK_HEAD_DIM : tl.constexpr = 64
	    V_HEAD_DIM : tl.constexpr = 64
	    BLOCK_M : tl.constexpr = 128
	    BLOCK_N : tl.constexpr = 32
	    SPARSE_Q_BLOCK_SIZE : tl.constexpr = 128
	    SPARSE_KV_BLOCK_SIZE : tl.constexpr = 128
	    Q = arg_Q
	    K = arg_K
	    V = arg_V
	    LSE = arg_LSE
	    KV_NUM_BLKS = arg_KV_NUM_BLKS
	    KV_IDX = arg_KV_IDX
	    FULL_KV_NUM_BLKS = arg_FULL_KV_NUM_BLKS
	    FULL_KV_IDX = arg_FULL_KV_IDX
	
	    # Sub notation for this kernel:
	    #
	    # Q: Query, K: Key, V: Value
	    # M: Number of queries, N: Number of keys/values, D: Model dimension
	    # QK_HEAD_DIM: The dimension of the query and key embeddings
	    # V_HEAD_DIM: The dimension of the value embeddings
	    # z: Batch size, h: Number of heads, m: Number of queries per head, k: Number of keys per head
	    # GQA_SHARED_HEADS: number of query heads sharing one kv head in GQA setups.
	    #
	    # The following FULL_* and PARTIAL_* is defined in the block sparse mask grid, rather than the thread block grid.
	    # KV_NUM_BLKS: The number of KV blocks (that may or may not require masking) for each query.
	    # KV_IDX: The indices of KV blocks (that may or may not require masking) for each query.
	    # FULL_KV_NUM_BLKS: The number of fully unmasked KV blocks (so we don't need masking) for each query.
	    # FULL_KV_IDX: The indices of fully unmasked KV blocks (so we don't need masking) for each query.
	    #
	    # OUTPUT_LOGSUMEXP: We only need to store the logsumexp if we require grad
	    #
	    # (Modifiable) Performance tuning options
	    # BLOCK_M: The thread block size across the seqlen dim of Q.
	    # BLOCK_N: Iterate over BLOCK_N across the seqlen dim of K/V in each thread block.
	
	    # The below are kernel options that can be applied for certain score_mods,
	    # or involve a numerics vs. perf tradeoff
	    # PRESCALE_QK: Whether to pre-scale QK by 1/sqrt(d) and change of base. Has
	    # about 20% more numerical error, but slightly faster.
	    # ROWS_GUARANTEED_SAFE: Is it guaranteed that at least one value in each row
	    # is not masked out? If so, we can skip an extra safety check
	    # BLOCKS_ARE_CONTIGUOUS: Is it guaranteed that all blocks in the mask are
	    # contiguous? If so, we don't need to do an indirect jump for every block
	
	    tl.static_assert(SPARSE_Q_BLOCK_SIZE >= BLOCK_M and SPARSE_Q_BLOCK_SIZE % BLOCK_M == 0)
	    tl.static_assert(SPARSE_KV_BLOCK_SIZE >= BLOCK_N and SPARSE_KV_BLOCK_SIZE % BLOCK_N == 0)
	
	    # Define strides of inputs
	    stride_qz, stride_qh, stride_qm, stride_qk = 131072, 32768, 64, 1
	    stride_kz, stride_kh, stride_kn, stride_kk = 131072, 32768, 64, 1
	    stride_vz, stride_vh, stride_vn, stride_vk = 131072, 32768, 64, 1
	
	    ZQ = 1
	    HQ = 4
	    Q_LEN = 512
	    ZKV = 1
	    KV_LEN = 512
	
	    MATMUL_PRECISION = Q.dtype.element_ty
	
	    q_start = tl.program_id(0)
	    off_zq = tl.program_id(1) // HQ
	    off_hq = tl.program_id(1) % HQ
	
	    # We support two cases for batch dimension. a) (ZKV == ZQ) where off_zkv = off_zq.
	    # b) (ZKV == 1 and ZQ > 1) where KV is broadcasted along the batch dimension and off_zkv=0.
	    off_zkv = off_zq % ZKV
	    off_hkv = off_hq // GQA_SHARED_HEADS
	    off_g = off_hq % GQA_SHARED_HEADS
	
	    q_offset = off_zq * stride_qz + off_hq * stride_qh
	    k_offset = off_zkv * stride_kz + off_hkv * stride_kh
	    v_offset = off_zkv * stride_vz + off_hkv * stride_vh
	
	    Q = Q + q_offset
	    K = K + k_offset
	    V = V + v_offset
	
	    SPARSE_Z = 1
	    SPARSE_HQ = 1
	
	    sparse_idx_z = off_zq % SPARSE_Z
	    sparse_idx_hq = off_hq % SPARSE_HQ
	
	    SPARSE_Q_MULTIPLE: tl.constexpr = (SPARSE_Q_BLOCK_SIZE // BLOCK_M)
	    SPARSE_KV_MULTIPLE: tl.constexpr = (SPARSE_KV_BLOCK_SIZE // BLOCK_N)
	
	    stride_kv_num_blks_h = 16
	    stride_kv_idx_h = 256
	    stride_kv_idx_m = 16
	
	    # initialize pointer to m and l
	    m_i = tl.zeros([BLOCK_M], dtype=tl.float32) - float("inf")
	    l_i = tl.zeros([BLOCK_M], dtype=tl.float32)
	    acc = tl.zeros([BLOCK_M, V_HEAD_DIM], dtype=tl.float32)
	
	    offs_m = q_start * BLOCK_M + tl.arange(0, BLOCK_M)
	
	    # KV_IDX and KV_NUM_BLKS are always contiguous.
	    sparse_hz_offset = sparse_idx_z * SPARSE_HQ + sparse_idx_hq
	    sparse_kv_num_blks_offset = sparse_hz_offset * stride_kv_num_blks_h + q_start // SPARSE_Q_MULTIPLE
	    sparse_kv_idx_offset = sparse_hz_offset * stride_kv_idx_h + (q_start // SPARSE_Q_MULTIPLE) * stride_kv_idx_m  # noqa: B950
	
	    Q_block_ptr = tl.make_block_ptr(
	        base=Q,
	        shape=(Q_LEN, QK_HEAD_DIM),
	        strides=(stride_qm, stride_qk),
	        offsets=(q_start * BLOCK_M, 0),
	        block_shape=(BLOCK_M, QK_HEAD_DIM),
	        order=(1, 0)
	    )
	
	    # load q: it stays in SRAM throughout the inner loop.
	    if IS_DIVISIBLE:
	        q = tl.load(Q_block_ptr)
	    else:
	        # boundary check is not free, so we only do it when necessary.
	        q = tl.load(Q_block_ptr, boundary_check=(0,), padding_option = "zero")
	
	    # ~~~~~~~~~~~~~~ normal blocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	    # We don't know anything "special" about these blocks, so we need to apply
	    # both score_mod and mask_mod to it
	    kv_indices = KV_IDX + sparse_kv_idx_offset
	    kv_start = tl.load(kv_indices) * SPARSE_KV_BLOCK_SIZE # first kv block we're loading
	    kv_num_blocks = tl.load(KV_NUM_BLKS + sparse_kv_num_blks_offset)
	    block_n_end = tl.minimum(kv_num_blocks * SPARSE_KV_MULTIPLE, tl.maximum(tl.cdiv(KV_LEN, BLOCK_N), 1))
	
	    K_block_ptr = tl.make_block_ptr(
	        base=K,
	        shape=(QK_HEAD_DIM, KV_LEN),
	        strides=(stride_kk, stride_kn),
	        offsets=(0, kv_start),
	        block_shape=(QK_HEAD_DIM, BLOCK_N),
	        order=(0, 1)
	    )
	    V_block_ptr = tl.make_block_ptr(
	        base=V,
	        shape=(KV_LEN, V_HEAD_DIM),
	        strides=(stride_vn, stride_vk),
	        offsets=(kv_start, 0),
	        block_shape=(BLOCK_N, V_HEAD_DIM),
	        order=(1, 0)
	    )
	    offs_n = kv_start + tl.arange(0, BLOCK_N)
	
	    acc, l_i, m_i = forward_inner(
	        arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	        q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	        acc, l_i, m_i,
	        off_zq, off_hq, offs_m[:, None], offs_n[None, :],
	        kv_indices, kv_num_blocks,
	        0, block_n_end,
	        MATMUL_PRECISION,
	        IS_FULL_BLOCKS=False,
	    )
	
	    # ~~~~~~~~~~~~~~ "full" blocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	    # We know these blocks are guaranteed to be "full", so we don't need to
	    # apply mask_mod to them - only score_mod
	    if HAS_FULL_BLOCKS:
	        # FULL_KV_IDX and FULL_KV_NUM_BLKS are always contiguous.
	        kv_indices = FULL_KV_IDX + sparse_kv_idx_offset
	        kv_start = tl.load(kv_indices) * SPARSE_KV_BLOCK_SIZE # first kv block we're loading
	        kv_num_blocks = tl.load(FULL_KV_NUM_BLKS + sparse_kv_num_blks_offset)
	        block_n_end = tl.minimum(kv_num_blocks * SPARSE_KV_MULTIPLE, tl.maximum(tl.cdiv(KV_LEN, BLOCK_N), 1))
	
	        K_block_ptr = tl.make_block_ptr(
	            base=K,
	            shape=(QK_HEAD_DIM, KV_LEN),
	            strides=(stride_kk, stride_kn),
	            offsets=(0, kv_start),
	            block_shape=(QK_HEAD_DIM, BLOCK_N),
	            order=(0, 1)
	        )
	        V_block_ptr = tl.make_block_ptr(
	            base=V,
	            shape=(KV_LEN, V_HEAD_DIM),
	            strides=(stride_vn, stride_vk),
	            offsets=(kv_start, 0),
	            block_shape=(BLOCK_N, V_HEAD_DIM),
	            order=(1, 0)
	        )
	        offs_n = kv_start + tl.arange(0, BLOCK_N)
	
	        acc, l_i, m_i = forward_inner(
	            arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	            q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	            acc, l_i, m_i,
	            off_zq, off_hq, offs_m[:, None], offs_n[None, :],
	            kv_indices, kv_num_blocks,
	            0, block_n_end,
	            MATMUL_PRECISION,
	            IS_FULL_BLOCKS=True,
	        )
	
	
	    # [Note] Handle fully masked out rows:
	    # Li will be the sum(e^(-inf)) == 0.0 for masked out rows, mi will be -inf.
	    # We set Li to 1.0 which will result in lse/out = 0.0 | after the log(li) + mi(0.0) step
	    l_i = tl.where(l_i == 0.0, 1, l_i)
	
	    acc = acc / l_i[:, None]
	    idx_zq = tl.program_id(1) // HQ
	    idx_hq = tl.program_id(1) % HQ
	    idx_m = offs_m[:, None]
	    idx_d = tl.arange(0, V_HEAD_DIM)[None, :]
	
	    mask = idx_m < Q_LEN
	
	    xindex = idx_d + 64*idx_m + 32768*idx_hq + 131072*idx_zq
	    tl.store(out_ptr0 + (tl.broadcast_to(idx_d + 64*idx_m + 32768*idx_hq, acc.shape)), acc, mask)
	
	    if OUTPUT_LOGSUMEXP:
	        off_hz = tl.program_id(1)
	        l_ptrs = LSE + off_hz * Q_LEN + offs_m
	        lse = m_i + tl.math.log2(l_i)
	        if IS_DIVISIBLE:
	            tl.store(l_ptrs, lse)
	        else:
	            tl.store(l_ptrs, lse, mask=offs_m < Q_LEN)
	
	@triton.jit
	def forward_inner(
	    arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	    q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	    # accumulated values
	    acc, l_i, m_i,
	    # Offsets used as inputs to score_mod & mask_mod
	    # of size [BLOCK_M, BLOCK_N] or scalar.
	    off_z, off_h, offs_m, offs_n,
	    # blocksparse data
	    kv_indices, kv_num_blocks,
	    # start kv and end kv block
	    block_n_start, block_n_end,
	    MATMUL_PRECISION,
	    IS_FULL_BLOCKS,
	):
	    # Redefines all kernel parameters (BLOCK_M, etc.) so we don't need to plumb them all through
	    PRESCALE_QK : tl.constexpr = False
	    ROWS_GUARANTEED_SAFE : tl.constexpr = False
	    BLOCKS_ARE_CONTIGUOUS : tl.constexpr = False
	    OUTPUT_LOGSUMEXP : tl.constexpr = True
	    FLOAT32_PRECISION : tl.constexpr = 'ieee'
	    IS_DIVISIBLE : tl.constexpr = True
	    SM_SCALE : tl.constexpr = 0.125
	    GQA_SHARED_HEADS : tl.constexpr = 1
	    HAS_FULL_BLOCKS : tl.constexpr = True
	    QK_HEAD_DIM : tl.constexpr = 64
	    V_HEAD_DIM : tl.constexpr = 64
	    BLOCK_M : tl.constexpr = 128
	    BLOCK_N : tl.constexpr = 32
	    SPARSE_Q_BLOCK_SIZE : tl.constexpr = 128
	    SPARSE_KV_BLOCK_SIZE : tl.constexpr = 128
	
	
	    SPARSE_KV_MULTIPLE: tl.constexpr = (SPARSE_KV_BLOCK_SIZE // BLOCK_N)
	    RCP_LN2: tl.constexpr = 1.44269504
	
	    if PRESCALE_QK:
	        q = (q * SM_SCALE * RCP_LN2).to(MATMUL_PRECISION)
	
	    # loop over k, v and update accumulator until block_n_end
	    for start_n in range(block_n_start, block_n_end):
	        if IS_DIVISIBLE:
	            acc, l_i, m_i = forward_block_mn(
	                arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	                q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	                # accumulated values
	                acc, l_i, m_i,
	                # Offsets
	                off_z, off_h, offs_m, offs_n,
	                MATMUL_PRECISION, RCP_LN2,
	                IS_FULL_BLOCKS,
	            )
	        else:
	            # Benchmark shows even we applied mod & mask to each block for non divisible seqlen,
	            # it's on par or slightly faster than only applying to the last block in fwd.
	            # However, we choose different strategy for bwd, where we only apply mod & mask
	            # to the last block because it's faster a lot.
	            acc, l_i, m_i = forward_block_mn(
	                arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	                q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	                # accumulated values
	                acc, l_i, m_i,
	                # Offsets
	                off_z, off_h, offs_m, offs_n,
	                MATMUL_PRECISION, RCP_LN2,
	                IS_FULL_BLOCKS, CHECK_BLOCK_BOUNDARY=True,
	            )
	
	        # update pointers
	        offset = get_offset_for_next_block(
	            start_n, kv_indices, kv_num_blocks,
	            SPARSE_KV_BLOCK_SIZE, SPARSE_KV_MULTIPLE, BLOCK_N, BLOCKS_ARE_CONTIGUOUS
	        )
	
	        V_block_ptr = tl.advance(V_block_ptr, (offset, 0))
	        K_block_ptr = tl.advance(K_block_ptr, (0, offset))
	
	        offs_n = offs_n + offset
	
	    return acc, l_i, m_i
	
	
	@triton.jit
	def get_offset_for_next_block(
	    loop_iter, col_indices, total_blocks,
	    SPARSE_BLOCK, SPARSE_BLOCK_MULTIPLE, BLOCK,
	    BLOCKS_ARE_CONTIGUOUS: tl.constexpr
	):
	    if BLOCKS_ARE_CONTIGUOUS:
	        return BLOCK
	    cur_block_idx = loop_iter // SPARSE_BLOCK_MULTIPLE
	    cur_block = tl.load(col_indices + cur_block_idx, eviction_policy="evict_last")
	    next_block = tl.load(col_indices + cur_block_idx + 1, eviction_policy="evict_last", mask=cur_block_idx + 1 < total_blocks)
	    needs_jump = (loop_iter + 1) % SPARSE_BLOCK_MULTIPLE == 0
	    jump_to_block = (next_block - cur_block ) * SPARSE_BLOCK - (SPARSE_BLOCK_MULTIPLE - 1) * BLOCK
	    offset = jump_to_block * needs_jump + (1 - needs_jump) * BLOCK
	    return offset
	
	@triton.jit
	def forward_block_mn(
	    arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	    q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	    # accumulated values
	    acc, l_i, m_i,
	    # Offsets
	    off_z, off_h, offs_m, offs_n,
	    MATMUL_PRECISION, RCP_LN2,
	    IS_FULL_BLOCKS, CHECK_BLOCK_BOUNDARY=False,
	):
	    # Redefines all kernel parameters (BLOCK_M, etc.) so we don't need to plumb them all through
	    PRESCALE_QK : tl.constexpr = False
	    ROWS_GUARANTEED_SAFE : tl.constexpr = False
	    BLOCKS_ARE_CONTIGUOUS : tl.constexpr = False
	    OUTPUT_LOGSUMEXP : tl.constexpr = True
	    FLOAT32_PRECISION : tl.constexpr = 'ieee'
	    IS_DIVISIBLE : tl.constexpr = True
	    SM_SCALE : tl.constexpr = 0.125
	    GQA_SHARED_HEADS : tl.constexpr = 1
	    HAS_FULL_BLOCKS : tl.constexpr = True
	    QK_HEAD_DIM : tl.constexpr = 64
	    V_HEAD_DIM : tl.constexpr = 64
	    BLOCK_M : tl.constexpr = 128
	    BLOCK_N : tl.constexpr = 32
	    SPARSE_Q_BLOCK_SIZE : tl.constexpr = 128
	    SPARSE_KV_BLOCK_SIZE : tl.constexpr = 128
	
	
	    # -- load k --
	    if IS_DIVISIBLE:
	        k = tl.load(K_block_ptr)
	    else:
	        k = tl.load(K_block_ptr, boundary_check=(1,), padding_option = "zero")
	    # -- compute qk ---
	    qk = tl.dot(q, k, input_precision=FLOAT32_PRECISION) # TODO: use cuda matmul when q_len <= 2.
	    if not PRESCALE_QK:
	        qk *= SM_SCALE
	    # ~~~~~~~~~~~~~~~~~~~ Apply score modification  ~~~~~~~~~~~~~~~~~~~
	    if CHECK_BLOCK_BOUNDARY:
	        # If this is the last block of a non divisible seqlen, we still need to load [BLOCK_M, BLOCK_N] elements,
	        # which is larger than the actual number of elements. To avoid access memory out of bound,
	        # we need to mask out the elements that are out of Q_LEN & KV_LEN.
	        m = offs_m % Q_LEN
	        n = offs_n % KV_LEN
	    else:
	        m = offs_m
	        n = offs_n
	
	    tmp0 = (m) - (n)
	    tmp1 = tmp0.to(tl.float32)
	    tmp2 = (qk) + tmp1
	    post_mod_scores = tmp2
	
	
	    if CHECK_BLOCK_BOUNDARY:
	        # Mask out the elements that are out of the KV_LEN for non divisible seqlen.
	        post_mod_scores = tl.where(offs_n < KV_LEN, post_mod_scores, float("-inf"))
	
	    if not IS_FULL_BLOCKS:
	        tmp3 = (m) >= (n)
	        mask_mod_output = tmp3
	
	
	        if CHECK_BLOCK_BOUNDARY:
	            mask_mod_output = tl.where(offs_n < KV_LEN, mask_mod_output, False)
	        # apply mask for partially unmasked blocks
	        post_mod_scores = tl.where(mask_mod_output, post_mod_scores, float("-inf"))
	
	    # TODO: In the case that score_mod is linear, this can be LICMed
	    if not PRESCALE_QK:
	        post_mod_scores *= RCP_LN2
	    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	
	    # -- compute scaling constant ---
	    m_ij = tl.maximum(m_i, tl.max(post_mod_scores, 1))
	    if not ROWS_GUARANTEED_SAFE:
	        masked_out_rows = (m_ij == float("-inf"))
	        m_ij_masked = tl.where(masked_out_rows, 0, m_ij)
	    else:
	        m_ij_masked = m_ij
	
	    alpha = tl.math.exp2(m_i - m_ij_masked)
	    p = tl.math.exp2(post_mod_scores - m_ij_masked[:, None])
	
	    # NB: l_i update is pulled up here since it's a bit faster
	    # NB: For headdim=256, it's faster to move it back down to after m_i =
	    # m_ij
	    l_i = l_i * alpha + tl.sum(p, 1)
	    # # -- scale and update acc --
	    acc = acc * alpha[:, None]
	
	    if IS_DIVISIBLE:
	        v = tl.load(V_block_ptr)
	    else:
	        v = tl.load(V_block_ptr, boundary_check=(0,), padding_option = "zero")
	    acc = tl.dot(p.to(MATMUL_PRECISION), v, acc, input_precision=FLOAT32_PRECISION)
	
	    # -- update m_i
	    m_i = m_ij
	
	    return acc, l_i, m_i
	''', device_str='cuda')
	meta0 = {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True, 'FLOAT32_PRECISION': "'ieee'", 'IS_DIVISIBLE': True, 'SM_SCALE': 0.125, 'GQA_SHARED_HEADS': 1, 'HAS_FULL_BLOCKS': True, 'QK_HEAD_DIM': 64, 'V_HEAD_DIM': 64, 'BLOCK_M': 128, 'BLOCK_N': 32, 'SPARSE_Q_BLOCK_SIZE': 128, 'SPARSE_KV_BLOCK_SIZE': 128}
	
	
	async_compile.wait(globals())
	del async_compile
	
	def call(args):
	    arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1 = args
	    args.clear()
	    assert_size_stride(arg0_1, (1, 4, 512, 64), (131072, 32768, 64, 1))
	    assert_size_stride(arg1_1, (1, 4, 512, 64), (131072, 32768, 64, 1))
	    assert_size_stride(arg2_1, (1, 4, 512, 64), (131072, 32768, 64, 1))
	    assert_size_stride(arg3_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg4_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    assert_size_stride(arg5_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg6_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    assert_size_stride(arg7_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg8_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    assert_size_stride(arg9_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg10_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    buf0 = empty_strided_cuda((1, 4, 512), (2048, 512, 1), torch.float32)
	    with torch.cuda._DeviceGuard(0):
	        torch.cuda.set_device(0)
	        buf1 = empty_strided_cuda((1, 4, 512, 64), (131072, 32768, 64, 1), torch.float32)
	        # Topologically Sorted Source Nodes: [flex_attention], Original ATen: []
	        stream0 = get_raw_stream(0)
	        triton_tem_fused_0.run(arg0_1, arg1_1, arg2_1, buf0, arg3_1, arg4_1, arg5_1, arg6_1, buf1, grid=torch._inductor.kernel.flex_attention.flex_attention_grid(1, 4, 512, 64, meta0), stream=stream0)
	        del arg0_1
	        del arg1_1
	        del arg2_1
	        del arg3_1
	        del arg4_1
	        del arg5_1
	        del arg6_1
	        del buf0
	    return (buf1, )
	
	
	def benchmark_compiled_module(times=10, repeat=10):
	    from torch._dynamo.testing import rand_strided
	    from torch._inductor.utils import print_performance
	    arg0_1 = rand_strided((1, 4, 512, 64), (131072, 32768, 64, 1), device='cuda:0', dtype=torch.float32)
	    arg1_1 = rand_strided((1, 4, 512, 64), (131072, 32768, 64, 1), device='cuda:0', dtype=torch.float32)
	    arg2_1 = rand_strided((1, 4, 512, 64), (131072, 32768, 64, 1), device='cuda:0', dtype=torch.float32)
	    arg3_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg4_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg5_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg6_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg7_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg8_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg9_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg10_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    fn = lambda: call([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1])
	    return print_performance(fn, times=times, repeat=repeat)
	
	
	if __name__ == "__main__":
	    from torch._inductor.wrapper_benchmark import compiled_module_main
	    compiled_module_main('None', benchmark_compiled_module)
	
V1206 15:24:47.155000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "495b4d06507b7298850f179495a17b02"}
	{
	"name": "PyCodeCache.load_by_key_path",
	"ts": 1733527487155553.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:47.173000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "6dee17cfbaa0ca0649062deba0d7d459"}
	{
	"name": "async_compile.wait",
	"ts": 1733527487173738.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.157000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2d51e2d3dcba45c4d0d4d29f299f024a"}
	{
	"name": "async_compile.wait",
	"ts": 1733527490157741.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.158000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0b68c334c8533667a781e2f662968ca9"}
	{
	"name": "PyCodeCache.load_by_key_path",
	"ts": 1733527490158210.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.158000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "60e7a41770b95fe1d25ce4274e02d3ef"}
	{
	"name": "code_gen",
	"ts": 1733527490158517.5,
	"args": {
	"fn_name": "GraphLowering.compile_to_module",
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.158000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02264d8aae3ebc56684debf0c3f9f4de"}
	{
	"name": "GraphLowering.compile_to_fn",
	"ts": 1733527490158861.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.251000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a469a724b0141b56eff140f8388e6f2b"}
	{
	"name": "TritonBundler.collect",
	"ts": 1733527490250938.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.252000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b19aaa17a3d9b352d63772e3deb35b7e"}
	{
	"name": "TritonBundler.collect",
	"ts": 1733527490252606.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.256000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c0c67812e1ca34090ce7aea9849e792e"}
	{
	"name": "fx_graph_cache_miss",
	"ts": 1733527486241755.2,
	"args": {
	"key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau",
	"components": [
	"[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False",
	"[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False",
	"[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2",
	"[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4",
	"[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5",
	"[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6",
	"[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8",
	"[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False",
	"[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False",
	"[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True",
	"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}",
	"[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False",
	"[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True",
	"[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default",
	"[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300",
	"[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False",
	"[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP",
	"[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True",
	"[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False",
	"[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0",
	"[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0",
	"[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ",
	"[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT",
	"[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4",
	"[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1",
	"[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128",
	"[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False",
	"[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None",
	"[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096",
	"[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128",
	"[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True",
	"[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False",
	"[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None",
	"[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None",
	"[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []",
	"[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']",
	"[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False",
	"[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp",
	"[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton",
	"[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host",
	"[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda",
	"[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021",
	"[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
	],
	"cache_event_time": 1733527486949451176,
	"cache_state": "miss",
	"triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_tem_fused_0'])",
	"time_taken_ns": 4009141776,
	"compile_id": "1/0"
	},
	"ph": "i",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0,
	"s": "p"
	}
V1206 15:24:50.257000 1667746 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_miss", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0ad616f7fe717a86ed9e39486f7955e2"}
	{"key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau", "components": ["[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2", "[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4", "[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5", "[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6", "[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8", "[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}", "[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733527486949451176, "cache_state": "miss", "triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_tem_fused_0'])", "time_taken_ns": 4009141776, "compile_id": "1/0"}
V1206 15:24:50.257000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c7114cd61939a8286e15f75f4b1dcb23"}
	{
	"name": "inductor_compile",
	"ts": 1733527490257456.0,
	"args": {
	"fn_name": "compile_fx_inner",
	"compile_id": "1/0",
	"is_backward": false,
	"cache_state": "miss",
	"cache_event_time": 1733527486241755330,
	"key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau",
	"components": [
	"[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False",
	"[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False",
	"[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2",
	"[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4",
	"[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5",
	"[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6",
	"[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8",
	"[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False",
	"[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False",
	"[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True",
	"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}",
	"[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False",
	"[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True",
	"[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default",
	"[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300",
	"[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False",
	"[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP",
	"[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True",
	"[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False",
	"[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0",
	"[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0",
	"[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ",
	"[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT",
	"[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4",
	"[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1",
	"[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128",
	"[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False",
	"[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None",
	"[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096",
	"[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128",
	"[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True",
	"[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False",
	"[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None",
	"[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None",
	"[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []",
	"[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']",
	"[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False",
	"[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp",
	"[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton",
	"[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host",
	"[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda",
	"[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021",
	"[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
	],
	"cache_bypass_reason": null,
	"remote_cache_enabled": false,
	"local_cache_enabled": true
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.258000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "df504ff9f036fd058109eb7eba60947f"}
	{
	"name": "compile_fx.<locals>.fw_compiler_base",
	"ts": 1733527490258109.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.259000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8774d579b741586b86c16bfd883c37fc"}
	{
	"name": "create_aot_dispatcher_function",
	"ts": 1733527490259870.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.260000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "dee620e7977ddcc771eb4099546c78e8"}
	{
	"name": "autograd_cache_bypass",
	"ts": 1733527486087831.0,
	"args": {
	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
	"cache_bypass_hard_exception": false,
	"key": null,
	"cache_state": "bypass",
	"components": [],
	"compile_id": "1/0"
	},
	"ph": "i",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0,
	"s": "p"
	}
V1206 15:24:50.260000 1667746 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_bypass", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5601d02186053adcc1ba29fd248c1d20"}
	{"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention", "cache_bypass_hard_exception": false, "key": null, "cache_state": "bypass", "components": [], "compile_id": "1/0"}
V1206 15:24:50.260000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1b940c0ffee7b576f294d159fb3b7a84"}
	{
	"name": "backend_compile",
	"ts": 1733527490260502.8,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "1/0",
	"requires_subclass_dispatch": false,
	"dispatch_mode": "inference",
	"cache_state": "bypass",
	"cache_event_time": 1733527486087830904,
	"key": null,
	"components": [],
	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
	"remote_cache_enabled": false,
	"local_cache_enabled": true
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.275000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "004a7ba640839d40992c1546206d56ff"}
	
	TREE_GUARD_MANAGER:
	+- RootGuardManager
	| +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None                           # _dynamo/output_graph.py:484 in init_ambient_guards
	| +- GLOBAL_STATE: ___check_global_state()
	| +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack()
	| +- GuardManager: source=L['k'], accessed_by=DictGetItemGuardAccessor('k')
	| | +- TYPE_MATCH: ___check_type_id(L['k'], 82181376)                          
	| | +- TENSOR_MATCH: check_tensor(L['k'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | +- NO_HASATTR: hasattr(L['k'], '_dynamo_dynamic_indices') == False         
	| | +- NO_TENSOR_ALIASING: check_no_aliasing(L['k'], L['q'], L['v'], L['block_mask'].q_indices, L['block_mask'].kv_indices, L['block_mask'].q_num_blocks, L['block_mask'].kv_num_blocks, L['block_mask'].full_q_indices, L['block_mask'].full_kv_indices, L['block_mask'].full_q_num_blocks, L['block_mask'].full_kv_num_blocks)
	| | +- GuardManager: source=L['k'].dim, accessed_by=GetAttrGuardAccessor(dim)
	| | +- GuardManager: source=L['k'].size, accessed_by=GetAttrGuardAccessor(size)
	| +- GuardManager: source=L['q'], accessed_by=DictGetItemGuardAccessor('q')
	| | +- TYPE_MATCH: ___check_type_id(L['q'], 82181376)                          
	| | +- TENSOR_MATCH: check_tensor(L['q'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | +- NO_HASATTR: hasattr(L['q'], '_dynamo_dynamic_indices') == False         
	| | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['q'].dim, accessed_by=GetAttrGuardAccessor(dim)
	| | +- GuardManager: source=L['q'].size, accessed_by=GetAttrGuardAccessor(size)
	| +- GuardManager: source=L['v'], accessed_by=DictGetItemGuardAccessor('v')
	| | +- TYPE_MATCH: ___check_type_id(L['v'], 82181376)                          
	| | +- TENSOR_MATCH: check_tensor(L['v'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | +- NO_HASATTR: hasattr(L['v'], '_dynamo_dynamic_indices') == False         
	| | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['v'].dim, accessed_by=GetAttrGuardAccessor(dim)
	| | +- GuardManager: source=L['v'].size, accessed_by=GetAttrGuardAccessor(size)
	| +- GuardManager: source=L['score_mod'], accessed_by=DictGetItemGuardAccessor('score_mod')
	| | +- GuardManager: source=L['score_mod'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | +- ID_MATCH: ___check_obj_id(L['score_mod'].__code__, 140062268556144)   
	| +- GuardManager: source=L['block_mask'], accessed_by=DictGetItemGuardAccessor('block_mask')
	| | +- TYPE_MATCH: ___check_type_id(L['block_mask'], 139679664)                
	| | +- GuardManager: source=L['block_mask'].mask_mod, accessed_by=GetAttrGuardAccessor(mask_mod)
	| | | +- GuardManager: source=L['block_mask'].mask_mod.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].mask_mod.__code__, 140062268556768)
	| | +- GuardManager: source=L['block_mask'].q_indices, accessed_by=GetAttrGuardAccessor(q_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].q_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].q_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].BLOCK_SIZE, accessed_by=GetAttrGuardAccessor(BLOCK_SIZE)
	| | | +- TYPE_MATCH: ___check_type_id(L['block_mask'].BLOCK_SIZE, 8812224)       
	| | | +- LENGTH_CHECK: len(L['block_mask'].BLOCK_SIZE) == 2                        
	| | | +- GuardManager: source=L['block_mask'].BLOCK_SIZE[0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | +- EQUALS_MATCH: L['block_mask'].BLOCK_SIZE[0] == 128                        
	| | | +- GuardManager: source=L['block_mask'].BLOCK_SIZE[1], accessed_by=TupleGetItemGuardAccessor(1)
	| | | | +- EQUALS_MATCH: L['block_mask'].BLOCK_SIZE[1] == 128                        
	| | +- GuardManager: source=L['block_mask'].kv_indices, accessed_by=GetAttrGuardAccessor(kv_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].kv_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].kv_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].q_num_blocks, accessed_by=GetAttrGuardAccessor(q_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].q_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].q_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].kv_num_blocks, accessed_by=GetAttrGuardAccessor(kv_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].kv_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].kv_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_q_indices, accessed_by=GetAttrGuardAccessor(full_q_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_q_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_q_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_kv_indices, accessed_by=GetAttrGuardAccessor(full_kv_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_kv_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_kv_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_q_num_blocks, accessed_by=GetAttrGuardAccessor(full_q_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_q_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_q_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_kv_num_blocks, accessed_by=GetAttrGuardAccessor(full_kv_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_kv_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_kv_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].as_tuple, accessed_by=GetAttrGuardAccessor(as_tuple)
	| | | +- GuardManager: source=L['block_mask'].as_tuple, accessed_by=FuncDefaultsGuardAccessor
	| | | | +- GuardManager: source=L['block_mask'].as_tuple.__defaults__[0], accessed_by=GetItemGuardAccessor(0)
	| | | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].as_tuple.__defaults__[0], 8908032)
	| +- GuardManager: source=L['flex_attention'], accessed_by=DictGetItemGuardAccessor('flex_attention')
	| | +- GuardManager: source=L['flex_attention'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__code__, 139318784)    
	| | +- GuardManager: source=L['flex_attention'], accessed_by=FuncDefaultsGuardAccessor
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[2], accessed_by=GetItemGuardAccessor(2)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[2], 8822752)
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[3], accessed_by=GetItemGuardAccessor(3)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[3], 8907584)
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[4], accessed_by=GetItemGuardAccessor(4)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[4], 8907584)
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[5], accessed_by=GetItemGuardAccessor(5)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[5], 8822752)
	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
	| | +- GuardManager: source=G['_140062263790704_c1'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c1')
	| | | +- GuardManager: source=G['_140062263790704_c1'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c1'].Tensor, 82181376)  
	| | | | +- GuardManager: source=G['_140062263790704_c1'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
	| | | | | +- GuardManager: source=G['_140062263790704_c1'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c1'].Tensor.__bases__[0], 140062119703136)
	| | +- GuardManager: source=G['__builtins_dict___2'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___2')
	| | | +- GuardManager: source=G['__builtins_dict___2']['len'], accessed_by=DictGetItemGuardAccessor('len')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['len'], 140062269592480)
	| | | +- GuardManager: source=G['__builtins_dict___2']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['sum'], 140062269593600)
	| | | +- GuardManager: source=G['__builtins_dict___2']['list'], accessed_by=DictGetItemGuardAccessor('list')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['list'], 8841312)  
	| | | +- GuardManager: source=G['__builtins_dict___2']['type'], accessed_by=DictGetItemGuardAccessor('type')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['type'], 8810240)  
	| | | +- GuardManager: source=G['__builtins_dict___2']['tuple'], accessed_by=DictGetItemGuardAccessor('tuple')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['tuple'], 8812224) 
	| | | +- GuardManager: source=G['__builtins_dict___2']['object'], accessed_by=DictGetItemGuardAccessor('object')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['object'], 8810976)
	| | | +- GuardManager: source=G['__builtins_dict___2']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___2']['isinstance'], 140062269592160)
	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes, accessed_by=GetAttrGuardAccessor(num_nodes)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves, accessed_by=GetAttrGuardAccessor(num_leaves)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children, accessed_by=GetAttrGuardAccessor(num_children)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children == 0
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, accessed_by=GetAttrGuardAccessor(children_specs)
	| | | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, 8841312)
	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
	| | | | +- KeyValueManager pair at index=1
	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_comptime'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_comptime')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_comptime'], 140057359527872)
	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_decorators')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'], 140057359526192)
	| | | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, accessed_by=GetAttrGuardAccessor(is_compiling)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, 140057422814624)
	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot__utils')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot__utils'], 140048551571072)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, accessed_by=GetAttrGuardAccessor(_SUPPORTED_HEAD_DIMS)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, 8841312)
	| | | | +- LENGTH_CHECK: len(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS) == 10
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[0], accessed_by=ListGetItemGuardAccessor(0)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[0] == 2
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[1], accessed_by=ListGetItemGuardAccessor(1)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[1] == 4
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[2], accessed_by=ListGetItemGuardAccessor(2)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[2] == 8
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[3], accessed_by=ListGetItemGuardAccessor(3)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[3] == 16
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[4], accessed_by=ListGetItemGuardAccessor(4)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[4] == 32
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5], accessed_by=ListGetItemGuardAccessor(5)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5] == 64
	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot_flex_attention')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], 140048551568912)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, accessed_by=GetAttrGuardAccessor(math)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, 140062267404384)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, accessed_by=GetAttrGuardAccessor(sqrt)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, 140062267408144)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, accessed_by=GetAttrGuardAccessor(torch)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, 140062263790704)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, accessed_by=GetAttrGuardAccessor(_dynamo)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, 140062262057760)
	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static, accessed_by=GetAttrGuardAccessor(mark_static)
	| | | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, 125680000)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, accessed_by=GetAttrGuardAccessor(compiler)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, 140057571877776)
	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, accessed_by=GetAttrGuardAccessor(is_dynamo_compiling)
	| | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, 140057572232544)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, accessed_by=GetAttrGuardAccessor(is_grad_enabled)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, 140062253115936)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device, accessed_by=GetAttrGuardAccessor(_validate_device)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, 140048552018544)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, accessed_by=GetAttrGuardAccessor(flex_attention_hop)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, 96230624)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__, accessed_by=GetAttrGuardAccessor(__name__)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__ == 'flex_attention'
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim, accessed_by=GetAttrGuardAccessor(_supported_head_dim)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, 140048552864992)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim, accessed_by=GetAttrGuardAccessor(_validate_embed_dim)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, 139781872)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness, accessed_by=GetAttrGuardAccessor(_validate_nestedness)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, 140048553100560)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input, accessed_by=GetAttrGuardAccessor(_validate_sdpa_input)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, 139823744)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options, accessed_by=GetAttrGuardAccessor(_apply_kernel_options)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, 140048554555888)
	
V1206 15:24:50.275000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f0dcf66ffff430aab1506701a2358ee4"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527490275762.2,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.278000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 2504, "joint_graph_pass_time_us": 100514, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true, "pytorch/remote_cache:bundle_triton_into_fx_graph_cache_v2": true}, "post_grad_pass_time_us": 9066, "inductor_code_gen_cumulative_compile_time_us": 3019492, "inductor_cumulative_compile_time_us": 4016483, "aot_autograd_cumulative_compile_time_us": 4177883, "dynamo_cumulative_compile_time_us": 4298670, "frame_key": "2", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", "co_firstlineno": 459, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 91, "shape_env_guard_count": 0, "graph_op_count": 2, "graph_node_count": 16, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.278000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e38d71b3f183088f1bc592d1f308f959"}
	{
	"name": "dynamo",
	"ts": 1733527490278673.0,
	"args": {
	"compile_id": "1/0",
	"frame_key": "2",
	"co_name": "fn",
	"co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py",
	"co_firstlineno": 459,
	"cache_size": 0,
	"accumulated_cache_size": 0,
	"guard_count": 91,
	"shape_env_guard_count": 0,
	"graph_op_count": 2,
	"graph_node_count": 16,
	"graph_input_count": 11,
	"fail_type": null,
	"fail_reason": null,
	"fail_user_frame_filename": null,
	"fail_user_frame_lineno": null,
	"non_compliant_ops": [],
	"compliant_custom_ops": [],
	"restart_reasons": [],
	"dynamo_time_before_restart_s": 0.0,
	"has_guarded_code": true,
	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.282000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "394328a4ef4c4a7736913bac1b8ace11"}
	{
	"name": "dynamo",
	"ts": 1733527490282024.0,
	"args": {
	"compile_id": "0/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.282000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 483, "name": "test_flex_attention_caching", "filename": 1}, {"line": 460, "name": "fn", "filename": 1}, {"line": 1316, "name": "flex_attention", "filename": 10}, {"line": 1303, "name": "_flex_attention_hop_wrapper", "filename": 10}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.282000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "db937f3641ee59365a7de351869e46cf"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527490282590.2,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "0/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.284000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 228, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.284000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.284000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 0, "source": "L['args'][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.346000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 228, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.346000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 4, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.347000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 4, "source": "L['args'][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.347000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 228, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.347000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.347000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 5, "source": "L['args'][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.348000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.348000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.348000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 6, "source": "L['args'][4][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.349000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.349000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.349000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 7, "source": "L['args'][4][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.350000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.350000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.350000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 8, "source": "L['args'][4][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.350000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.350000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.351000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 9, "source": "L['args'][4][3]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.351000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.351000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.351000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 10, "source": "L['args'][4][4]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.352000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.352000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.352000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 11, "source": "L['args'][4][5]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.353000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 228, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.353000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.353000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 12, "source": "L['args'][4][6]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.353000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 228, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.354000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 228}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.354000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 228, "id": 13, "source": "L['args'][4][7]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.358000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_args_0_": [1, 4, 512, 64], "l_args_1_": [1, 4, 512, 64], "l_args_2_": [1, 4, 512, 64], "l_args_4_0_": [1, 1, 16], "l_args_4_1_": [1, 1, 16, 16], "l_args_4_2_": [1, 1, 16], "l_args_4_3_": [1, 1, 16, 16], "l_args_4_4_": [1, 1, 16], "l_args_4_5_": [1, 1, 16, 16], "l_args_4_6_": [1, 1, 16], "l_args_4_7_": [1, 1, 16, 16], "getitem": [1, 4, 512, 64], "getitem_1": [1, 4, 512]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "52fbb526c9a1f1575361cb8ed8e7794e"}
	class GraphModule(torch.nn.Module):
	    def forward(self, L_args_0_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_1_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_2_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_4_0_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_1_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_2_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_3_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_4_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_5_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_6_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_7_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	        l_args_0_ = L_args_0_
	        l_args_1_ = L_args_1_
	        l_args_2_ = L_args_2_
	        l_args_4_0_ = L_args_4_0_
	        l_args_4_1_ = L_args_4_1_
	        l_args_4_2_ = L_args_4_2_
	        l_args_4_3_ = L_args_4_3_
	        l_args_4_4_ = L_args_4_4_
	        l_args_4_5_ = L_args_4_5_
	        l_args_4_6_ = L_args_4_6_
	        l_args_4_7_ = L_args_4_7_
	        
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1304 in _flex_attention_hop_wrapper, code: return flex_attention_hop(*args, **kwargs)
	        score_mod_0 = self.score_mod_0
	        mask_fn_0 = self.mask_fn_0
	        flex_attention = torch.ops.higher_order.flex_attention(l_args_0_, l_args_1_, l_args_2_, score_mod_0, (l_args_4_0_, l_args_4_1_, l_args_4_2_, l_args_4_3_, l_args_4_4_, l_args_4_5_, l_args_4_6_, l_args_4_7_, 128, 128, mask_fn_0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  l_args_0_ = l_args_1_ = l_args_2_ = score_mod_0 = l_args_4_0_ = l_args_4_1_ = l_args_4_2_ = l_args_4_3_ = l_args_4_4_ = l_args_4_5_ = l_args_4_6_ = l_args_4_7_ = mask_fn_0 = None
	        getitem: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0]
	        getitem_1: "f32[1, 4, 512][2048, 512, 1]cuda:0" = flex_attention[1];  flex_attention = None
	        return (getitem, getitem_1)
	        
	    class score_mod_0(torch.nn.Module):
	        def forward(self, child: "f32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0", child_4: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:457 in score_mod, code: return score + (q - kv)
	            sub: "i32[][]cuda:0" = child_3 - child_4;  child_3 = child_4 = None
	            add: "f32[][]cuda:0" = child + sub;  child = sub = None
	            return add
	            
	    class mask_fn_0(torch.nn.Module):
	        def forward(self, child: "i32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
	            return ge
	            
V1206 15:24:50.358000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7ea1fa27ae5b4f6ebbb5c849ee69a9a2"}
	{
	"name": "backend_compile",
	"ts": 1733527490358949.8,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "0/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.359000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "7f0ded16da08aebef0f53116ced241f9"}
	{
	"name": "backend_compile",
	"ts": 1733527490359226.2,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "0/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.370000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "217903d8e83efab45bc8d1821a378153"}
	
	TREE_GUARD_MANAGER:
	+- RootGuardManager
	| +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None                           # _dynamo/output_graph.py:484 in init_ambient_guards
	| +- GLOBAL_STATE: ___check_global_state()
	| +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack()
	| +- GuardManager: source=L['args'], accessed_by=DictGetItemGuardAccessor('args')
	| | +- TYPE_MATCH: ___check_type_id(L['args'], 8812224)                        
	| | +- LENGTH_CHECK: len(L['args']) == 7                                         
	| | +- GuardManager: source=L['args'][0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | +- TENSOR_MATCH: check_tensor(L['args'][0], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | | +- NO_HASATTR: hasattr(L['args'][0], '_dynamo_dynamic_indices') == False   
	| | | +- NO_TENSOR_ALIASING: check_no_aliasing(L['args'][0], L['args'][1], L['args'][2], L['args'][4][0], L['args'][4][1], L['args'][4][2], L['args'][4][3], L['args'][4][4], L['args'][4][5], L['args'][4][6], L['args'][4][7])
	| | +- GuardManager: source=L['args'][1], accessed_by=TupleGetItemGuardAccessor(1)
	| | | +- TENSOR_MATCH: check_tensor(L['args'][1], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | | +- NO_HASATTR: hasattr(L['args'][1], '_dynamo_dynamic_indices') == False   
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['args'][2], accessed_by=TupleGetItemGuardAccessor(2)
	| | | +- TENSOR_MATCH: check_tensor(L['args'][2], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | | +- NO_HASATTR: hasattr(L['args'][2], '_dynamo_dynamic_indices') == False   
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['args'][3], accessed_by=TupleGetItemGuardAccessor(3)
	| | | +- GuardManager: source=L['args'][3].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][3].__code__, 140062268556144)     
	| | +- GuardManager: source=L['args'][4], accessed_by=TupleGetItemGuardAccessor(4)
	| | | +- TYPE_MATCH: ___check_type_id(L['args'][4], 8812224)                     
	| | | +- LENGTH_CHECK: len(L['args'][4]) == 11                                     
	| | | +- GuardManager: source=L['args'][4][0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][0], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][0], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][1], accessed_by=TupleGetItemGuardAccessor(1)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][1], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][1], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][2], accessed_by=TupleGetItemGuardAccessor(2)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][2], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][2], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][3], accessed_by=TupleGetItemGuardAccessor(3)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][3], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][3], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][4], accessed_by=TupleGetItemGuardAccessor(4)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][4], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][4], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][5], accessed_by=TupleGetItemGuardAccessor(5)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][5], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][5], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][6], accessed_by=TupleGetItemGuardAccessor(6)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][6], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][6], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][7], accessed_by=TupleGetItemGuardAccessor(7)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][7], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][7], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][8], accessed_by=TupleGetItemGuardAccessor(8)
	| | | | +- EQUALS_MATCH: L['args'][4][8] == 128                                      
	| | | +- GuardManager: source=L['args'][4][9], accessed_by=TupleGetItemGuardAccessor(9)
	| | | | +- EQUALS_MATCH: L['args'][4][9] == 128                                      
	| | | +- GuardManager: source=L['args'][4][10], accessed_by=TupleGetItemGuardAccessor(10)
	| | | | +- GuardManager: source=L['args'][4][10].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(L['args'][4][10].__code__, 140062268556768) 
	| | +- GuardManager: source=L['args'][5], accessed_by=TupleGetItemGuardAccessor(5)
	| | | +- EQUALS_MATCH: L['args'][5] == 0.125                                       
	| | +- GuardManager: source=L['args'][6], accessed_by=TupleGetItemGuardAccessor(6)
	| | | +- DICT_LENGTH: len(L['args'][6]) == 4                                      
	| | | +- GuardManager: source=L['args'][6]['PRESCALE_QK'], accessed_by=DictGetItemGuardAccessor('PRESCALE_QK')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['PRESCALE_QK'], 8907584)       
	| | | +- GuardManager: source=L['args'][6]['ROWS_GUARANTEED_SAFE'], accessed_by=DictGetItemGuardAccessor('ROWS_GUARANTEED_SAFE')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['ROWS_GUARANTEED_SAFE'], 8907584)
	| | | +- GuardManager: source=L['args'][6]['BLOCKS_ARE_CONTIGUOUS'], accessed_by=DictGetItemGuardAccessor('BLOCKS_ARE_CONTIGUOUS')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['BLOCKS_ARE_CONTIGUOUS'], 8907584)
	| | | +- GuardManager: source=L['args'][6]['OUTPUT_LOGSUMEXP'], accessed_by=DictGetItemGuardAccessor('OUTPUT_LOGSUMEXP')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['OUTPUT_LOGSUMEXP'], 8908032)  
	| +- GuardManager: source=L['kwargs'], accessed_by=DictGetItemGuardAccessor('kwargs')
	| | +- DICT_LENGTH: not L['kwargs']                                             
	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
	| | +- GuardManager: source=G['flex_attention_hop'], accessed_by=DictGetItemGuardAccessor('flex_attention_hop')
	| | | +- TYPE_MATCH: ___check_type_id(G['flex_attention_hop'], 96230624)         
	| | | +- GuardManager: source=G['flex_attention_hop'].__name__, accessed_by=GetAttrGuardAccessor(__name__)
	| | | | +- EQUALS_MATCH: G['flex_attention_hop'].__name__ == 'flex_attention'        
	| | +- GuardManager: source=G['_140062263790704_c2'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c2')
	| | | +- GuardManager: source=G['_140062263790704_c2'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c2'].Tensor, 82181376)  
	| | | | +- GuardManager: source=G['_140062263790704_c2'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
	| | | | | +- GuardManager: source=G['_140062263790704_c2'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c2'].Tensor.__bases__[0], 140062119703136)
	| | +- GuardManager: source=G['__builtins_dict___4'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___4')
	| | | +- GuardManager: source=G['__builtins_dict___4']['len'], accessed_by=DictGetItemGuardAccessor('len')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['len'], 140062269592480)
	| | | +- GuardManager: source=G['__builtins_dict___4']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['sum'], 140062269593600)
	| | | +- GuardManager: source=G['__builtins_dict___4']['list'], accessed_by=DictGetItemGuardAccessor('list')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['list'], 8841312)  
	| | | +- GuardManager: source=G['__builtins_dict___4']['type'], accessed_by=DictGetItemGuardAccessor('type')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['type'], 8810240)  
	| | | +- GuardManager: source=G['__builtins_dict___4']['tuple'], accessed_by=DictGetItemGuardAccessor('tuple')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['tuple'], 8812224) 
	| | | +- GuardManager: source=G['__builtins_dict___4']['object'], accessed_by=DictGetItemGuardAccessor('object')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['object'], 8810976)
	| | | +- GuardManager: source=G['__builtins_dict___4']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___4']['isinstance'], 140062269592160)
	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes, accessed_by=GetAttrGuardAccessor(num_nodes)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves, accessed_by=GetAttrGuardAccessor(num_leaves)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children, accessed_by=GetAttrGuardAccessor(num_children)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children == 0
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, accessed_by=GetAttrGuardAccessor(children_specs)
	| | | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, 8841312)
	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
	| | | | +- KeyValueManager pair at index=1
	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
	
V1206 15:24:50.370000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "5d9cf4ebb554cb0afa821efb370971a5"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527490370919.8,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "0/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.373000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 276, "dynamo_cumulative_compile_time_us": 88329, "frame_key": "1", "co_name": "_flex_attention_hop_wrapper", "co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", "co_firstlineno": 1303, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 58, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 17, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.373000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "0e7b5fdc5a2525dc764f709284ebcfb6"}
	{
	"name": "dynamo",
	"ts": 1733527490373678.8,
	"args": {
	"compile_id": "0/0",
	"frame_key": "1",
	"co_name": "_flex_attention_hop_wrapper",
	"co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py",
	"co_firstlineno": 1303,
	"cache_size": 0,
	"accumulated_cache_size": 0,
	"guard_count": 58,
	"shape_env_guard_count": 0,
	"graph_op_count": 3,
	"graph_node_count": 17,
	"graph_input_count": 11,
	"fail_type": null,
	"fail_reason": null,
	"fail_user_frame_filename": null,
	"fail_user_frame_lineno": null,
	"non_compliant_ops": [],
	"compliant_custom_ops": [],
	"restart_reasons": [],
	"dynamo_time_before_restart_s": 0.0,
	"has_guarded_code": true,
	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.375000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1f4a76ca6b5fcae61cfcc92045397262"}
	{
	"name": "dynamo",
	"ts": 1733527490375674.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.376000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 483, "name": "test_flex_attention_caching", "filename": 1}, {"line": 459, "name": "fn", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.376000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8d1bd9bb5e587d5f1fa509e316288846"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527490376205.0,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.377000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 245, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.377000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.377000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 0, "source": "L['q']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.383000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 245, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.384000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 1, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.384000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 1, "source": "L['k']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.384000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 245, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.385000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.385000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 2, "source": "L['v']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.390000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.390000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.390000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 3, "source": "L['block_mask'].kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.457000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.457000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.458000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 7, "source": "L['block_mask'].kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.458000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.459000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.459000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 8, "source": "L['block_mask'].full_kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.459000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.460000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.460000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 9, "source": "L['block_mask'].full_kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.460000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.461000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.461000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 10, "source": "L['block_mask'].q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.461000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.462000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.462000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 11, "source": "L['block_mask'].q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.462000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 245, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.463000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 12, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.463000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 12, "source": "L['block_mask'].full_q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.463000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 245, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.464000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 13, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 245}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.464000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 245, "id": 13, "source": "L['block_mask'].full_q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.468000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_q_": [1, 4, 512, 64], "l_k_": [1, 4, 512, 64], "l_v_": [1, 4, 512, 64], "l_block_mask_kv_num_blocks": [1, 1, 16], "l_block_mask_kv_indices": [1, 1, 16, 16], "l_block_mask_full_kv_num_blocks": [1, 1, 16], "l_block_mask_full_kv_indices": [1, 1, 16, 16], "l_block_mask_q_num_blocks": [1, 1, 16], "l_block_mask_q_indices": [1, 1, 16, 16], "l_block_mask_full_q_num_blocks": [1, 1, 16], "l_block_mask_full_q_indices": [1, 1, 16, 16], "out": [1, 4, 512, 64]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e6aa2ecb5d533ab181a6215ce16f359c"}
	class GraphModule(torch.nn.Module):
	    def forward(self, L_q_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_k_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_v_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_block_mask_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	        l_q_ = L_q_
	        l_k_ = L_k_
	        l_v_ = L_v_
	        l_block_mask_kv_num_blocks = L_block_mask_kv_num_blocks
	        l_block_mask_kv_indices = L_block_mask_kv_indices
	        l_block_mask_full_kv_num_blocks = L_block_mask_full_kv_num_blocks
	        l_block_mask_full_kv_indices = L_block_mask_full_kv_indices
	        l_block_mask_q_num_blocks = L_block_mask_q_num_blocks
	        l_block_mask_q_indices = L_block_mask_q_indices
	        l_block_mask_full_q_num_blocks = L_block_mask_full_q_num_blocks
	        l_block_mask_full_q_indices = L_block_mask_full_q_indices
	        
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	        score_mod_0 = self.score_mod_0
	        mask_fn_0 = self.mask_fn_0
	        flex_attention = torch.ops.higher_order.flex_attention(l_q_, l_k_, l_v_, score_mod_0, (l_block_mask_kv_num_blocks, l_block_mask_kv_indices, l_block_mask_full_kv_num_blocks, l_block_mask_full_kv_indices, l_block_mask_q_num_blocks, l_block_mask_q_indices, l_block_mask_full_q_num_blocks, l_block_mask_full_q_indices, 128, 128, mask_fn_0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  l_q_ = l_k_ = l_v_ = score_mod_0 = l_block_mask_kv_num_blocks = l_block_mask_kv_indices = l_block_mask_full_kv_num_blocks = l_block_mask_full_kv_indices = l_block_mask_q_num_blocks = l_block_mask_q_indices = l_block_mask_full_q_num_blocks = l_block_mask_full_q_indices = mask_fn_0 = None
	        out: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0];  flex_attention = None
	        return (out,)
	        
	    class score_mod_0(torch.nn.Module):
	        def forward(self, child: "f32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0", child_4: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:457 in score_mod, code: return score + (q - kv)
	            sub: "i32[][]cuda:0" = child_3 - child_4;  child_3 = child_4 = None
	            add: "f32[][]cuda:0" = child + sub;  child = sub = None
	            return add
	            
	    class mask_fn_0(torch.nn.Module):
	        def forward(self, child: "i32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
	            return ge
	            
V1206 15:24:50.468000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "80655624a23088893ab4a2c409ac95a3"}
	{
	"name": "backend_compile",
	"ts": 1733527490468948.0,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.469000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a719c6c37340b85f41f898361ba8c722"}
	{
	"name": "_recursive_pre_grad_passes",
	"ts": 1733527490469220.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.469000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a15be333d609301edbc3c4ba97a6e27a"}
	{
	"name": "_recursive_pre_grad_passes",
	"ts": 1733527490469741.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.471000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c4c11179a8b2ec9ad87664f12d76dd56"}
	{
	"name": "create_aot_dispatcher_function",
	"ts": 1733527490471944.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.519000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d952b3071dd9ba606a04d644841d9aba"}
	ViewAndMutationMeta(input_info=[InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True)],
	                    output_info=[OutputAliasInfo(output_type=<OutputType.non_alias: 1>,
	                                                raw_type=<class 'torch._subclasses.functional_tensor.FunctionalTensor'>,
	                                                base_idx=None,
	                                                dynamic_dims=set(),
	                                                requires_grad=False,
	                                                functional_tensor=None)],
	                    num_intermediate_bases=0,
	                    keep_input_mutations=True,
	                    traced_tangents=[],
	                    subclass_inp_meta=[PlainTensorMeta(unwrapped_idx=0,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=1,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=2,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=3,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=4,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=5,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=6,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=7,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=8,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=9,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=10,
	                                                      memory_format=None)],
	                    subclass_fw_graph_out_meta=[PlainTensorMeta(unwrapped_idx=0,
	                                                               memory_format=None)],
	                    subclass_tangent_meta=[],
	                    is_train=False,
	                    traced_tangent_metas=None,
	                    num_symints_saved_for_bw=None,
	                    grad_enabled_mutation=None,
	                    deterministic=None,
	                    static_input_indices=[],
	                    tokens={},
	                    indices_of_inputs_that_requires_grad_with_mutations_in_bw=[],
	                    bw_donated_idxs=None,
	                    num_backward_tokens=0)
V1206 15:24:50.520000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "02884732a6f03bac463b0e1e5f9b1cf4"}
	class <lambda>(torch.nn.Module):
	    def forward(self, arg0_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg1_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg2_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg3_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg4_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg5_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg6_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg7_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg8_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg9_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg10_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	        sdpa_score0 = self.sdpa_score0
	        sdpa_mask0 = self.sdpa_mask0
	        flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None
	        getitem: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0];  flex_attention = None
	        return (getitem,)
	        
	    class sdpa_score0(torch.nn.Module):
	        def forward(self, arg0_1: "f32[][]cuda:0", arg1_1: "i32[][]cuda:0", arg2_1: "i32[][]cuda:0", arg3_1: "i32[][]cuda:0", arg4_1: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	            sub: "i32[][]cuda:0" = torch.ops.aten.sub.Tensor(arg3_1, arg4_1);  arg3_1 = arg4_1 = None
	            add: "f32[][]cuda:0" = torch.ops.aten.add.Tensor(arg0_1, sub);  arg0_1 = sub = None
	            return add
	            
	    class sdpa_mask0(torch.nn.Module):
	        def forward(self, arg0_1: "i32[][]cuda:0", arg1_1: "i32[][]cuda:0", arg2_1: "i32[][]cuda:0", arg3_1: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = torch.ops.aten.ge.Tensor(arg2_1, arg3_1);  arg2_1 = arg3_1 = None
	            return ge
	            
V1206 15:24:50.521000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2386afea16835c738f6306784edc87e8"}
	{
	"name": "compile_fx.<locals>.fw_compiler_base",
	"ts": 1733527490521683.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.522000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "76ae4cce2f52ad7feca705fd70cb3c6c"}
	{
	"name": "_recursive_joint_graph_passes",
	"ts": 1733527490522094.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.523000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c0a313f319086d99c7003f86a39e8eb0"}
	{
	"name": "_recursive_joint_graph_passes",
	"ts": 1733527490523142.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.523000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "51c50f90b4f3e9efaa73f45c2709dc93"}
	{
	"name": "inductor_compile",
	"ts": 1733527490523375.8,
	"args": {
	"fn_name": "compile_fx_inner",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.531000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "30b00efd4f6faf8b068719e99390e687"}
	{
	"name": "TritonBundler.read_and_emit",
	"ts": 1733527490531851.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.532000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d06401ea7dbeea897c26573b7848e594"}
	{
	"name": "TritonBundler.read_and_emit",
	"ts": 1733527490532212.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.532000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e60ce588f8a8da2d850b98d827e6543e"}
	{
	"name": "PyCodeCache.load_by_key_path",
	"ts": 1733527490532650.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.545000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d4c6869eeaa9ef14de6b518a45d9c651"}
	{
	"name": "async_compile.wait",
	"ts": 1733527490545938.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.599000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a05612636136909ead93ebf04e1a42ad"}
	{
	"name": "async_compile.wait",
	"ts": 1733527490599093.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.599000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "eb0838b082ff5e829a02f8ce1aae27cf"}
	{
	"name": "PyCodeCache.load_by_key_path",
	"ts": 1733527490599434.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.599000 1667746 torch/_inductor/codecache.py:1267] {"inductor_output_code": {"filename": "/tmp/tmpsn77tlsm/sm/csmb7f23sz2oa7mngdcoprpgrrbziqzirwhkjffdmxv6qhdf3e57.py"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9ced76d2b85dd84790793e85fcadcc28"}
	# AOT ID: ['0_inference']
	from ctypes import c_void_p, c_long, c_int
	import torch
	import math
	import random
	import os
	import tempfile
	from math import inf, nan
	from torch._inductor.hooks import run_intermediate_hooks
	from torch._inductor.utils import maybe_profile
	from torch._inductor.codegen.memory_planning import _align as align
	from torch import device, empty_strided
	from torch._inductor.async_compile import AsyncCompile
	from torch._inductor.select_algorithm import extern_kernels
	from torch._inductor.codegen.multi_kernel import MultiKernelCall
	import torch._inductor.kernel.flex_attention
	from torch._C import _cuda_getCurrentRawStream as get_raw_stream
	import triton
	import triton.language as tl
	from torch._inductor.runtime.triton_heuristics import (
	    grid,
	    split_scan_grid,
	    grid_combo_kernels,
	    start_graph,
	    end_graph,
	    cooperative_reduction_grid,
	)
	
	aten = torch.ops.aten
	inductor_ops = torch.ops.inductor
	_quantized = torch.ops._quantized
	assert_size_stride = torch._C._dynamo.guards.assert_size_stride
	empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu
	empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
	empty_strided_xpu = torch._C._dynamo.guards._empty_strided_xpu
	reinterpret_tensor = torch._C._dynamo.guards._reinterpret_tensor
	alloc_from_pool = torch.ops.inductor._alloc_from_pool
	async_compile = AsyncCompile()
	empty_strided_p2p = torch._C._distributed_c10d._SymmetricMemory.empty_strided_p2p
	
	
	# kernel path: /tmp/tmpsn77tlsm/xo/cxokkamvqulf4elvqs25iez7wbmqqfbp3i4qy7g2hwvher2zrziz.py
	# Topologically Sorted Source Nodes: [flex_attention], Original ATen: []
	# Source node to ATen node mapping:
	#   flex_attention => flex_attention
	# Graph fragment:
	#   %flex_attention : [num_users=1] = call_function[target=torch.ops.higher_order.flex_attention](args = (%arg0_1, %arg1_1, %arg2_1, %sdpa_score0, (%arg3_1, %arg4_1, %arg5_1, %arg6_1, %arg7_1, %arg8_1, %arg9_1, %arg10_1, 128, 128, %sdpa_mask0), 0.125, {PRESCALE_QK: False, ROWS_GUARANTEED_SAFE: False, BLOCKS_ARE_CONTIGUOUS: False, OUTPUT_LOGSUMEXP: True}, (), ()), kwargs = {})
	triton_tem_fused_0 = async_compile.triton('triton_tem_fused_0', '''
	import triton
	import triton.language as tl
	from triton.compiler.compiler import AttrsDescriptor
	
	from torch._inductor.runtime import triton_helpers, triton_heuristics
	from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
	from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
	
	@triton_heuristics.template(
	    num_stages=3,
	    num_warps=4,
	    triton_meta={'signature': {'arg_Q': '*fp32', 'arg_K': '*fp32', 'arg_V': '*fp32', 'arg_LSE': '*fp32', 'arg_KV_NUM_BLKS': '*i32', 'arg_KV_IDX': '*i32', 'arg_FULL_KV_NUM_BLKS': '*i32', 'arg_FULL_KV_IDX': '*i32', 'out_ptr0': '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8), equal_to_1=())]},
	    inductor_meta={'kernel_name': 'triton_tem_fused_0', 'backend_hash': '562E840D41CEB1D8E51DE726EA7592B0C37A0C6FBD72CF4E958863CEC11D41A7', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': False, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
	)
	@triton.jit
	def triton_tem_fused_0(arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0):
	    PRESCALE_QK : tl.constexpr = False
	    ROWS_GUARANTEED_SAFE : tl.constexpr = False
	    BLOCKS_ARE_CONTIGUOUS : tl.constexpr = False
	    OUTPUT_LOGSUMEXP : tl.constexpr = True
	    FLOAT32_PRECISION : tl.constexpr = 'ieee'
	    IS_DIVISIBLE : tl.constexpr = True
	    SM_SCALE : tl.constexpr = 0.125
	    GQA_SHARED_HEADS : tl.constexpr = 1
	    HAS_FULL_BLOCKS : tl.constexpr = True
	    QK_HEAD_DIM : tl.constexpr = 64
	    V_HEAD_DIM : tl.constexpr = 64
	    BLOCK_M : tl.constexpr = 128
	    BLOCK_N : tl.constexpr = 32
	    SPARSE_Q_BLOCK_SIZE : tl.constexpr = 128
	    SPARSE_KV_BLOCK_SIZE : tl.constexpr = 128
	    Q = arg_Q
	    K = arg_K
	    V = arg_V
	    LSE = arg_LSE
	    KV_NUM_BLKS = arg_KV_NUM_BLKS
	    KV_IDX = arg_KV_IDX
	    FULL_KV_NUM_BLKS = arg_FULL_KV_NUM_BLKS
	    FULL_KV_IDX = arg_FULL_KV_IDX
	
	    # Sub notation for this kernel:
	    #
	    # Q: Query, K: Key, V: Value
	    # M: Number of queries, N: Number of keys/values, D: Model dimension
	    # QK_HEAD_DIM: The dimension of the query and key embeddings
	    # V_HEAD_DIM: The dimension of the value embeddings
	    # z: Batch size, h: Number of heads, m: Number of queries per head, k: Number of keys per head
	    # GQA_SHARED_HEADS: number of query heads sharing one kv head in GQA setups.
	    #
	    # The following FULL_* and PARTIAL_* is defined in the block sparse mask grid, rather than the thread block grid.
	    # KV_NUM_BLKS: The number of KV blocks (that may or may not require masking) for each query.
	    # KV_IDX: The indices of KV blocks (that may or may not require masking) for each query.
	    # FULL_KV_NUM_BLKS: The number of fully unmasked KV blocks (so we don't need masking) for each query.
	    # FULL_KV_IDX: The indices of fully unmasked KV blocks (so we don't need masking) for each query.
	    #
	    # OUTPUT_LOGSUMEXP: We only need to store the logsumexp if we require grad
	    #
	    # (Modifiable) Performance tuning options
	    # BLOCK_M: The thread block size across the seqlen dim of Q.
	    # BLOCK_N: Iterate over BLOCK_N across the seqlen dim of K/V in each thread block.
	
	    # The below are kernel options that can be applied for certain score_mods,
	    # or involve a numerics vs. perf tradeoff
	    # PRESCALE_QK: Whether to pre-scale QK by 1/sqrt(d) and change of base. Has
	    # about 20% more numerical error, but slightly faster.
	    # ROWS_GUARANTEED_SAFE: Is it guaranteed that at least one value in each row
	    # is not masked out? If so, we can skip an extra safety check
	    # BLOCKS_ARE_CONTIGUOUS: Is it guaranteed that all blocks in the mask are
	    # contiguous? If so, we don't need to do an indirect jump for every block
	
	    tl.static_assert(SPARSE_Q_BLOCK_SIZE >= BLOCK_M and SPARSE_Q_BLOCK_SIZE % BLOCK_M == 0)
	    tl.static_assert(SPARSE_KV_BLOCK_SIZE >= BLOCK_N and SPARSE_KV_BLOCK_SIZE % BLOCK_N == 0)
	
	    # Define strides of inputs
	    stride_qz, stride_qh, stride_qm, stride_qk = 131072, 32768, 64, 1
	    stride_kz, stride_kh, stride_kn, stride_kk = 131072, 32768, 64, 1
	    stride_vz, stride_vh, stride_vn, stride_vk = 131072, 32768, 64, 1
	
	    ZQ = 1
	    HQ = 4
	    Q_LEN = 512
	    ZKV = 1
	    KV_LEN = 512
	
	    MATMUL_PRECISION = Q.dtype.element_ty
	
	    q_start = tl.program_id(0)
	    off_zq = tl.program_id(1) // HQ
	    off_hq = tl.program_id(1) % HQ
	
	    # We support two cases for batch dimension. a) (ZKV == ZQ) where off_zkv = off_zq.
	    # b) (ZKV == 1 and ZQ > 1) where KV is broadcasted along the batch dimension and off_zkv=0.
	    off_zkv = off_zq % ZKV
	    off_hkv = off_hq // GQA_SHARED_HEADS
	    off_g = off_hq % GQA_SHARED_HEADS
	
	    q_offset = off_zq * stride_qz + off_hq * stride_qh
	    k_offset = off_zkv * stride_kz + off_hkv * stride_kh
	    v_offset = off_zkv * stride_vz + off_hkv * stride_vh
	
	    Q = Q + q_offset
	    K = K + k_offset
	    V = V + v_offset
	
	    SPARSE_Z = 1
	    SPARSE_HQ = 1
	
	    sparse_idx_z = off_zq % SPARSE_Z
	    sparse_idx_hq = off_hq % SPARSE_HQ
	
	    SPARSE_Q_MULTIPLE: tl.constexpr = (SPARSE_Q_BLOCK_SIZE // BLOCK_M)
	    SPARSE_KV_MULTIPLE: tl.constexpr = (SPARSE_KV_BLOCK_SIZE // BLOCK_N)
	
	    stride_kv_num_blks_h = 16
	    stride_kv_idx_h = 256
	    stride_kv_idx_m = 16
	
	    # initialize pointer to m and l
	    m_i = tl.zeros([BLOCK_M], dtype=tl.float32) - float("inf")
	    l_i = tl.zeros([BLOCK_M], dtype=tl.float32)
	    acc = tl.zeros([BLOCK_M, V_HEAD_DIM], dtype=tl.float32)
	
	    offs_m = q_start * BLOCK_M + tl.arange(0, BLOCK_M)
	
	    # KV_IDX and KV_NUM_BLKS are always contiguous.
	    sparse_hz_offset = sparse_idx_z * SPARSE_HQ + sparse_idx_hq
	    sparse_kv_num_blks_offset = sparse_hz_offset * stride_kv_num_blks_h + q_start // SPARSE_Q_MULTIPLE
	    sparse_kv_idx_offset = sparse_hz_offset * stride_kv_idx_h + (q_start // SPARSE_Q_MULTIPLE) * stride_kv_idx_m  # noqa: B950
	
	    Q_block_ptr = tl.make_block_ptr(
	        base=Q,
	        shape=(Q_LEN, QK_HEAD_DIM),
	        strides=(stride_qm, stride_qk),
	        offsets=(q_start * BLOCK_M, 0),
	        block_shape=(BLOCK_M, QK_HEAD_DIM),
	        order=(1, 0)
	    )
	
	    # load q: it stays in SRAM throughout the inner loop.
	    if IS_DIVISIBLE:
	        q = tl.load(Q_block_ptr)
	    else:
	        # boundary check is not free, so we only do it when necessary.
	        q = tl.load(Q_block_ptr, boundary_check=(0,), padding_option = "zero")
	
	    # ~~~~~~~~~~~~~~ normal blocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	    # We don't know anything "special" about these blocks, so we need to apply
	    # both score_mod and mask_mod to it
	    kv_indices = KV_IDX + sparse_kv_idx_offset
	    kv_start = tl.load(kv_indices) * SPARSE_KV_BLOCK_SIZE # first kv block we're loading
	    kv_num_blocks = tl.load(KV_NUM_BLKS + sparse_kv_num_blks_offset)
	    block_n_end = tl.minimum(kv_num_blocks * SPARSE_KV_MULTIPLE, tl.maximum(tl.cdiv(KV_LEN, BLOCK_N), 1))
	
	    K_block_ptr = tl.make_block_ptr(
	        base=K,
	        shape=(QK_HEAD_DIM, KV_LEN),
	        strides=(stride_kk, stride_kn),
	        offsets=(0, kv_start),
	        block_shape=(QK_HEAD_DIM, BLOCK_N),
	        order=(0, 1)
	    )
	    V_block_ptr = tl.make_block_ptr(
	        base=V,
	        shape=(KV_LEN, V_HEAD_DIM),
	        strides=(stride_vn, stride_vk),
	        offsets=(kv_start, 0),
	        block_shape=(BLOCK_N, V_HEAD_DIM),
	        order=(1, 0)
	    )
	    offs_n = kv_start + tl.arange(0, BLOCK_N)
	
	    acc, l_i, m_i = forward_inner(
	        arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	        q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	        acc, l_i, m_i,
	        off_zq, off_hq, offs_m[:, None], offs_n[None, :],
	        kv_indices, kv_num_blocks,
	        0, block_n_end,
	        MATMUL_PRECISION,
	        IS_FULL_BLOCKS=False,
	    )
	
	    # ~~~~~~~~~~~~~~ "full" blocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	    # We know these blocks are guaranteed to be "full", so we don't need to
	    # apply mask_mod to them - only score_mod
	    if HAS_FULL_BLOCKS:
	        # FULL_KV_IDX and FULL_KV_NUM_BLKS are always contiguous.
	        kv_indices = FULL_KV_IDX + sparse_kv_idx_offset
	        kv_start = tl.load(kv_indices) * SPARSE_KV_BLOCK_SIZE # first kv block we're loading
	        kv_num_blocks = tl.load(FULL_KV_NUM_BLKS + sparse_kv_num_blks_offset)
	        block_n_end = tl.minimum(kv_num_blocks * SPARSE_KV_MULTIPLE, tl.maximum(tl.cdiv(KV_LEN, BLOCK_N), 1))
	
	        K_block_ptr = tl.make_block_ptr(
	            base=K,
	            shape=(QK_HEAD_DIM, KV_LEN),
	            strides=(stride_kk, stride_kn),
	            offsets=(0, kv_start),
	            block_shape=(QK_HEAD_DIM, BLOCK_N),
	            order=(0, 1)
	        )
	        V_block_ptr = tl.make_block_ptr(
	            base=V,
	            shape=(KV_LEN, V_HEAD_DIM),
	            strides=(stride_vn, stride_vk),
	            offsets=(kv_start, 0),
	            block_shape=(BLOCK_N, V_HEAD_DIM),
	            order=(1, 0)
	        )
	        offs_n = kv_start + tl.arange(0, BLOCK_N)
	
	        acc, l_i, m_i = forward_inner(
	            arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	            q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	            acc, l_i, m_i,
	            off_zq, off_hq, offs_m[:, None], offs_n[None, :],
	            kv_indices, kv_num_blocks,
	            0, block_n_end,
	            MATMUL_PRECISION,
	            IS_FULL_BLOCKS=True,
	        )
	
	
	    # [Note] Handle fully masked out rows:
	    # Li will be the sum(e^(-inf)) == 0.0 for masked out rows, mi will be -inf.
	    # We set Li to 1.0 which will result in lse/out = 0.0 | after the log(li) + mi(0.0) step
	    l_i = tl.where(l_i == 0.0, 1, l_i)
	
	    acc = acc / l_i[:, None]
	    idx_zq = tl.program_id(1) // HQ
	    idx_hq = tl.program_id(1) % HQ
	    idx_m = offs_m[:, None]
	    idx_d = tl.arange(0, V_HEAD_DIM)[None, :]
	
	    mask = idx_m < Q_LEN
	
	    xindex = idx_d + 64*idx_m + 32768*idx_hq + 131072*idx_zq
	    tl.store(out_ptr0 + (tl.broadcast_to(idx_d + 64*idx_m + 32768*idx_hq, acc.shape)), acc, mask)
	
	    if OUTPUT_LOGSUMEXP:
	        off_hz = tl.program_id(1)
	        l_ptrs = LSE + off_hz * Q_LEN + offs_m
	        lse = m_i + tl.math.log2(l_i)
	        if IS_DIVISIBLE:
	            tl.store(l_ptrs, lse)
	        else:
	            tl.store(l_ptrs, lse, mask=offs_m < Q_LEN)
	
	@triton.jit
	def forward_inner(
	    arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	    q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	    # accumulated values
	    acc, l_i, m_i,
	    # Offsets used as inputs to score_mod & mask_mod
	    # of size [BLOCK_M, BLOCK_N] or scalar.
	    off_z, off_h, offs_m, offs_n,
	    # blocksparse data
	    kv_indices, kv_num_blocks,
	    # start kv and end kv block
	    block_n_start, block_n_end,
	    MATMUL_PRECISION,
	    IS_FULL_BLOCKS,
	):
	    # Redefines all kernel parameters (BLOCK_M, etc.) so we don't need to plumb them all through
	    PRESCALE_QK : tl.constexpr = False
	    ROWS_GUARANTEED_SAFE : tl.constexpr = False
	    BLOCKS_ARE_CONTIGUOUS : tl.constexpr = False
	    OUTPUT_LOGSUMEXP : tl.constexpr = True
	    FLOAT32_PRECISION : tl.constexpr = 'ieee'
	    IS_DIVISIBLE : tl.constexpr = True
	    SM_SCALE : tl.constexpr = 0.125
	    GQA_SHARED_HEADS : tl.constexpr = 1
	    HAS_FULL_BLOCKS : tl.constexpr = True
	    QK_HEAD_DIM : tl.constexpr = 64
	    V_HEAD_DIM : tl.constexpr = 64
	    BLOCK_M : tl.constexpr = 128
	    BLOCK_N : tl.constexpr = 32
	    SPARSE_Q_BLOCK_SIZE : tl.constexpr = 128
	    SPARSE_KV_BLOCK_SIZE : tl.constexpr = 128
	
	
	    SPARSE_KV_MULTIPLE: tl.constexpr = (SPARSE_KV_BLOCK_SIZE // BLOCK_N)
	    RCP_LN2: tl.constexpr = 1.44269504
	
	    if PRESCALE_QK:
	        q = (q * SM_SCALE * RCP_LN2).to(MATMUL_PRECISION)
	
	    # loop over k, v and update accumulator until block_n_end
	    for start_n in range(block_n_start, block_n_end):
	        if IS_DIVISIBLE:
	            acc, l_i, m_i = forward_block_mn(
	                arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	                q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	                # accumulated values
	                acc, l_i, m_i,
	                # Offsets
	                off_z, off_h, offs_m, offs_n,
	                MATMUL_PRECISION, RCP_LN2,
	                IS_FULL_BLOCKS,
	            )
	        else:
	            # Benchmark shows even we applied mod & mask to each block for non divisible seqlen,
	            # it's on par or slightly faster than only applying to the last block in fwd.
	            # However, we choose different strategy for bwd, where we only apply mod & mask
	            # to the last block because it's faster a lot.
	            acc, l_i, m_i = forward_block_mn(
	                arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	                q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	                # accumulated values
	                acc, l_i, m_i,
	                # Offsets
	                off_z, off_h, offs_m, offs_n,
	                MATMUL_PRECISION, RCP_LN2,
	                IS_FULL_BLOCKS, CHECK_BLOCK_BOUNDARY=True,
	            )
	
	        # update pointers
	        offset = get_offset_for_next_block(
	            start_n, kv_indices, kv_num_blocks,
	            SPARSE_KV_BLOCK_SIZE, SPARSE_KV_MULTIPLE, BLOCK_N, BLOCKS_ARE_CONTIGUOUS
	        )
	
	        V_block_ptr = tl.advance(V_block_ptr, (offset, 0))
	        K_block_ptr = tl.advance(K_block_ptr, (0, offset))
	
	        offs_n = offs_n + offset
	
	    return acc, l_i, m_i
	
	
	@triton.jit
	def get_offset_for_next_block(
	    loop_iter, col_indices, total_blocks,
	    SPARSE_BLOCK, SPARSE_BLOCK_MULTIPLE, BLOCK,
	    BLOCKS_ARE_CONTIGUOUS: tl.constexpr
	):
	    if BLOCKS_ARE_CONTIGUOUS:
	        return BLOCK
	    cur_block_idx = loop_iter // SPARSE_BLOCK_MULTIPLE
	    cur_block = tl.load(col_indices + cur_block_idx, eviction_policy="evict_last")
	    next_block = tl.load(col_indices + cur_block_idx + 1, eviction_policy="evict_last", mask=cur_block_idx + 1 < total_blocks)
	    needs_jump = (loop_iter + 1) % SPARSE_BLOCK_MULTIPLE == 0
	    jump_to_block = (next_block - cur_block ) * SPARSE_BLOCK - (SPARSE_BLOCK_MULTIPLE - 1) * BLOCK
	    offset = jump_to_block * needs_jump + (1 - needs_jump) * BLOCK
	    return offset
	
	@triton.jit
	def forward_block_mn(
	    arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	    q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	    # accumulated values
	    acc, l_i, m_i,
	    # Offsets
	    off_z, off_h, offs_m, offs_n,
	    MATMUL_PRECISION, RCP_LN2,
	    IS_FULL_BLOCKS, CHECK_BLOCK_BOUNDARY=False,
	):
	    # Redefines all kernel parameters (BLOCK_M, etc.) so we don't need to plumb them all through
	    PRESCALE_QK : tl.constexpr = False
	    ROWS_GUARANTEED_SAFE : tl.constexpr = False
	    BLOCKS_ARE_CONTIGUOUS : tl.constexpr = False
	    OUTPUT_LOGSUMEXP : tl.constexpr = True
	    FLOAT32_PRECISION : tl.constexpr = 'ieee'
	    IS_DIVISIBLE : tl.constexpr = True
	    SM_SCALE : tl.constexpr = 0.125
	    GQA_SHARED_HEADS : tl.constexpr = 1
	    HAS_FULL_BLOCKS : tl.constexpr = True
	    QK_HEAD_DIM : tl.constexpr = 64
	    V_HEAD_DIM : tl.constexpr = 64
	    BLOCK_M : tl.constexpr = 128
	    BLOCK_N : tl.constexpr = 32
	    SPARSE_Q_BLOCK_SIZE : tl.constexpr = 128
	    SPARSE_KV_BLOCK_SIZE : tl.constexpr = 128
	
	
	    # -- load k --
	    if IS_DIVISIBLE:
	        k = tl.load(K_block_ptr)
	    else:
	        k = tl.load(K_block_ptr, boundary_check=(1,), padding_option = "zero")
	    # -- compute qk ---
	    qk = tl.dot(q, k, input_precision=FLOAT32_PRECISION) # TODO: use cuda matmul when q_len <= 2.
	    if not PRESCALE_QK:
	        qk *= SM_SCALE
	    # ~~~~~~~~~~~~~~~~~~~ Apply score modification  ~~~~~~~~~~~~~~~~~~~
	    if CHECK_BLOCK_BOUNDARY:
	        # If this is the last block of a non divisible seqlen, we still need to load [BLOCK_M, BLOCK_N] elements,
	        # which is larger than the actual number of elements. To avoid access memory out of bound,
	        # we need to mask out the elements that are out of Q_LEN & KV_LEN.
	        m = offs_m % Q_LEN
	        n = offs_n % KV_LEN
	    else:
	        m = offs_m
	        n = offs_n
	
	    tmp0 = (m) - (n)
	    tmp1 = tmp0.to(tl.float32)
	    tmp2 = (qk) + tmp1
	    post_mod_scores = tmp2
	
	
	    if CHECK_BLOCK_BOUNDARY:
	        # Mask out the elements that are out of the KV_LEN for non divisible seqlen.
	        post_mod_scores = tl.where(offs_n < KV_LEN, post_mod_scores, float("-inf"))
	
	    if not IS_FULL_BLOCKS:
	        tmp3 = (m) >= (n)
	        mask_mod_output = tmp3
	
	
	        if CHECK_BLOCK_BOUNDARY:
	            mask_mod_output = tl.where(offs_n < KV_LEN, mask_mod_output, False)
	        # apply mask for partially unmasked blocks
	        post_mod_scores = tl.where(mask_mod_output, post_mod_scores, float("-inf"))
	
	    # TODO: In the case that score_mod is linear, this can be LICMed
	    if not PRESCALE_QK:
	        post_mod_scores *= RCP_LN2
	    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	
	    # -- compute scaling constant ---
	    m_ij = tl.maximum(m_i, tl.max(post_mod_scores, 1))
	    if not ROWS_GUARANTEED_SAFE:
	        masked_out_rows = (m_ij == float("-inf"))
	        m_ij_masked = tl.where(masked_out_rows, 0, m_ij)
	    else:
	        m_ij_masked = m_ij
	
	    alpha = tl.math.exp2(m_i - m_ij_masked)
	    p = tl.math.exp2(post_mod_scores - m_ij_masked[:, None])
	
	    # NB: l_i update is pulled up here since it's a bit faster
	    # NB: For headdim=256, it's faster to move it back down to after m_i =
	    # m_ij
	    l_i = l_i * alpha + tl.sum(p, 1)
	    # # -- scale and update acc --
	    acc = acc * alpha[:, None]
	
	    if IS_DIVISIBLE:
	        v = tl.load(V_block_ptr)
	    else:
	        v = tl.load(V_block_ptr, boundary_check=(0,), padding_option = "zero")
	    acc = tl.dot(p.to(MATMUL_PRECISION), v, acc, input_precision=FLOAT32_PRECISION)
	
	    # -- update m_i
	    m_i = m_ij
	
	    return acc, l_i, m_i
	''', device_str='cuda')
	meta0 = {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True, 'FLOAT32_PRECISION': "'ieee'", 'IS_DIVISIBLE': True, 'SM_SCALE': 0.125, 'GQA_SHARED_HEADS': 1, 'HAS_FULL_BLOCKS': True, 'QK_HEAD_DIM': 64, 'V_HEAD_DIM': 64, 'BLOCK_M': 128, 'BLOCK_N': 32, 'SPARSE_Q_BLOCK_SIZE': 128, 'SPARSE_KV_BLOCK_SIZE': 128}
	
	
	async_compile.wait(globals())
	del async_compile
	
	def call(args):
	    arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1 = args
	    args.clear()
	    assert_size_stride(arg0_1, (1, 4, 512, 64), (131072, 32768, 64, 1))
	    assert_size_stride(arg1_1, (1, 4, 512, 64), (131072, 32768, 64, 1))
	    assert_size_stride(arg2_1, (1, 4, 512, 64), (131072, 32768, 64, 1))
	    assert_size_stride(arg3_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg4_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    assert_size_stride(arg5_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg6_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    assert_size_stride(arg7_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg8_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    assert_size_stride(arg9_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg10_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    buf0 = empty_strided_cuda((1, 4, 512), (2048, 512, 1), torch.float32)
	    with torch.cuda._DeviceGuard(0):
	        torch.cuda.set_device(0)
	        buf1 = empty_strided_cuda((1, 4, 512, 64), (131072, 32768, 64, 1), torch.float32)
	        # Topologically Sorted Source Nodes: [flex_attention], Original ATen: []
	        stream0 = get_raw_stream(0)
	        triton_tem_fused_0.run(arg0_1, arg1_1, arg2_1, buf0, arg3_1, arg4_1, arg5_1, arg6_1, buf1, grid=torch._inductor.kernel.flex_attention.flex_attention_grid(1, 4, 512, 64, meta0), stream=stream0)
	        del arg0_1
	        del arg1_1
	        del arg2_1
	        del arg3_1
	        del arg4_1
	        del arg5_1
	        del arg6_1
	        del buf0
	    return (buf1, )
	
	
	def benchmark_compiled_module(times=10, repeat=10):
	    from torch._dynamo.testing import rand_strided
	    from torch._inductor.utils import print_performance
	    arg0_1 = rand_strided((1, 4, 512, 64), (131072, 32768, 64, 1), device='cuda:0', dtype=torch.float32)
	    arg1_1 = rand_strided((1, 4, 512, 64), (131072, 32768, 64, 1), device='cuda:0', dtype=torch.float32)
	    arg2_1 = rand_strided((1, 4, 512, 64), (131072, 32768, 64, 1), device='cuda:0', dtype=torch.float32)
	    arg3_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg4_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg5_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg6_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg7_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg8_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg9_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg10_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    fn = lambda: call([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1])
	    return print_performance(fn, times=times, repeat=repeat)
	
	
	if __name__ == "__main__":
	    from torch._inductor.wrapper_benchmark import compiled_module_main
	    compiled_module_main('None', benchmark_compiled_module)
	
V1206 15:24:50.600000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0b77d8bc6ee36d773c74713ef29c7921"}
	{
	"name": "fx_graph_cache_hit",
	"ts": 1733527490524020.8,
	"args": {
	"triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=[])",
	"key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau",
	"components": [
	"[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False",
	"[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False",
	"[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2",
	"[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4",
	"[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5",
	"[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6",
	"[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8",
	"[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False",
	"[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False",
	"[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True",
	"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}",
	"[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False",
	"[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True",
	"[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default",
	"[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300",
	"[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False",
	"[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP",
	"[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True",
	"[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False",
	"[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0",
	"[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0",
	"[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ",
	"[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT",
	"[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4",
	"[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1",
	"[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128",
	"[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False",
	"[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None",
	"[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096",
	"[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128",
	"[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True",
	"[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False",
	"[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None",
	"[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None",
	"[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []",
	"[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']",
	"[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False",
	"[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp",
	"[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton",
	"[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host",
	"[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda",
	"[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021",
	"[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
	],
	"cache_event_time": 1733527490600011529,
	"cache_state": "hit",
	"time_saved_ns": 4009141776,
	"compile_id": "1/0"
	},
	"ph": "i",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0,
	"s": "p"
	}
V1206 15:24:50.600000 1667746 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_hit", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d8cb8a6a0ac5b1d58d28337f3f1a055a"}
	{"triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=[])", "key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau", "components": ["[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2", "[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4", "[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5", "[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6", "[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8", "[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}", "[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733527490600011529, "cache_state": "hit", "time_saved_ns": 4009141776, "compile_id": "1/0"}
V1206 15:24:50.601000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "13cc116eca5abf2a6d7e2ad12c7d39c5"}
	{
	"name": "inductor_compile",
	"ts": 1733527490600831.8,
	"args": {
	"fn_name": "compile_fx_inner",
	"compile_id": "1/0",
	"is_backward": false,
	"cached_kernel_names": [],
	"cache_state": "hit",
	"cache_event_time": 1733527490524020812,
	"key": "ff6i4wr5gqsaog5zgs3qrwebcodgoghruloxh5pzs6iul3cmayau",
	"components": [
	"[v4wl2w2eawehu2yuzv4ftucrovobmkotky2oimjdvonwbz3g4ir] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False",
	"[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False",
	"[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2",
	"[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4",
	"[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5",
	"[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6",
	"[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8",
	"[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False",
	"[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False",
	"[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True",
	"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}",
	"[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False",
	"[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True",
	"[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default",
	"[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300",
	"[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False",
	"[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP",
	"[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True",
	"[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False",
	"[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0",
	"[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0",
	"[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ",
	"[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT",
	"[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4",
	"[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1",
	"[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128",
	"[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False",
	"[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None",
	"[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096",
	"[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128",
	"[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True",
	"[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False",
	"[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None",
	"[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None",
	"[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []",
	"[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']",
	"[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False",
	"[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp",
	"[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton",
	"[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host",
	"[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda",
	"[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021",
	"[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
	],
	"cache_bypass_reason": null,
	"remote_cache_enabled": false,
	"local_cache_enabled": true
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.601000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ad742b9e5576eedb50497df147df1721"}
	{
	"name": "compile_fx.<locals>.fw_compiler_base",
	"ts": 1733527490601481.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.603000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "cb401f4f44ce40d91fb40593e2e0fab8"}
	{
	"name": "create_aot_dispatcher_function",
	"ts": 1733527490603290.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.603000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "034a66d21d6ca4969f8613317ccf781b"}
	{
	"name": "autograd_cache_bypass",
	"ts": 1733527490471931.2,
	"args": {
	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
	"cache_bypass_hard_exception": false,
	"key": null,
	"cache_state": "bypass",
	"components": [],
	"compile_id": "1/0"
	},
	"ph": "i",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0,
	"s": "p"
	}
V1206 15:24:50.603000 1667746 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_bypass", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5601d02186053adcc1ba29fd248c1d20"}
	{"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention", "cache_bypass_hard_exception": false, "key": null, "cache_state": "bypass", "components": [], "compile_id": "1/0"}
V1206 15:24:50.603000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "bd634b54ab5138da3c38e2434aae7337"}
	{
	"name": "backend_compile",
	"ts": 1733527490603908.2,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "1/0",
	"requires_subclass_dispatch": false,
	"dispatch_mode": "inference",
	"cache_state": "bypass",
	"cache_event_time": 1733527490471931164,
	"key": null,
	"components": [],
	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
	"remote_cache_enabled": false,
	"local_cache_enabled": true
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.618000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "771ab34a6bd9546eb139960f16071592"}
	
	TREE_GUARD_MANAGER:
	+- RootGuardManager
	| +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None                           # _dynamo/output_graph.py:484 in init_ambient_guards
	| +- GLOBAL_STATE: ___check_global_state()
	| +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack()
	| +- GuardManager: source=L['k'], accessed_by=DictGetItemGuardAccessor('k')
	| | +- TYPE_MATCH: ___check_type_id(L['k'], 82181376)                          
	| | +- TENSOR_MATCH: check_tensor(L['k'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | +- NO_HASATTR: hasattr(L['k'], '_dynamo_dynamic_indices') == False         
	| | +- NO_TENSOR_ALIASING: check_no_aliasing(L['k'], L['q'], L['v'], L['block_mask'].q_indices, L['block_mask'].kv_indices, L['block_mask'].q_num_blocks, L['block_mask'].kv_num_blocks, L['block_mask'].full_q_indices, L['block_mask'].full_kv_indices, L['block_mask'].full_q_num_blocks, L['block_mask'].full_kv_num_blocks)
	| | +- GuardManager: source=L['k'].dim, accessed_by=GetAttrGuardAccessor(dim)
	| | +- GuardManager: source=L['k'].size, accessed_by=GetAttrGuardAccessor(size)
	| +- GuardManager: source=L['q'], accessed_by=DictGetItemGuardAccessor('q')
	| | +- TYPE_MATCH: ___check_type_id(L['q'], 82181376)                          
	| | +- TENSOR_MATCH: check_tensor(L['q'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | +- NO_HASATTR: hasattr(L['q'], '_dynamo_dynamic_indices') == False         
	| | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['q'].dim, accessed_by=GetAttrGuardAccessor(dim)
	| | +- GuardManager: source=L['q'].size, accessed_by=GetAttrGuardAccessor(size)
	| +- GuardManager: source=L['v'], accessed_by=DictGetItemGuardAccessor('v')
	| | +- TYPE_MATCH: ___check_type_id(L['v'], 82181376)                          
	| | +- TENSOR_MATCH: check_tensor(L['v'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | +- NO_HASATTR: hasattr(L['v'], '_dynamo_dynamic_indices') == False         
	| | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['v'].dim, accessed_by=GetAttrGuardAccessor(dim)
	| | +- GuardManager: source=L['v'].size, accessed_by=GetAttrGuardAccessor(size)
	| +- GuardManager: source=L['score_mod'], accessed_by=DictGetItemGuardAccessor('score_mod')
	| | +- GuardManager: source=L['score_mod'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | +- ID_MATCH: ___check_obj_id(L['score_mod'].__code__, 140062268556144)   
	| +- GuardManager: source=L['block_mask'], accessed_by=DictGetItemGuardAccessor('block_mask')
	| | +- TYPE_MATCH: ___check_type_id(L['block_mask'], 139679664)                
	| | +- GuardManager: source=L['block_mask'].mask_mod, accessed_by=GetAttrGuardAccessor(mask_mod)
	| | | +- GuardManager: source=L['block_mask'].mask_mod.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].mask_mod.__code__, 140062268556768)
	| | +- GuardManager: source=L['block_mask'].q_indices, accessed_by=GetAttrGuardAccessor(q_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].q_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].q_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].BLOCK_SIZE, accessed_by=GetAttrGuardAccessor(BLOCK_SIZE)
	| | | +- TYPE_MATCH: ___check_type_id(L['block_mask'].BLOCK_SIZE, 8812224)       
	| | | +- LENGTH_CHECK: len(L['block_mask'].BLOCK_SIZE) == 2                        
	| | | +- GuardManager: source=L['block_mask'].BLOCK_SIZE[0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | +- EQUALS_MATCH: L['block_mask'].BLOCK_SIZE[0] == 128                        
	| | | +- GuardManager: source=L['block_mask'].BLOCK_SIZE[1], accessed_by=TupleGetItemGuardAccessor(1)
	| | | | +- EQUALS_MATCH: L['block_mask'].BLOCK_SIZE[1] == 128                        
	| | +- GuardManager: source=L['block_mask'].kv_indices, accessed_by=GetAttrGuardAccessor(kv_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].kv_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].kv_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].q_num_blocks, accessed_by=GetAttrGuardAccessor(q_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].q_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].q_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].kv_num_blocks, accessed_by=GetAttrGuardAccessor(kv_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].kv_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].kv_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_q_indices, accessed_by=GetAttrGuardAccessor(full_q_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_q_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_q_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_kv_indices, accessed_by=GetAttrGuardAccessor(full_kv_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_kv_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_kv_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_q_num_blocks, accessed_by=GetAttrGuardAccessor(full_q_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_q_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_q_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_kv_num_blocks, accessed_by=GetAttrGuardAccessor(full_kv_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_kv_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_kv_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].as_tuple, accessed_by=GetAttrGuardAccessor(as_tuple)
	| | | +- GuardManager: source=L['block_mask'].as_tuple, accessed_by=FuncDefaultsGuardAccessor
	| | | | +- GuardManager: source=L['block_mask'].as_tuple.__defaults__[0], accessed_by=GetItemGuardAccessor(0)
	| | | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].as_tuple.__defaults__[0], 8908032)
	| +- GuardManager: source=L['flex_attention'], accessed_by=DictGetItemGuardAccessor('flex_attention')
	| | +- GuardManager: source=L['flex_attention'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__code__, 139318784)    
	| | +- GuardManager: source=L['flex_attention'], accessed_by=FuncDefaultsGuardAccessor
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[2], accessed_by=GetItemGuardAccessor(2)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[2], 8822752)
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[3], accessed_by=GetItemGuardAccessor(3)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[3], 8907584)
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[4], accessed_by=GetItemGuardAccessor(4)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[4], 8907584)
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[5], accessed_by=GetItemGuardAccessor(5)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[5], 8822752)
	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
	| | +- GuardManager: source=G['_140062263790704_c3'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c3')
	| | | +- GuardManager: source=G['_140062263790704_c3'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c3'].Tensor, 82181376)  
	| | | | +- GuardManager: source=G['_140062263790704_c3'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
	| | | | | +- GuardManager: source=G['_140062263790704_c3'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c3'].Tensor.__bases__[0], 140062119703136)
	| | +- GuardManager: source=G['__builtins_dict___6'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___6')
	| | | +- GuardManager: source=G['__builtins_dict___6']['len'], accessed_by=DictGetItemGuardAccessor('len')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['len'], 140062269592480)
	| | | +- GuardManager: source=G['__builtins_dict___6']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['sum'], 140062269593600)
	| | | +- GuardManager: source=G['__builtins_dict___6']['list'], accessed_by=DictGetItemGuardAccessor('list')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['list'], 8841312)  
	| | | +- GuardManager: source=G['__builtins_dict___6']['type'], accessed_by=DictGetItemGuardAccessor('type')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['type'], 8810240)  
	| | | +- GuardManager: source=G['__builtins_dict___6']['tuple'], accessed_by=DictGetItemGuardAccessor('tuple')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['tuple'], 8812224) 
	| | | +- GuardManager: source=G['__builtins_dict___6']['object'], accessed_by=DictGetItemGuardAccessor('object')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['object'], 8810976)
	| | | +- GuardManager: source=G['__builtins_dict___6']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___6']['isinstance'], 140062269592160)
	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes, accessed_by=GetAttrGuardAccessor(num_nodes)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves, accessed_by=GetAttrGuardAccessor(num_leaves)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children, accessed_by=GetAttrGuardAccessor(num_children)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children == 0
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, accessed_by=GetAttrGuardAccessor(children_specs)
	| | | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, 8841312)
	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
	| | | | +- KeyValueManager pair at index=1
	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_comptime'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_comptime')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_comptime'], 140057359527872)
	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_decorators')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'], 140057359526192)
	| | | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, accessed_by=GetAttrGuardAccessor(is_compiling)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, 140057422814624)
	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot__utils')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot__utils'], 140048551571072)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, accessed_by=GetAttrGuardAccessor(_SUPPORTED_HEAD_DIMS)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, 8841312)
	| | | | +- LENGTH_CHECK: len(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS) == 10
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[0], accessed_by=ListGetItemGuardAccessor(0)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[0] == 2
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[1], accessed_by=ListGetItemGuardAccessor(1)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[1] == 4
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[2], accessed_by=ListGetItemGuardAccessor(2)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[2] == 8
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[3], accessed_by=ListGetItemGuardAccessor(3)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[3] == 16
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[4], accessed_by=ListGetItemGuardAccessor(4)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[4] == 32
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5], accessed_by=ListGetItemGuardAccessor(5)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5] == 64
	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot_flex_attention')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], 140048551568912)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, accessed_by=GetAttrGuardAccessor(math)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, 140062267404384)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, accessed_by=GetAttrGuardAccessor(sqrt)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, 140062267408144)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, accessed_by=GetAttrGuardAccessor(torch)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, 140062263790704)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, accessed_by=GetAttrGuardAccessor(_dynamo)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, 140062262057760)
	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static, accessed_by=GetAttrGuardAccessor(mark_static)
	| | | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, 125680000)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, accessed_by=GetAttrGuardAccessor(compiler)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, 140057571877776)
	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, accessed_by=GetAttrGuardAccessor(is_dynamo_compiling)
	| | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, 140057572232544)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, accessed_by=GetAttrGuardAccessor(is_grad_enabled)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, 140062253115936)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device, accessed_by=GetAttrGuardAccessor(_validate_device)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, 140048552018544)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, accessed_by=GetAttrGuardAccessor(flex_attention_hop)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, 96230624)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__, accessed_by=GetAttrGuardAccessor(__name__)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__ == 'flex_attention'
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim, accessed_by=GetAttrGuardAccessor(_supported_head_dim)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, 140048552864992)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim, accessed_by=GetAttrGuardAccessor(_validate_embed_dim)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, 139781872)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness, accessed_by=GetAttrGuardAccessor(_validate_nestedness)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, 140048553100560)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input, accessed_by=GetAttrGuardAccessor(_validate_sdpa_input)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, 139823744)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options, accessed_by=GetAttrGuardAccessor(_apply_kernel_options)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, 140048554555888)
	
V1206 15:24:50.619000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b414209b816a551182c3cbb3ebebb2ce"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527490619189.8,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.621000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 521, "joint_graph_pass_time_us": 1048, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true}, "distributed_ephemeral_timeout_us": 4009141, "inductor_cumulative_compile_time_us": 77456, "aot_autograd_cumulative_compile_time_us": 134960, "dynamo_cumulative_compile_time_us": 242984, "frame_key": "2", "co_name": "fn", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", "co_firstlineno": 459, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 91, "shape_env_guard_count": 0, "graph_op_count": 2, "graph_node_count": 16, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.622000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c94fa0f0ebcc0c15f083237f9bd40372"}
	{
	"name": "dynamo",
	"ts": 1733527490621989.8,
	"args": {
	"compile_id": "1/0",
	"frame_key": "2",
	"co_name": "fn",
	"co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py",
	"co_firstlineno": 459,
	"cache_size": 0,
	"accumulated_cache_size": 0,
	"guard_count": 91,
	"shape_env_guard_count": 0,
	"graph_op_count": 2,
	"graph_node_count": 16,
	"graph_input_count": 11,
	"fail_type": null,
	"fail_reason": null,
	"fail_user_frame_filename": null,
	"fail_user_frame_lineno": null,
	"non_compliant_ops": [],
	"compliant_custom_ops": [],
	"restart_reasons": [],
	"dynamo_time_before_restart_s": 0.0,
	"has_guarded_code": true,
	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.623000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "f6ab5cb1835b6c744bb2f21749f1693d"}
	{
	"name": "dynamo",
	"ts": 1733527490623683.8,
	"args": {
	"compile_id": "0/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.624000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 490, "name": "test_flex_attention_caching", "filename": 1}, {"line": 466, "name": "fn2", "filename": 1}, {"line": 1316, "name": "flex_attention", "filename": 10}, {"line": 1303, "name": "_flex_attention_hop_wrapper", "filename": 10}]}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.624000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "0a08617336a65119a060bbb5cc88acf6"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527490624241.5,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "0/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.625000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 320, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.626000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.626000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 0, "source": "L['args'][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.686000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 320, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.686000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.686000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 2, "source": "L['args'][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.687000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 320, "size": 524288}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.687000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.687000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 3, "source": "L['args'][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.688000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.688000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 4, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.688000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 4, "source": "L['args'][4][0]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.688000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.688000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.689000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 5, "source": "L['args'][4][1]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.689000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.689000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.689000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 6, "source": "L['args'][4][2]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.690000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.690000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.690000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 7, "source": "L['args'][4][3]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.691000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.691000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.691000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 8, "source": "L['args'][4][4]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.691000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.692000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.692000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 9, "source": "L['args'][4][5]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.692000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 320, "size": 64}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.692000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.692000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 10, "source": "L['args'][4][6]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.693000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 320, "size": 1024}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.693000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 320}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.693000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 320, "id": 11, "source": "L['args'][4][7]"}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.697000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_args_0_": [1, 4, 512, 64], "l_args_1_": [1, 4, 512, 64], "l_args_2_": [1, 4, 512, 64], "l_args_4_0_": [1, 1, 16], "l_args_4_1_": [1, 1, 16, 16], "l_args_4_2_": [1, 1, 16], "l_args_4_3_": [1, 1, 16, 16], "l_args_4_4_": [1, 1, 16], "l_args_4_5_": [1, 1, 16, 16], "l_args_4_6_": [1, 1, 16], "l_args_4_7_": [1, 1, 16, 16], "getitem": [1, 4, 512, 64], "getitem_1": [1, 4, 512]}}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "9bb6d56ee069d3045a9d8d21bcfdcd31"}
	class GraphModule(torch.nn.Module):
	    def forward(self, L_args_0_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_1_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_2_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_args_4_0_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_1_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_2_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_3_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_4_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_5_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_args_4_6_: "i32[1, 1, 16][16, 16, 1]cuda:0", L_args_4_7_: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	        l_args_0_ = L_args_0_
	        l_args_1_ = L_args_1_
	        l_args_2_ = L_args_2_
	        l_args_4_0_ = L_args_4_0_
	        l_args_4_1_ = L_args_4_1_
	        l_args_4_2_ = L_args_4_2_
	        l_args_4_3_ = L_args_4_3_
	        l_args_4_4_ = L_args_4_4_
	        l_args_4_5_ = L_args_4_5_
	        l_args_4_6_ = L_args_4_6_
	        l_args_4_7_ = L_args_4_7_
	        
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1304 in _flex_attention_hop_wrapper, code: return flex_attention_hop(*args, **kwargs)
	        score_mod_0 = self.score_mod_0
	        mask_fn_0 = self.mask_fn_0
	        flex_attention = torch.ops.higher_order.flex_attention(l_args_0_, l_args_1_, l_args_2_, score_mod_0, (l_args_4_0_, l_args_4_1_, l_args_4_2_, l_args_4_3_, l_args_4_4_, l_args_4_5_, l_args_4_6_, l_args_4_7_, 128, 128, mask_fn_0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  l_args_0_ = l_args_1_ = l_args_2_ = score_mod_0 = l_args_4_0_ = l_args_4_1_ = l_args_4_2_ = l_args_4_3_ = l_args_4_4_ = l_args_4_5_ = l_args_4_6_ = l_args_4_7_ = mask_fn_0 = None
	        getitem: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0]
	        getitem_1: "f32[1, 4, 512][2048, 512, 1]cuda:0" = flex_attention[1];  flex_attention = None
	        return (getitem, getitem_1)
	        
	    class score_mod_0(torch.nn.Module):
	        def forward(self, child: "f32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0", child_4: "i32[][]cuda:0"):
	            return child
	            
	    class mask_fn_0(torch.nn.Module):
	        def forward(self, child: "i32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
	            return ge
	            
V1206 15:24:50.698000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "c8302b73abb12ac1bce37cef0c47d244"}
	{
	"name": "backend_compile",
	"ts": 1733527490698251.5,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "0/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.698000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "6b9dbcf1651e5e99de517f92d22baa7b"}
	{
	"name": "backend_compile",
	"ts": 1733527490698529.8,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "0/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.709000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "a32f9935e2d4f1dc06e2de195d9c24b2"}
	
	TREE_GUARD_MANAGER:
	+- RootGuardManager
	| +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None                           # _dynamo/output_graph.py:484 in init_ambient_guards
	| +- GLOBAL_STATE: ___check_global_state()
	| +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack()
	| +- GuardManager: source=L['args'], accessed_by=DictGetItemGuardAccessor('args')
	| | +- TYPE_MATCH: ___check_type_id(L['args'], 8812224)                        
	| | +- LENGTH_CHECK: len(L['args']) == 7                                         
	| | +- GuardManager: source=L['args'][0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | +- TENSOR_MATCH: check_tensor(L['args'][0], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | | +- NO_HASATTR: hasattr(L['args'][0], '_dynamo_dynamic_indices') == False   
	| | | +- NO_TENSOR_ALIASING: check_no_aliasing(L['args'][0], L['args'][1], L['args'][2], L['args'][4][0], L['args'][4][1], L['args'][4][2], L['args'][4][3], L['args'][4][4], L['args'][4][5], L['args'][4][6], L['args'][4][7])
	| | +- GuardManager: source=L['args'][1], accessed_by=TupleGetItemGuardAccessor(1)
	| | | +- TENSOR_MATCH: check_tensor(L['args'][1], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | | +- NO_HASATTR: hasattr(L['args'][1], '_dynamo_dynamic_indices') == False   
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['args'][2], accessed_by=TupleGetItemGuardAccessor(2)
	| | | +- TENSOR_MATCH: check_tensor(L['args'][2], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | | +- NO_HASATTR: hasattr(L['args'][2], '_dynamo_dynamic_indices') == False   
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['args'][3], accessed_by=TupleGetItemGuardAccessor(3)
	| | | +- GuardManager: source=L['args'][3].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][3].__code__, 140062269255152)     
	| | +- GuardManager: source=L['args'][4], accessed_by=TupleGetItemGuardAccessor(4)
	| | | +- TYPE_MATCH: ___check_type_id(L['args'][4], 8812224)                     
	| | | +- LENGTH_CHECK: len(L['args'][4]) == 11                                     
	| | | +- GuardManager: source=L['args'][4][0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][0], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][0], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][1], accessed_by=TupleGetItemGuardAccessor(1)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][1], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][1], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][2], accessed_by=TupleGetItemGuardAccessor(2)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][2], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][2], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][3], accessed_by=TupleGetItemGuardAccessor(3)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][3], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][3], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][4], accessed_by=TupleGetItemGuardAccessor(4)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][4], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][4], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][5], accessed_by=TupleGetItemGuardAccessor(5)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][5], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][5], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][6], accessed_by=TupleGetItemGuardAccessor(6)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][6], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][6], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][7], accessed_by=TupleGetItemGuardAccessor(7)
	| | | | +- TENSOR_MATCH: check_tensor(L['args'][4][7], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | | +- NO_HASATTR: hasattr(L['args'][4][7], '_dynamo_dynamic_indices') == False
	| | | | +- NO_TENSOR_ALIASING
	| | | +- GuardManager: source=L['args'][4][8], accessed_by=TupleGetItemGuardAccessor(8)
	| | | | +- EQUALS_MATCH: L['args'][4][8] == 128                                      
	| | | +- GuardManager: source=L['args'][4][9], accessed_by=TupleGetItemGuardAccessor(9)
	| | | | +- EQUALS_MATCH: L['args'][4][9] == 128                                      
	| | | +- GuardManager: source=L['args'][4][10], accessed_by=TupleGetItemGuardAccessor(10)
	| | | | +- GuardManager: source=L['args'][4][10].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(L['args'][4][10].__code__, 140062268556768) 
	| | +- GuardManager: source=L['args'][5], accessed_by=TupleGetItemGuardAccessor(5)
	| | | +- EQUALS_MATCH: L['args'][5] == 0.125                                       
	| | +- GuardManager: source=L['args'][6], accessed_by=TupleGetItemGuardAccessor(6)
	| | | +- DICT_LENGTH: len(L['args'][6]) == 4                                      
	| | | +- GuardManager: source=L['args'][6]['PRESCALE_QK'], accessed_by=DictGetItemGuardAccessor('PRESCALE_QK')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['PRESCALE_QK'], 8907584)       
	| | | +- GuardManager: source=L['args'][6]['ROWS_GUARANTEED_SAFE'], accessed_by=DictGetItemGuardAccessor('ROWS_GUARANTEED_SAFE')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['ROWS_GUARANTEED_SAFE'], 8907584)
	| | | +- GuardManager: source=L['args'][6]['BLOCKS_ARE_CONTIGUOUS'], accessed_by=DictGetItemGuardAccessor('BLOCKS_ARE_CONTIGUOUS')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['BLOCKS_ARE_CONTIGUOUS'], 8907584)
	| | | +- GuardManager: source=L['args'][6]['OUTPUT_LOGSUMEXP'], accessed_by=DictGetItemGuardAccessor('OUTPUT_LOGSUMEXP')
	| | | | +- ID_MATCH: ___check_obj_id(L['args'][6]['OUTPUT_LOGSUMEXP'], 8908032)  
	| +- GuardManager: source=L['kwargs'], accessed_by=DictGetItemGuardAccessor('kwargs')
	| | +- DICT_LENGTH: not L['kwargs']                                             
	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
	| | +- GuardManager: source=G['flex_attention_hop'], accessed_by=DictGetItemGuardAccessor('flex_attention_hop')
	| | | +- TYPE_MATCH: ___check_type_id(G['flex_attention_hop'], 96230624)         
	| | | +- GuardManager: source=G['flex_attention_hop'].__name__, accessed_by=GetAttrGuardAccessor(__name__)
	| | | | +- EQUALS_MATCH: G['flex_attention_hop'].__name__ == 'flex_attention'        
	| | +- GuardManager: source=G['_140062263790704_c4'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c4')
	| | | +- GuardManager: source=G['_140062263790704_c4'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c4'].Tensor, 82181376)  
	| | | | +- GuardManager: source=G['_140062263790704_c4'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
	| | | | | +- GuardManager: source=G['_140062263790704_c4'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c4'].Tensor.__bases__[0], 140062119703136)
	| | +- GuardManager: source=G['__builtins_dict___8'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___8')
	| | | +- GuardManager: source=G['__builtins_dict___8']['len'], accessed_by=DictGetItemGuardAccessor('len')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['len'], 140062269592480)
	| | | +- GuardManager: source=G['__builtins_dict___8']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['sum'], 140062269593600)
	| | | +- GuardManager: source=G['__builtins_dict___8']['list'], accessed_by=DictGetItemGuardAccessor('list')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['list'], 8841312)  
	| | | +- GuardManager: source=G['__builtins_dict___8']['type'], accessed_by=DictGetItemGuardAccessor('type')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['type'], 8810240)  
	| | | +- GuardManager: source=G['__builtins_dict___8']['tuple'], accessed_by=DictGetItemGuardAccessor('tuple')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['tuple'], 8812224) 
	| | | +- GuardManager: source=G['__builtins_dict___8']['object'], accessed_by=DictGetItemGuardAccessor('object')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['object'], 8810976)
	| | | +- GuardManager: source=G['__builtins_dict___8']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___8']['isinstance'], 140062269592160)
	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes, accessed_by=GetAttrGuardAccessor(num_nodes)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves, accessed_by=GetAttrGuardAccessor(num_leaves)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children, accessed_by=GetAttrGuardAccessor(num_children)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children == 0
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, accessed_by=GetAttrGuardAccessor(children_specs)
	| | | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, 8841312)
	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
	| | | | +- KeyValueManager pair at index=1
	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
	
V1206 15:24:50.710000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "6d01a47ed965240f30333b7963f37f52"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527490710248.2,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "0/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.712000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"aot_autograd_cumulative_compile_time_us": 278, "dynamo_cumulative_compile_time_us": 86006, "frame_key": "1", "co_name": "_flex_attention_hop_wrapper", "co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py", "co_firstlineno": 1303, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 58, "shape_env_guard_count": 0, "graph_op_count": 3, "graph_node_count": 17, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.713000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 0, "frame_compile_id": 0, "attempt": 0, "has_payload": "bf4cb9e110f5e8f2910d1d589e140933"}
	{
	"name": "dynamo",
	"ts": 1733527490713016.2,
	"args": {
	"compile_id": "0/0",
	"frame_key": "1",
	"co_name": "_flex_attention_hop_wrapper",
	"co_filename": "/data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py",
	"co_firstlineno": 1303,
	"cache_size": 0,
	"accumulated_cache_size": 0,
	"guard_count": 58,
	"shape_env_guard_count": 0,
	"graph_op_count": 3,
	"graph_node_count": 17,
	"graph_input_count": 11,
	"fail_type": null,
	"fail_reason": null,
	"fail_user_frame_filename": null,
	"fail_user_frame_lineno": null,
	"non_compliant_ops": [],
	"compliant_custom_ops": [],
	"restart_reasons": [],
	"dynamo_time_before_restart_s": 0.0,
	"has_guarded_code": true,
	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 9223372036854775807, \"accumulated_cache_size_limit\": 9223372036854775807, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.715000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "34b5378127dd1adc3ac18035ff2352ab"}
	{
	"name": "dynamo",
	"ts": 1733527490715020.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.715000 1667746 torch/_dynamo/convert_frame.py:961] {"dynamo_start": {"stack": [{"line": 1439, "name": "<module>", "filename": 1}, {"line": 15, "name": "run_tests", "filename": 2}, {"line": 39, "name": "run_tests", "filename": 3}, {"line": 1353, "name": "run_tests", "filename": 4}, {"line": 102, "name": "__init__", "filename": 5}, {"line": 274, "name": "runTests", "filename": 5}, {"line": 217, "name": "run", "filename": 6}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 84, "name": "__call__", "filename": 7}, {"line": 122, "name": "run", "filename": 7}, {"line": 678, "name": "__call__", "filename": 8}, {"line": 3234, "name": "run", "filename": 4}, {"line": 3206, "name": "_run_custom", "filename": 4}, {"line": 623, "name": "run", "filename": 8}, {"line": 579, "name": "_callTestMethod", "filename": 8}, {"line": 3099, "name": "wrapper", "filename": 4}, {"line": 81, "name": "inner", "filename": 9}, {"line": 81, "name": "inner", "filename": 9}, {"line": 490, "name": "test_flex_attention_caching", "filename": 1}, {"line": 465, "name": "fn2", "filename": 1}]}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.715000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "514fa302b9c8e853a86097889a21966b"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527490715568.8,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.716000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 0, "describer_id": 335, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.717000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 0, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 0, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab890>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.717000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 0, "source": "L['q']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.723000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 1, "describer_id": 335, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.723000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 1, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 1, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982ab950>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.723000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 1, "source": "L['k']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.724000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 2, "describer_id": 335, "size": 524288}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.724000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 2, "ndim": 4, "dtype": "torch.float32", "device": "device(type='cuda', index=0)", "size": [1, 4, 512, 64], "is_leaf": true, "stride": [131072, 32768, 64, 1], "storage": 2, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5730>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.724000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 2, "source": "L['v']"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.729000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 3, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.730000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 3, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 3, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982aaed0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.730000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 3, "source": "L['block_mask'].kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.795000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 4, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.795000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 5, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 4, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5850>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.796000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 5, "source": "L['block_mask'].kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.796000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 5, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.797000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 6, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 5, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5790>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.797000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 6, "source": "L['block_mask'].full_kv_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.798000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 6, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.798000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 7, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 6, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c58b0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.798000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 7, "source": "L['block_mask'].full_kv_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.799000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 7, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.799000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 8, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 7, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5c10>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.799000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 8, "source": "L['block_mask'].q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.800000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 8, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.800000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 9, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 8, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5b50>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.800000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 9, "source": "L['block_mask'].q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.801000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 9, "describer_id": 335, "size": 64}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.801000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 10, "ndim": 3, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16], "is_leaf": true, "stride": [16, 16, 1], "storage": 9, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5cd0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.801000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 10, "source": "L['block_mask'].full_q_num_blocks"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.802000 1667746 torch/_subclasses/meta_utils.py:241] {"describe_storage": {"id": 10, "describer_id": 335, "size": 1024}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.802000 1667746 torch/_subclasses/meta_utils.py:454] {"describe_tensor": {"id": 11, "ndim": 4, "dtype": "torch.int32", "device": "device(type='cuda', index=0)", "size": [1, 1, 16, 16], "is_leaf": true, "stride": [256, 256, 16, 1], "storage": 10, "view_func": "<built-in method _view_func_unsafe of Tensor object at 0x7f5f982c5bb0>", "describer_id": 335}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.802000 1667746 torch/_subclasses/meta_utils.py:1779] {"describe_source": {"describer_id": 335, "id": 11, "source": "L['block_mask'].full_q_indices"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:50.806000 1667746 torch/_dynamo/output_graph.py:1336] {"dynamo_output_graph": {"sizes": {"l_q_": [1, 4, 512, 64], "l_k_": [1, 4, 512, 64], "l_v_": [1, 4, 512, 64], "l_block_mask_kv_num_blocks": [1, 1, 16], "l_block_mask_kv_indices": [1, 1, 16, 16], "l_block_mask_full_kv_num_blocks": [1, 1, 16], "l_block_mask_full_kv_indices": [1, 1, 16, 16], "l_block_mask_q_num_blocks": [1, 1, 16], "l_block_mask_q_indices": [1, 1, 16, 16], "l_block_mask_full_q_num_blocks": [1, 1, 16], "l_block_mask_full_q_indices": [1, 1, 16, 16], "out": [1, 4, 512, 64]}}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "482a8ec46bfbc33e69d27b2c7e75e43d"}
	class GraphModule(torch.nn.Module):
	    def forward(self, L_q_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_k_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_v_: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", L_block_mask_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_kv_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_kv_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", L_block_mask_full_q_num_blocks: "i32[1, 1, 16][16, 16, 1]cuda:0", L_block_mask_full_q_indices: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	        l_q_ = L_q_
	        l_k_ = L_k_
	        l_v_ = L_v_
	        l_block_mask_kv_num_blocks = L_block_mask_kv_num_blocks
	        l_block_mask_kv_indices = L_block_mask_kv_indices
	        l_block_mask_full_kv_num_blocks = L_block_mask_full_kv_num_blocks
	        l_block_mask_full_kv_indices = L_block_mask_full_kv_indices
	        l_block_mask_q_num_blocks = L_block_mask_q_num_blocks
	        l_block_mask_q_indices = L_block_mask_q_indices
	        l_block_mask_full_q_num_blocks = L_block_mask_full_q_num_blocks
	        l_block_mask_full_q_indices = L_block_mask_full_q_indices
	        
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	        score_mod_0 = self.score_mod_0
	        mask_fn_0 = self.mask_fn_0
	        flex_attention = torch.ops.higher_order.flex_attention(l_q_, l_k_, l_v_, score_mod_0, (l_block_mask_kv_num_blocks, l_block_mask_kv_indices, l_block_mask_full_kv_num_blocks, l_block_mask_full_kv_indices, l_block_mask_q_num_blocks, l_block_mask_q_indices, l_block_mask_full_q_num_blocks, l_block_mask_full_q_indices, 128, 128, mask_fn_0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  l_q_ = l_k_ = l_v_ = score_mod_0 = l_block_mask_kv_num_blocks = l_block_mask_kv_indices = l_block_mask_full_kv_num_blocks = l_block_mask_full_kv_indices = l_block_mask_q_num_blocks = l_block_mask_q_indices = l_block_mask_full_q_num_blocks = l_block_mask_full_q_indices = mask_fn_0 = None
	        out: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0];  flex_attention = None
	        return (out,)
	        
	    class score_mod_0(torch.nn.Module):
	        def forward(self, child: "f32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0", child_4: "i32[][]cuda:0"):
	            return child
	            
	    class mask_fn_0(torch.nn.Module):
	        def forward(self, child: "i32[][]cuda:0", child_1: "i32[][]cuda:0", child_2: "i32[][]cuda:0", child_3: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = child_2 >= child_3;  child_2 = child_3 = None
	            return ge
	            
V1206 15:24:50.807000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1d40dde81d7850073106ae3cf2229e48"}
	{
	"name": "backend_compile",
	"ts": 1733527490807005.5,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.807000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ca5c77d2199fb0261e09e1800f877003"}
	{
	"name": "_recursive_pre_grad_passes",
	"ts": 1733527490807277.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.807000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a8bfb8ba2a3fefc2b30a7110871d7e3a"}
	{
	"name": "_recursive_pre_grad_passes",
	"ts": 1733527490807804.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.810000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "3b3ffc9d594503c9a35ba96fe32b6bde"}
	{
	"name": "create_aot_dispatcher_function",
	"ts": 1733527490809984.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.852000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:214] {"artifact": {"name": "aot_forward_graph_fw_metadata", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d952b3071dd9ba606a04d644841d9aba"}
	ViewAndMutationMeta(input_info=[InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True),
	                               InputAliasInfo(is_leaf=True,
	                                              mutates_data=False,
	                                              mutates_metadata=False,
	                                              mutations_hidden_from_autograd=True,
	                                              mutations_under_no_grad_or_inference_mode=False,
	                                              mutation_inductor_storage_resize=False,
	                                              mutates_storage_metadata=False,
	                                              requires_grad=False,
	                                              keep_input_mutations=True)],
	                    output_info=[OutputAliasInfo(output_type=<OutputType.non_alias: 1>,
	                                                raw_type=<class 'torch._subclasses.functional_tensor.FunctionalTensor'>,
	                                                base_idx=None,
	                                                dynamic_dims=set(),
	                                                requires_grad=False,
	                                                functional_tensor=None)],
	                    num_intermediate_bases=0,
	                    keep_input_mutations=True,
	                    traced_tangents=[],
	                    subclass_inp_meta=[PlainTensorMeta(unwrapped_idx=0,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=1,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=2,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=3,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=4,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=5,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=6,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=7,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=8,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=9,
	                                                      memory_format=None),
	                                      PlainTensorMeta(unwrapped_idx=10,
	                                                      memory_format=None)],
	                    subclass_fw_graph_out_meta=[PlainTensorMeta(unwrapped_idx=0,
	                                                               memory_format=None)],
	                    subclass_tangent_meta=[],
	                    is_train=False,
	                    traced_tangent_metas=None,
	                    num_symints_saved_for_bw=None,
	                    grad_enabled_mutation=None,
	                    deterministic=None,
	                    static_input_indices=[],
	                    tokens={},
	                    indices_of_inputs_that_requires_grad_with_mutations_in_bw=[],
	                    bw_donated_idxs=None,
	                    num_backward_tokens=0)
V1206 15:24:50.853000 1667746 torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py:232] {"aot_inference_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ecc276ab6d6e35dc44c5f2bbc091ace0"}
	class <lambda>(torch.nn.Module):
	    def forward(self, arg0_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg1_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg2_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg3_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg4_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg5_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg6_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg7_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg8_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg9_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg10_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	        sdpa_score0 = self.sdpa_score0
	        sdpa_mask0 = self.sdpa_mask0
	        flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None
	        getitem: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0];  flex_attention = None
	        return (getitem,)
	        
	    class sdpa_score0(torch.nn.Module):
	        def forward(self, arg0_1: "f32[][]cuda:0", arg1_1: "i32[][]cuda:0", arg2_1: "i32[][]cuda:0", arg3_1: "i32[][]cuda:0", arg4_1: "i32[][]cuda:0"):
	            return arg0_1
	            
	    class sdpa_mask0(torch.nn.Module):
	        def forward(self, arg0_1: "i32[][]cuda:0", arg1_1: "i32[][]cuda:0", arg2_1: "i32[][]cuda:0", arg3_1: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = torch.ops.aten.ge.Tensor(arg2_1, arg3_1);  arg2_1 = arg3_1 = None
	            return ge
	            
V1206 15:24:50.853000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "de5dcb5ec710567996f5f45241f82616"}
	{
	"name": "compile_fx.<locals>.fw_compiler_base",
	"ts": 1733527490853953.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.854000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f7192ebd762dae9c8cf2d7972d6bf48e"}
	{
	"name": "_recursive_joint_graph_passes",
	"ts": 1733527490854257.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.855000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "bfb4a0e07ad2c3524d5a0a3c69449196"}
	{
	"name": "_recursive_joint_graph_passes",
	"ts": 1733527490855286.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.855000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8d3b095b4ae2ad23dfdd31e5633971b0"}
	{
	"name": "inductor_compile",
	"ts": 1733527490855525.2,
	"args": {
	"fn_name": "compile_fx_inner",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.863000 1667746 torch/_inductor/compile_fx.py:835] {"artifact": {"name": "fx_graph_runnable", "encoding": "string"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2685e2fe8ae1c10315e3657921ebac7a"}
	
	import torch
	from torch import tensor, device
	import torch.fx as fx
	from torch._dynamo.testing import rand_strided
	from math import inf
	import torch._inductor.inductor_prims
	
	import torch._dynamo.config
	import torch._inductor.config
	import torch._functorch.config
	import torch.fx.experimental._config
	torch._dynamo.config.cache_size_limit = 8
	torch._dynamo.config.accumulated_cache_size_limit = 256
	torch._dynamo.config.traceable_tensor_subclasses = set()
	torch._dynamo.config.suppress_errors = False
	torch._dynamo.config.allowed_functions_module_string_ignorelist = {'torch._decomp', 'torch.testing', 'torch._prims', 'torch._refs', 'torch.distributions'}
	torch._dynamo.config._ddp_optimization_mode = ['ddp_optimizer', 'python_reducer', 'python_reducer_without_compiled_forward', 'no_optimization']
	torch._dynamo.config.raise_on_ctx_manager_usage = True
	torch._dynamo.config._save_config_ignore = {'repro_after', 'skipfiles_inline_module_allowlist', 'repro_level', 'constant_functions'}
	torch._dynamo.config.log_compilation_metrics = False
	torch._dynamo.config.reorderable_logging_functions = set()
	torch._dynamo.config._autograd_backward_strict_mode_banned_ops = ['stride', 'requires_grad', 'storage_offset', 'layout', 'data', 'is_coalesced', 'is_complex', 'is_conj', 'is_contiguous', 'is_cpu', 'is_cuda', 'is_distributed', 'is_floating_point', 'is_inference', 'is_ipu', 'is_leaf', 'is_maia', 'is_meta', 'is_mkldnn', 'is_mps', 'is_mtia', 'is_neg', 'is_nested', 'is_nonzero', 'is_pinned', 'is_quantized', 'is_same_size', 'is_set_to', 'is_shared', 'is_signed', 'is_sparse', 'is_sparse_csr', 'is_vulkan', 'is_xla', 'is_xpu']
	torch._dynamo.config.fake_tensor_cache_enabled = True
	torch._dynamo.config.fake_tensor_cache_crosscheck_enabled = True
	torch._dynamo.config.compiled_autograd_kwargs_override = {}
	torch._inductor.config.fx_graph_cache = True
	torch._inductor.config.fx_graph_remote_cache = False
	torch._inductor.config.autotune_local_cache = False
	torch._inductor.config.autotune_remote_cache = False
	torch._inductor.config.bundled_autotune_remote_cache = False
	torch._inductor.config.pre_grad_fusion_options = {}
	torch._inductor.config.post_grad_fusion_options = {}
	torch._inductor.config.fx_passes_numeric_check = {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}
	torch._inductor.config.reorder_for_compute_comm_overlap_passes = ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']
	torch._inductor.config._fuse_ddp_communication_passes = ['fuse_ddp_with_concat_op', 'schedule_comm_wait']
	torch._inductor.config.aot_inductor.metadata = {}
	torch._inductor.config.aot_inductor.presets = {}
	torch._inductor.config.rocm.arch = []
	torch._inductor.config.rocm.ck_supported_arch = ['gfx90a', 'gfx940', 'gfx941', 'gfx942']
	torch._inductor.config._save_config_ignore = ['trace.upload_tar', 'joint_custom_pre_pass', 'joint_custom_post_pass', 'pre_grad_custom_pass']
	torch._inductor.config._cache_config_ignore_prefix = ['trace', 'cuda.cutlass_dir', 'worker_start_method', 'compile_threads', 'post_grad_custom_post_pass', 'post_grad_custom_pre_pass', 'always_complex_memory_overlap_TESTING_ONLY']
	torch._inductor.config.external_matmul = []
	torch._functorch.config.functionalize_rng_ops = False
	torch._functorch.config.enable_autograd_cache = True
	torch._functorch.config.fake_tensor_allow_unsafe_data_ptr_access = True
	torch._functorch.config.unlift_effect_tokens = True
	
	
	
	isolate_fails_code_str = None
	
	
	
	
	# torch version: 2.6.0a0+giteece9ec
	# torch cuda version: 12.2
	# torch git version: eece9ecd62cae84bc2f915fc48cffe43e30256aa
	
	
	# CUDA Info: 
	# nvcc: NVIDIA (R) Cuda compiler driver 
	# Copyright (c) 2005-2023 NVIDIA Corporation 
	# Built on Tue_Aug_15_22:02:13_PDT_2023 
	# Cuda compilation tools, release 12.2, V12.2.140 
	# Build cuda_12.2.r12.2/compiler.33191640_0 
	
	# GPU Hardware Info: 
	# NVIDIA H100 : 8 
	
	
	from torch.nn import *
	class Repro(torch.nn.Module):
	    def __init__(self) -> None:
	        super().__init__()
	        self.sdpa_score0 = <lambda>()
	        self.sdpa_mask0 = <lambda>()
	
	    
	    
	    def forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):
	        sdpa_score0 = self.sdpa_score0
	        sdpa_mask0 = self.sdpa_mask0
	        flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None
	        getitem = flex_attention[0];  flex_attention = None
	        return (getitem,)
	        
	def load_args(reader):
	    buf0 = reader.storage(None, 524288, device=device(type='cuda', index=0))
	    reader.tensor(buf0, (1, 4, 512, 64), is_leaf=True)  # arg0_1
	    buf1 = reader.storage(None, 524288, device=device(type='cuda', index=0))
	    reader.tensor(buf1, (1, 4, 512, 64), is_leaf=True)  # arg1_1
	    buf2 = reader.storage(None, 524288, device=device(type='cuda', index=0))
	    reader.tensor(buf2, (1, 4, 512, 64), is_leaf=True)  # arg2_1
	    buf3 = reader.storage(None, 64, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf3, (1, 1, 16), dtype=torch.int32, is_leaf=True)  # arg3_1
	    buf4 = reader.storage(None, 1024, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf4, (1, 1, 16, 16), dtype=torch.int32, is_leaf=True)  # arg4_1
	    buf5 = reader.storage(None, 64, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf5, (1, 1, 16), dtype=torch.int32, is_leaf=True)  # arg5_1
	    buf6 = reader.storage(None, 1024, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf6, (1, 1, 16, 16), dtype=torch.int32, is_leaf=True)  # arg6_1
	    buf7 = reader.storage(None, 64, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf7, (1, 1, 16), dtype=torch.int32, is_leaf=True)  # arg7_1
	    buf8 = reader.storage(None, 1024, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf8, (1, 1, 16, 16), dtype=torch.int32, is_leaf=True)  # arg8_1
	    buf9 = reader.storage(None, 64, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf9, (1, 1, 16), dtype=torch.int32, is_leaf=True)  # arg9_1
	    buf10 = reader.storage(None, 1024, device=device(type='cuda', index=0), dtype_hint=torch.int32)
	    reader.tensor(buf10, (1, 1, 16, 16), dtype=torch.int32, is_leaf=True)  # arg10_1
	load_args._version = 0
	mod = Repro()
	if __name__ == '__main__':
	    from torch._dynamo.repro.after_aot import run_repro
	    with torch.no_grad():
	        run_repro(mod, load_args, accuracy=False, command='run', save_dir=None, tracing_mode='real', check_str=None)
	        # To run it separately, do 
	        # mod, args = run_repro(mod, load_args, accuracy=False, command='get_args', save_dir=None, tracing_mode='real', check_str=None)
	        # mod(*args)
V1206 15:24:50.866000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "dfa5a083adca30c7330158b04eac504c"}
	{
	"name": "_recursive_post_grad_passes",
	"ts": 1733527490866428.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.867000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "92fea8d9f2c2af6ab5a7b7726a25e9c7"}
	{
	"name": "_recursive_post_grad_passes",
	"ts": 1733527490867486.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.868000 1667746 torch/_inductor/compile_fx.py:898] {"inductor_post_grad_graph": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ecc276ab6d6e35dc44c5f2bbc091ace0"}
	class <lambda>(torch.nn.Module):
	    def forward(self, arg0_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg1_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg2_1: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0", arg3_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg4_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg5_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg6_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg7_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg8_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0", arg9_1: "i32[1, 1, 16][16, 16, 1]cuda:0", arg10_1: "i32[1, 1, 16, 16][256, 256, 16, 1]cuda:0"):
	         # File: /data/users/xmfan/a/pytorch/torch/nn/attention/flex_attention.py:1286 in flex_attention, code: out, lse = flex_attention_hop(
	        sdpa_score0 = self.sdpa_score0
	        sdpa_mask0 = self.sdpa_mask0
	        flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None
	        getitem: "f32[1, 4, 512, 64][131072, 32768, 64, 1]cuda:0" = flex_attention[0];  flex_attention = None
	        return (getitem,)
	        
	    class sdpa_score0(torch.nn.Module):
	        def forward(self, arg0_1: "f32[][]cuda:0", arg1_1: "i32[][]cuda:0", arg2_1: "i32[][]cuda:0", arg3_1: "i32[][]cuda:0", arg4_1: "i32[][]cuda:0"):
	            return arg0_1
	            
	    class sdpa_mask0(torch.nn.Module):
	        def forward(self, arg0_1: "i32[][]cuda:0", arg1_1: "i32[][]cuda:0", arg2_1: "i32[][]cuda:0", arg3_1: "i32[][]cuda:0"):
	             # File: /data/users/xmfan/a/pytorch/test/inductor/test_codecache.py:453 in <lambda>, code: lambda b, h, q, kv: q >= kv, None, None, 2048, 2048
	            ge: "b8[][]cuda:0" = torch.ops.aten.ge.Tensor(arg2_1, arg3_1);  arg2_1 = arg3_1 = None
	            return ge
	            
V1206 15:24:50.869000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "2c4e2b8ec85ec846c7bfb5687ddbf4eb"}
	{
	"name": "GraphLowering.run",
	"ts": 1733527490869198.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.889000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "4cf953068399b7703999c88a61b534a8"}
	{
	"name": "GraphLowering.run",
	"ts": 1733527490889235.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.889000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8512f5af880647d00bdcc6cb55c979b2"}
	{
	"name": "GraphLowering.compile_to_fn",
	"ts": 1733527490889665.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.889000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ba5d98c4ab6385588c824e9ba44d02fe"}
	{
	"name": "code_gen",
	"ts": 1733527490889960.8,
	"args": {
	"fn_name": "GraphLowering.compile_to_module",
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.890000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "d2916632a86a0b07c4acc6705aa100b6"}
	{
	"name": "GraphLowering.codegen",
	"ts": 1733527490890197.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.891000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e0af71ca8453aaa26c1d6ae93f92a7e0"}
	{
	"name": "Scheduler.__init__",
	"ts": 1733527490891018.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.892000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "6c816233fc4897d4d8763de32b6039c5"}
	{
	"name": "Scheduler.fused_nodes",
	"ts": 1733527490892516.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.892000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "398867777111a34eaebfec44bc9bc0e0"}
	{
	"name": "Scheduler.fused_nodes",
	"ts": 1733527490892802.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.895000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "4348580c03d299cf39e06d0f77951243"}
	{
	"name": "Scheduler.__init__",
	"ts": 1733527490895243.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.895000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "37369aaefd17f9a63be8fcd31ae1e98c"}
	{
	"name": "Scheduler.codegen",
	"ts": 1733527490895454.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.900000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "1b9346463f71bc6d13a662a5be2be40b"}
	{
	"name": "Scheduler.codegen",
	"ts": 1733527490900442.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.900000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "54ce92e7f7ba26f1e39c7094e3194d16"}
	{
	"name": "PythonWrapperCodegen.generate",
	"ts": 1733527490900664.2,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.901000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "505184b3d58c91c853c86f14cf8b9246"}
	{
	"name": "PythonWrapperCodegen.generate",
	"ts": 1733527490901874.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.902000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "fac85554adaad8ec3734fccc5ed505af"}
	{
	"name": "GraphLowering.codegen",
	"ts": 1733527490902086.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.902000 1667746 torch/_inductor/graph.py:2030] {"inductor_output_code": {"filename": "/tmp/tmpsn77tlsm/gg/cgg6gukzbkegr5nqxvu5c6limjypanrlgt5z3sah5y2lzsjiugp7.py"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "45e11dd29b32835a24d78e4a6ab62a07"}
	# AOT ID: ['2_inference']
	from ctypes import c_void_p, c_long, c_int
	import torch
	import math
	import random
	import os
	import tempfile
	from math import inf, nan
	from torch._inductor.hooks import run_intermediate_hooks
	from torch._inductor.utils import maybe_profile
	from torch._inductor.codegen.memory_planning import _align as align
	from torch import device, empty_strided
	from torch._inductor.async_compile import AsyncCompile
	from torch._inductor.select_algorithm import extern_kernels
	from torch._inductor.codegen.multi_kernel import MultiKernelCall
	import torch._inductor.kernel.flex_attention
	from torch._C import _cuda_getCurrentRawStream as get_raw_stream
	import triton
	import triton.language as tl
	from torch._inductor.runtime.triton_heuristics import (
	    grid,
	    split_scan_grid,
	    grid_combo_kernels,
	    start_graph,
	    end_graph,
	    cooperative_reduction_grid,
	)
	
	aten = torch.ops.aten
	inductor_ops = torch.ops.inductor
	_quantized = torch.ops._quantized
	assert_size_stride = torch._C._dynamo.guards.assert_size_stride
	empty_strided_cpu = torch._C._dynamo.guards._empty_strided_cpu
	empty_strided_cuda = torch._C._dynamo.guards._empty_strided_cuda
	empty_strided_xpu = torch._C._dynamo.guards._empty_strided_xpu
	reinterpret_tensor = torch._C._dynamo.guards._reinterpret_tensor
	alloc_from_pool = torch.ops.inductor._alloc_from_pool
	async_compile = AsyncCompile()
	empty_strided_p2p = torch._C._distributed_c10d._SymmetricMemory.empty_strided_p2p
	
	
	# kernel path: /tmp/tmpsn77tlsm/4s/c4sfc3fngwlnbougmdi6kqtjf5vlgrowumdc7a2kkh5gxxav655m.py
	# Topologically Sorted Source Nodes: [flex_attention], Original ATen: []
	# Source node to ATen node mapping:
	#   flex_attention => flex_attention
	# Graph fragment:
	#   %flex_attention : [num_users=1] = call_function[target=torch.ops.higher_order.flex_attention](args = (%arg0_1, %arg1_1, %arg2_1, %sdpa_score0, (%arg3_1, %arg4_1, %arg5_1, %arg6_1, %arg7_1, %arg8_1, %arg9_1, %arg10_1, 128, 128, %sdpa_mask0), 0.125, {PRESCALE_QK: False, ROWS_GUARANTEED_SAFE: False, BLOCKS_ARE_CONTIGUOUS: False, OUTPUT_LOGSUMEXP: True}, (), ()), kwargs = {})
	triton_tem_fused_0 = async_compile.triton('triton_tem_fused_0', '''
	import triton
	import triton.language as tl
	from triton.compiler.compiler import AttrsDescriptor
	
	from torch._inductor.runtime import triton_helpers, triton_heuristics
	from torch._inductor.runtime.triton_helpers import libdevice, math as tl_math
	from torch._inductor.runtime.hints import AutotuneHint, ReductionHint, TileHint, DeviceProperties
	
	@triton_heuristics.template(
	    num_stages=3,
	    num_warps=4,
	    triton_meta={'signature': {'arg_Q': '*fp32', 'arg_K': '*fp32', 'arg_V': '*fp32', 'arg_LSE': '*fp32', 'arg_KV_NUM_BLKS': '*i32', 'arg_KV_IDX': '*i32', 'arg_FULL_KV_NUM_BLKS': '*i32', 'arg_FULL_KV_IDX': '*i32', 'out_ptr0': '*fp32'}, 'device': DeviceProperties(type='cuda', index=0, cc=90, major=9, regs_per_multiprocessor=65536, max_threads_per_multi_processor=2048, multi_processor_count=132, warp_size=32), 'constants': {}, 'configs': [AttrsDescriptor(divisible_by_16=(0, 1, 2, 3, 4, 5, 6, 7, 8), equal_to_1=())]},
	    inductor_meta={'kernel_name': 'triton_tem_fused_0', 'backend_hash': '562E840D41CEB1D8E51DE726EA7592B0C37A0C6FBD72CF4E958863CEC11D41A7', 'are_deterministic_algorithms_enabled': False, 'assert_indirect_indexing': True, 'autotune_local_cache': False, 'autotune_pointwise': True, 'autotune_remote_cache': False, 'force_disable_caches': False, 'dynamic_scale_rblock': True, 'max_autotune': False, 'max_autotune_pointwise': False, 'min_split_scan_rblock': 256, 'spill_threshold': 16, 'store_cubin': False},
	)
	@triton.jit
	def triton_tem_fused_0(arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0):
	    PRESCALE_QK : tl.constexpr = False
	    ROWS_GUARANTEED_SAFE : tl.constexpr = False
	    BLOCKS_ARE_CONTIGUOUS : tl.constexpr = False
	    OUTPUT_LOGSUMEXP : tl.constexpr = True
	    FLOAT32_PRECISION : tl.constexpr = 'ieee'
	    IS_DIVISIBLE : tl.constexpr = True
	    SM_SCALE : tl.constexpr = 0.125
	    GQA_SHARED_HEADS : tl.constexpr = 1
	    HAS_FULL_BLOCKS : tl.constexpr = True
	    QK_HEAD_DIM : tl.constexpr = 64
	    V_HEAD_DIM : tl.constexpr = 64
	    BLOCK_M : tl.constexpr = 128
	    BLOCK_N : tl.constexpr = 32
	    SPARSE_Q_BLOCK_SIZE : tl.constexpr = 128
	    SPARSE_KV_BLOCK_SIZE : tl.constexpr = 128
	    Q = arg_Q
	    K = arg_K
	    V = arg_V
	    LSE = arg_LSE
	    KV_NUM_BLKS = arg_KV_NUM_BLKS
	    KV_IDX = arg_KV_IDX
	    FULL_KV_NUM_BLKS = arg_FULL_KV_NUM_BLKS
	    FULL_KV_IDX = arg_FULL_KV_IDX
	
	    # Sub notation for this kernel:
	    #
	    # Q: Query, K: Key, V: Value
	    # M: Number of queries, N: Number of keys/values, D: Model dimension
	    # QK_HEAD_DIM: The dimension of the query and key embeddings
	    # V_HEAD_DIM: The dimension of the value embeddings
	    # z: Batch size, h: Number of heads, m: Number of queries per head, k: Number of keys per head
	    # GQA_SHARED_HEADS: number of query heads sharing one kv head in GQA setups.
	    #
	    # The following FULL_* and PARTIAL_* is defined in the block sparse mask grid, rather than the thread block grid.
	    # KV_NUM_BLKS: The number of KV blocks (that may or may not require masking) for each query.
	    # KV_IDX: The indices of KV blocks (that may or may not require masking) for each query.
	    # FULL_KV_NUM_BLKS: The number of fully unmasked KV blocks (so we don't need masking) for each query.
	    # FULL_KV_IDX: The indices of fully unmasked KV blocks (so we don't need masking) for each query.
	    #
	    # OUTPUT_LOGSUMEXP: We only need to store the logsumexp if we require grad
	    #
	    # (Modifiable) Performance tuning options
	    # BLOCK_M: The thread block size across the seqlen dim of Q.
	    # BLOCK_N: Iterate over BLOCK_N across the seqlen dim of K/V in each thread block.
	
	    # The below are kernel options that can be applied for certain score_mods,
	    # or involve a numerics vs. perf tradeoff
	    # PRESCALE_QK: Whether to pre-scale QK by 1/sqrt(d) and change of base. Has
	    # about 20% more numerical error, but slightly faster.
	    # ROWS_GUARANTEED_SAFE: Is it guaranteed that at least one value in each row
	    # is not masked out? If so, we can skip an extra safety check
	    # BLOCKS_ARE_CONTIGUOUS: Is it guaranteed that all blocks in the mask are
	    # contiguous? If so, we don't need to do an indirect jump for every block
	
	    tl.static_assert(SPARSE_Q_BLOCK_SIZE >= BLOCK_M and SPARSE_Q_BLOCK_SIZE % BLOCK_M == 0)
	    tl.static_assert(SPARSE_KV_BLOCK_SIZE >= BLOCK_N and SPARSE_KV_BLOCK_SIZE % BLOCK_N == 0)
	
	    # Define strides of inputs
	    stride_qz, stride_qh, stride_qm, stride_qk = 131072, 32768, 64, 1
	    stride_kz, stride_kh, stride_kn, stride_kk = 131072, 32768, 64, 1
	    stride_vz, stride_vh, stride_vn, stride_vk = 131072, 32768, 64, 1
	
	    ZQ = 1
	    HQ = 4
	    Q_LEN = 512
	    ZKV = 1
	    KV_LEN = 512
	
	    MATMUL_PRECISION = Q.dtype.element_ty
	
	    q_start = tl.program_id(0)
	    off_zq = tl.program_id(1) // HQ
	    off_hq = tl.program_id(1) % HQ
	
	    # We support two cases for batch dimension. a) (ZKV == ZQ) where off_zkv = off_zq.
	    # b) (ZKV == 1 and ZQ > 1) where KV is broadcasted along the batch dimension and off_zkv=0.
	    off_zkv = off_zq % ZKV
	    off_hkv = off_hq // GQA_SHARED_HEADS
	    off_g = off_hq % GQA_SHARED_HEADS
	
	    q_offset = off_zq * stride_qz + off_hq * stride_qh
	    k_offset = off_zkv * stride_kz + off_hkv * stride_kh
	    v_offset = off_zkv * stride_vz + off_hkv * stride_vh
	
	    Q = Q + q_offset
	    K = K + k_offset
	    V = V + v_offset
	
	    SPARSE_Z = 1
	    SPARSE_HQ = 1
	
	    sparse_idx_z = off_zq % SPARSE_Z
	    sparse_idx_hq = off_hq % SPARSE_HQ
	
	    SPARSE_Q_MULTIPLE: tl.constexpr = (SPARSE_Q_BLOCK_SIZE // BLOCK_M)
	    SPARSE_KV_MULTIPLE: tl.constexpr = (SPARSE_KV_BLOCK_SIZE // BLOCK_N)
	
	    stride_kv_num_blks_h = 16
	    stride_kv_idx_h = 256
	    stride_kv_idx_m = 16
	
	    # initialize pointer to m and l
	    m_i = tl.zeros([BLOCK_M], dtype=tl.float32) - float("inf")
	    l_i = tl.zeros([BLOCK_M], dtype=tl.float32)
	    acc = tl.zeros([BLOCK_M, V_HEAD_DIM], dtype=tl.float32)
	
	    offs_m = q_start * BLOCK_M + tl.arange(0, BLOCK_M)
	
	    # KV_IDX and KV_NUM_BLKS are always contiguous.
	    sparse_hz_offset = sparse_idx_z * SPARSE_HQ + sparse_idx_hq
	    sparse_kv_num_blks_offset = sparse_hz_offset * stride_kv_num_blks_h + q_start // SPARSE_Q_MULTIPLE
	    sparse_kv_idx_offset = sparse_hz_offset * stride_kv_idx_h + (q_start // SPARSE_Q_MULTIPLE) * stride_kv_idx_m  # noqa: B950
	
	    Q_block_ptr = tl.make_block_ptr(
	        base=Q,
	        shape=(Q_LEN, QK_HEAD_DIM),
	        strides=(stride_qm, stride_qk),
	        offsets=(q_start * BLOCK_M, 0),
	        block_shape=(BLOCK_M, QK_HEAD_DIM),
	        order=(1, 0)
	    )
	
	    # load q: it stays in SRAM throughout the inner loop.
	    if IS_DIVISIBLE:
	        q = tl.load(Q_block_ptr)
	    else:
	        # boundary check is not free, so we only do it when necessary.
	        q = tl.load(Q_block_ptr, boundary_check=(0,), padding_option = "zero")
	
	    # ~~~~~~~~~~~~~~ normal blocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	    # We don't know anything "special" about these blocks, so we need to apply
	    # both score_mod and mask_mod to it
	    kv_indices = KV_IDX + sparse_kv_idx_offset
	    kv_start = tl.load(kv_indices) * SPARSE_KV_BLOCK_SIZE # first kv block we're loading
	    kv_num_blocks = tl.load(KV_NUM_BLKS + sparse_kv_num_blks_offset)
	    block_n_end = tl.minimum(kv_num_blocks * SPARSE_KV_MULTIPLE, tl.maximum(tl.cdiv(KV_LEN, BLOCK_N), 1))
	
	    K_block_ptr = tl.make_block_ptr(
	        base=K,
	        shape=(QK_HEAD_DIM, KV_LEN),
	        strides=(stride_kk, stride_kn),
	        offsets=(0, kv_start),
	        block_shape=(QK_HEAD_DIM, BLOCK_N),
	        order=(0, 1)
	    )
	    V_block_ptr = tl.make_block_ptr(
	        base=V,
	        shape=(KV_LEN, V_HEAD_DIM),
	        strides=(stride_vn, stride_vk),
	        offsets=(kv_start, 0),
	        block_shape=(BLOCK_N, V_HEAD_DIM),
	        order=(1, 0)
	    )
	    offs_n = kv_start + tl.arange(0, BLOCK_N)
	
	    acc, l_i, m_i = forward_inner(
	        arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	        q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	        acc, l_i, m_i,
	        off_zq, off_hq, offs_m[:, None], offs_n[None, :],
	        kv_indices, kv_num_blocks,
	        0, block_n_end,
	        MATMUL_PRECISION,
	        IS_FULL_BLOCKS=False,
	    )
	
	    # ~~~~~~~~~~~~~~ "full" blocks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	    # We know these blocks are guaranteed to be "full", so we don't need to
	    # apply mask_mod to them - only score_mod
	    if HAS_FULL_BLOCKS:
	        # FULL_KV_IDX and FULL_KV_NUM_BLKS are always contiguous.
	        kv_indices = FULL_KV_IDX + sparse_kv_idx_offset
	        kv_start = tl.load(kv_indices) * SPARSE_KV_BLOCK_SIZE # first kv block we're loading
	        kv_num_blocks = tl.load(FULL_KV_NUM_BLKS + sparse_kv_num_blks_offset)
	        block_n_end = tl.minimum(kv_num_blocks * SPARSE_KV_MULTIPLE, tl.maximum(tl.cdiv(KV_LEN, BLOCK_N), 1))
	
	        K_block_ptr = tl.make_block_ptr(
	            base=K,
	            shape=(QK_HEAD_DIM, KV_LEN),
	            strides=(stride_kk, stride_kn),
	            offsets=(0, kv_start),
	            block_shape=(QK_HEAD_DIM, BLOCK_N),
	            order=(0, 1)
	        )
	        V_block_ptr = tl.make_block_ptr(
	            base=V,
	            shape=(KV_LEN, V_HEAD_DIM),
	            strides=(stride_vn, stride_vk),
	            offsets=(kv_start, 0),
	            block_shape=(BLOCK_N, V_HEAD_DIM),
	            order=(1, 0)
	        )
	        offs_n = kv_start + tl.arange(0, BLOCK_N)
	
	        acc, l_i, m_i = forward_inner(
	            arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	            q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	            acc, l_i, m_i,
	            off_zq, off_hq, offs_m[:, None], offs_n[None, :],
	            kv_indices, kv_num_blocks,
	            0, block_n_end,
	            MATMUL_PRECISION,
	            IS_FULL_BLOCKS=True,
	        )
	
	
	    # [Note] Handle fully masked out rows:
	    # Li will be the sum(e^(-inf)) == 0.0 for masked out rows, mi will be -inf.
	    # We set Li to 1.0 which will result in lse/out = 0.0 | after the log(li) + mi(0.0) step
	    l_i = tl.where(l_i == 0.0, 1, l_i)
	
	    acc = acc / l_i[:, None]
	    idx_zq = tl.program_id(1) // HQ
	    idx_hq = tl.program_id(1) % HQ
	    idx_m = offs_m[:, None]
	    idx_d = tl.arange(0, V_HEAD_DIM)[None, :]
	
	    mask = idx_m < Q_LEN
	
	    xindex = idx_d + 64*idx_m + 32768*idx_hq + 131072*idx_zq
	    tl.store(out_ptr0 + (tl.broadcast_to(idx_d + 64*idx_m + 32768*idx_hq, acc.shape)), acc, mask)
	
	    if OUTPUT_LOGSUMEXP:
	        off_hz = tl.program_id(1)
	        l_ptrs = LSE + off_hz * Q_LEN + offs_m
	        lse = m_i + tl.math.log2(l_i)
	        if IS_DIVISIBLE:
	            tl.store(l_ptrs, lse)
	        else:
	            tl.store(l_ptrs, lse, mask=offs_m < Q_LEN)
	
	@triton.jit
	def forward_inner(
	    arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	    q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	    # accumulated values
	    acc, l_i, m_i,
	    # Offsets used as inputs to score_mod & mask_mod
	    # of size [BLOCK_M, BLOCK_N] or scalar.
	    off_z, off_h, offs_m, offs_n,
	    # blocksparse data
	    kv_indices, kv_num_blocks,
	    # start kv and end kv block
	    block_n_start, block_n_end,
	    MATMUL_PRECISION,
	    IS_FULL_BLOCKS,
	):
	    # Redefines all kernel parameters (BLOCK_M, etc.) so we don't need to plumb them all through
	    PRESCALE_QK : tl.constexpr = False
	    ROWS_GUARANTEED_SAFE : tl.constexpr = False
	    BLOCKS_ARE_CONTIGUOUS : tl.constexpr = False
	    OUTPUT_LOGSUMEXP : tl.constexpr = True
	    FLOAT32_PRECISION : tl.constexpr = 'ieee'
	    IS_DIVISIBLE : tl.constexpr = True
	    SM_SCALE : tl.constexpr = 0.125
	    GQA_SHARED_HEADS : tl.constexpr = 1
	    HAS_FULL_BLOCKS : tl.constexpr = True
	    QK_HEAD_DIM : tl.constexpr = 64
	    V_HEAD_DIM : tl.constexpr = 64
	    BLOCK_M : tl.constexpr = 128
	    BLOCK_N : tl.constexpr = 32
	    SPARSE_Q_BLOCK_SIZE : tl.constexpr = 128
	    SPARSE_KV_BLOCK_SIZE : tl.constexpr = 128
	
	
	    SPARSE_KV_MULTIPLE: tl.constexpr = (SPARSE_KV_BLOCK_SIZE // BLOCK_N)
	    RCP_LN2: tl.constexpr = 1.44269504
	
	    if PRESCALE_QK:
	        q = (q * SM_SCALE * RCP_LN2).to(MATMUL_PRECISION)
	
	    # loop over k, v and update accumulator until block_n_end
	    for start_n in range(block_n_start, block_n_end):
	        if IS_DIVISIBLE:
	            acc, l_i, m_i = forward_block_mn(
	                arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	                q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	                # accumulated values
	                acc, l_i, m_i,
	                # Offsets
	                off_z, off_h, offs_m, offs_n,
	                MATMUL_PRECISION, RCP_LN2,
	                IS_FULL_BLOCKS,
	            )
	        else:
	            # Benchmark shows even we applied mod & mask to each block for non divisible seqlen,
	            # it's on par or slightly faster than only applying to the last block in fwd.
	            # However, we choose different strategy for bwd, where we only apply mod & mask
	            # to the last block because it's faster a lot.
	            acc, l_i, m_i = forward_block_mn(
	                arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	                q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	                # accumulated values
	                acc, l_i, m_i,
	                # Offsets
	                off_z, off_h, offs_m, offs_n,
	                MATMUL_PRECISION, RCP_LN2,
	                IS_FULL_BLOCKS, CHECK_BLOCK_BOUNDARY=True,
	            )
	
	        # update pointers
	        offset = get_offset_for_next_block(
	            start_n, kv_indices, kv_num_blocks,
	            SPARSE_KV_BLOCK_SIZE, SPARSE_KV_MULTIPLE, BLOCK_N, BLOCKS_ARE_CONTIGUOUS
	        )
	
	        V_block_ptr = tl.advance(V_block_ptr, (offset, 0))
	        K_block_ptr = tl.advance(K_block_ptr, (0, offset))
	
	        offs_n = offs_n + offset
	
	    return acc, l_i, m_i
	
	
	@triton.jit
	def get_offset_for_next_block(
	    loop_iter, col_indices, total_blocks,
	    SPARSE_BLOCK, SPARSE_BLOCK_MULTIPLE, BLOCK,
	    BLOCKS_ARE_CONTIGUOUS: tl.constexpr
	):
	    if BLOCKS_ARE_CONTIGUOUS:
	        return BLOCK
	    cur_block_idx = loop_iter // SPARSE_BLOCK_MULTIPLE
	    cur_block = tl.load(col_indices + cur_block_idx, eviction_policy="evict_last")
	    next_block = tl.load(col_indices + cur_block_idx + 1, eviction_policy="evict_last", mask=cur_block_idx + 1 < total_blocks)
	    needs_jump = (loop_iter + 1) % SPARSE_BLOCK_MULTIPLE == 0
	    jump_to_block = (next_block - cur_block ) * SPARSE_BLOCK - (SPARSE_BLOCK_MULTIPLE - 1) * BLOCK
	    offset = jump_to_block * needs_jump + (1 - needs_jump) * BLOCK
	    return offset
	
	@triton.jit
	def forward_block_mn(
	    arg_Q, arg_K, arg_V, arg_LSE, arg_KV_NUM_BLKS, arg_KV_IDX, arg_FULL_KV_NUM_BLKS, arg_FULL_KV_IDX, out_ptr0,
	    q, K_block_ptr, V_block_ptr, Q_LEN, KV_LEN,
	    # accumulated values
	    acc, l_i, m_i,
	    # Offsets
	    off_z, off_h, offs_m, offs_n,
	    MATMUL_PRECISION, RCP_LN2,
	    IS_FULL_BLOCKS, CHECK_BLOCK_BOUNDARY=False,
	):
	    # Redefines all kernel parameters (BLOCK_M, etc.) so we don't need to plumb them all through
	    PRESCALE_QK : tl.constexpr = False
	    ROWS_GUARANTEED_SAFE : tl.constexpr = False
	    BLOCKS_ARE_CONTIGUOUS : tl.constexpr = False
	    OUTPUT_LOGSUMEXP : tl.constexpr = True
	    FLOAT32_PRECISION : tl.constexpr = 'ieee'
	    IS_DIVISIBLE : tl.constexpr = True
	    SM_SCALE : tl.constexpr = 0.125
	    GQA_SHARED_HEADS : tl.constexpr = 1
	    HAS_FULL_BLOCKS : tl.constexpr = True
	    QK_HEAD_DIM : tl.constexpr = 64
	    V_HEAD_DIM : tl.constexpr = 64
	    BLOCK_M : tl.constexpr = 128
	    BLOCK_N : tl.constexpr = 32
	    SPARSE_Q_BLOCK_SIZE : tl.constexpr = 128
	    SPARSE_KV_BLOCK_SIZE : tl.constexpr = 128
	
	
	    # -- load k --
	    if IS_DIVISIBLE:
	        k = tl.load(K_block_ptr)
	    else:
	        k = tl.load(K_block_ptr, boundary_check=(1,), padding_option = "zero")
	    # -- compute qk ---
	    qk = tl.dot(q, k, input_precision=FLOAT32_PRECISION) # TODO: use cuda matmul when q_len <= 2.
	    if not PRESCALE_QK:
	        qk *= SM_SCALE
	    # ~~~~~~~~~~~~~~~~~~~ Apply score modification  ~~~~~~~~~~~~~~~~~~~
	    if CHECK_BLOCK_BOUNDARY:
	        # If this is the last block of a non divisible seqlen, we still need to load [BLOCK_M, BLOCK_N] elements,
	        # which is larger than the actual number of elements. To avoid access memory out of bound,
	        # we need to mask out the elements that are out of Q_LEN & KV_LEN.
	        m = offs_m % Q_LEN
	        n = offs_n % KV_LEN
	    else:
	        m = offs_m
	        n = offs_n
	
	    post_mod_scores = (qk)
	
	
	    if CHECK_BLOCK_BOUNDARY:
	        # Mask out the elements that are out of the KV_LEN for non divisible seqlen.
	        post_mod_scores = tl.where(offs_n < KV_LEN, post_mod_scores, float("-inf"))
	
	    if not IS_FULL_BLOCKS:
	        tmp0 = (m) >= (n)
	        mask_mod_output = tmp0
	
	
	        if CHECK_BLOCK_BOUNDARY:
	            mask_mod_output = tl.where(offs_n < KV_LEN, mask_mod_output, False)
	        # apply mask for partially unmasked blocks
	        post_mod_scores = tl.where(mask_mod_output, post_mod_scores, float("-inf"))
	
	    # TODO: In the case that score_mod is linear, this can be LICMed
	    if not PRESCALE_QK:
	        post_mod_scores *= RCP_LN2
	    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	
	    # -- compute scaling constant ---
	    m_ij = tl.maximum(m_i, tl.max(post_mod_scores, 1))
	    if not ROWS_GUARANTEED_SAFE:
	        masked_out_rows = (m_ij == float("-inf"))
	        m_ij_masked = tl.where(masked_out_rows, 0, m_ij)
	    else:
	        m_ij_masked = m_ij
	
	    alpha = tl.math.exp2(m_i - m_ij_masked)
	    p = tl.math.exp2(post_mod_scores - m_ij_masked[:, None])
	
	    # NB: l_i update is pulled up here since it's a bit faster
	    # NB: For headdim=256, it's faster to move it back down to after m_i =
	    # m_ij
	    l_i = l_i * alpha + tl.sum(p, 1)
	    # # -- scale and update acc --
	    acc = acc * alpha[:, None]
	
	    if IS_DIVISIBLE:
	        v = tl.load(V_block_ptr)
	    else:
	        v = tl.load(V_block_ptr, boundary_check=(0,), padding_option = "zero")
	    acc = tl.dot(p.to(MATMUL_PRECISION), v, acc, input_precision=FLOAT32_PRECISION)
	
	    # -- update m_i
	    m_i = m_ij
	
	    return acc, l_i, m_i
	''', device_str='cuda')
	meta0 = {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True, 'FLOAT32_PRECISION': "'ieee'", 'IS_DIVISIBLE': True, 'SM_SCALE': 0.125, 'GQA_SHARED_HEADS': 1, 'HAS_FULL_BLOCKS': True, 'QK_HEAD_DIM': 64, 'V_HEAD_DIM': 64, 'BLOCK_M': 128, 'BLOCK_N': 32, 'SPARSE_Q_BLOCK_SIZE': 128, 'SPARSE_KV_BLOCK_SIZE': 128}
	
	
	async_compile.wait(globals())
	del async_compile
	
	def call(args):
	    arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1 = args
	    args.clear()
	    assert_size_stride(arg0_1, (1, 4, 512, 64), (131072, 32768, 64, 1))
	    assert_size_stride(arg1_1, (1, 4, 512, 64), (131072, 32768, 64, 1))
	    assert_size_stride(arg2_1, (1, 4, 512, 64), (131072, 32768, 64, 1))
	    assert_size_stride(arg3_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg4_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    assert_size_stride(arg5_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg6_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    assert_size_stride(arg7_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg8_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    assert_size_stride(arg9_1, (1, 1, 16), (16, 16, 1))
	    assert_size_stride(arg10_1, (1, 1, 16, 16), (256, 256, 16, 1))
	    buf0 = empty_strided_cuda((1, 4, 512), (2048, 512, 1), torch.float32)
	    with torch.cuda._DeviceGuard(0):
	        torch.cuda.set_device(0)
	        buf1 = empty_strided_cuda((1, 4, 512, 64), (131072, 32768, 64, 1), torch.float32)
	        # Topologically Sorted Source Nodes: [flex_attention], Original ATen: []
	        stream0 = get_raw_stream(0)
	        triton_tem_fused_0.run(arg0_1, arg1_1, arg2_1, buf0, arg3_1, arg4_1, arg5_1, arg6_1, buf1, grid=torch._inductor.kernel.flex_attention.flex_attention_grid(1, 4, 512, 64, meta0), stream=stream0)
	        del arg0_1
	        del arg1_1
	        del arg2_1
	        del arg3_1
	        del arg4_1
	        del arg5_1
	        del arg6_1
	        del buf0
	    return (buf1, )
	
	
	def benchmark_compiled_module(times=10, repeat=10):
	    from torch._dynamo.testing import rand_strided
	    from torch._inductor.utils import print_performance
	    arg0_1 = rand_strided((1, 4, 512, 64), (131072, 32768, 64, 1), device='cuda:0', dtype=torch.float32)
	    arg1_1 = rand_strided((1, 4, 512, 64), (131072, 32768, 64, 1), device='cuda:0', dtype=torch.float32)
	    arg2_1 = rand_strided((1, 4, 512, 64), (131072, 32768, 64, 1), device='cuda:0', dtype=torch.float32)
	    arg3_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg4_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg5_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg6_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg7_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg8_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg9_1 = rand_strided((1, 1, 16), (16, 16, 1), device='cuda:0', dtype=torch.int32)
	    arg10_1 = rand_strided((1, 1, 16, 16), (256, 256, 16, 1), device='cuda:0', dtype=torch.int32)
	    fn = lambda: call([arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1])
	    return print_performance(fn, times=times, repeat=repeat)
	
	
	if __name__ == "__main__":
	    from torch._inductor.wrapper_benchmark import compiled_module_main
	    compiled_module_main('None', benchmark_compiled_module)
	
V1206 15:24:50.902000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7466f2d4d61dfc1690516495f37138a1"}
	{
	"name": "PyCodeCache.load_by_key_path",
	"ts": 1733527490902875.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:50.916000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c77ba023033616cfe1f998da2fef4de2"}
	{
	"name": "async_compile.wait",
	"ts": 1733527490916172.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.126000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "07f6598362ca233fcf839b83bd69ac34"}
	{
	"name": "async_compile.wait",
	"ts": 1733527494126241.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.126000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "7a197b62b8d8d93579b42512d9f7a21f"}
	{
	"name": "PyCodeCache.load_by_key_path",
	"ts": 1733527494126712.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.127000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "31dde7ddbbe82ef40947971a84c580bc"}
	{
	"name": "code_gen",
	"ts": 1733527494127082.2,
	"args": {
	"fn_name": "GraphLowering.compile_to_module",
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.127000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "e32bbd039d869c1f2168335fbd92d76f"}
	{
	"name": "GraphLowering.compile_to_fn",
	"ts": 1733527494127503.0,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.128000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "fcdee348cde25167c18c8dcbcf951830"}
	{
	"name": "TritonBundler.collect",
	"ts": 1733527494128406.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "B",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.129000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "4c9ec5066284075c872b557d2cf8b5e2"}
	{
	"name": "TritonBundler.collect",
	"ts": 1733527494129814.8,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.134000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "ab69c518495e835b07c9f8fc02777882"}
	{
	"name": "fx_graph_cache_miss",
	"ts": 1733527490856164.5,
	"args": {
	"key": "fmjzafxtfmjyhhkl3afwwy3gjat2obuydjrd6epcvjw6l5s2v7yd",
	"components": [
	"[osxu7jvxrxvfgwz7co25iilkcoi7r6wwkfxjkdf7i35nrmqqnas] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False",
	"[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False",
	"[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2",
	"[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4",
	"[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5",
	"[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6",
	"[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8",
	"[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False",
	"[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False",
	"[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True",
	"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}",
	"[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False",
	"[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True",
	"[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default",
	"[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300",
	"[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False",
	"[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP",
	"[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True",
	"[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False",
	"[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0",
	"[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0",
	"[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ",
	"[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT",
	"[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4",
	"[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1",
	"[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128",
	"[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False",
	"[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None",
	"[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096",
	"[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128",
	"[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True",
	"[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False",
	"[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None",
	"[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None",
	"[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []",
	"[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']",
	"[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False",
	"[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp",
	"[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton",
	"[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host",
	"[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda",
	"[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021",
	"[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
	],
	"cache_event_time": 1733527490862706044,
	"cache_state": "miss",
	"triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_tem_fused_0'])",
	"time_taken_ns": 3272216042,
	"compile_id": "1/0"
	},
	"ph": "i",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0,
	"s": "p"
	}
V1206 15:24:54.135000 1667746 torch/_inductor/compile_fx.py:751] {"artifact": {"name": "fx_graph_cache_miss", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "c9c893906731ce022aed26f875fcd820"}
	{"key": "fmjzafxtfmjyhhkl3afwwy3gjat2obuydjrd6epcvjw6l5s2v7yd", "components": ["[osxu7jvxrxvfgwz7co25iilkcoi7r6wwkfxjkdf7i35nrmqqnas] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False", "[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False", "[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2", "[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4", "[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5", "[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6", "[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8", "[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10", "[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)", "[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)", "[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>", "[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}", "[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}", "[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order", "[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False", "[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False", "[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True", "[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}", "[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False", "[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True", "[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default", "[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300", "[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False", "[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP", "[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True", "[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False", "[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0", "[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0", "[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ", "[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm", "[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT", "[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False", "[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4", "[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1", "[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8", "[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None", "[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128", "[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False", "[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None", "[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096", "[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True", "[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128", "[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False", "[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True", "[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False", "[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True", "[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False", "[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False", "[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ", "[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False", "[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None", "[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None", "[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None", "[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []", "[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']", "[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True", "[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False", "[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp", "[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton", "[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host", "[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda", "[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021", "[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False", "[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False", "[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None", "[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"], "cache_event_time": 1733527490862706044, "cache_state": "miss", "triton_bundler_meta": "TritonBundlerMetadata(cached_kernel_names=['triton_tem_fused_0'])", "time_taken_ns": 3272216042, "compile_id": "1/0"}
V1206 15:24:54.135000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "8448954b82f0bb83dace88546d83f2e6"}
	{
	"name": "inductor_compile",
	"ts": 1733527494135534.2,
	"args": {
	"fn_name": "compile_fx_inner",
	"compile_id": "1/0",
	"is_backward": false,
	"cache_state": "miss",
	"cache_event_time": 1733527490856164558,
	"key": "fmjzafxtfmjyhhkl3afwwy3gjat2obuydjrd6epcvjw6l5s2v7yd",
	"components": [
	"[osxu7jvxrxvfgwz7co25iilkcoi7r6wwkfxjkdf7i35nrmqqnas] gm: <lambda>(\n  (sdpa_score0): <lambda>()\n  (sdpa_mask0): <lambda>()\n)\n\n\n\ndef forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1):\n    sdpa_score0 = self.sdpa_score0\n    sdpa_mask0 = self.sdpa_mask0\n    flex_attention = torch.ops.higher_order.flex_attention(arg0_1, arg1_1, arg2_1, sdpa_score0, (arg3_1, arg4_1, arg5_1, arg6_1, arg7_1, arg8_1, arg9_1, arg10_1, 128, 128, sdpa_mask0), 0.125, {'PRESCALE_QK': False, 'ROWS_GUARANTEED_SAFE': False, 'BLOCKS_ARE_CONTIGUOUS': False, 'OUTPUT_LOGSUMEXP': True}, (), ());  arg0_1 = arg1_1 = arg2_1 = sdpa_score0 = arg3_1 = arg4_1 = arg5_1 = arg6_1 = arg7_1 = arg8_1 = arg9_1 = arg10_1 = sdpa_mask0 = None\n    getitem = flex_attention[0];  flex_attention = None\n    return (getitem,)\n    \n# To see more debug info, please use `graph_module.print_readable()`",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[0]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[1]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[avf2u3luxvyabchjhbddapcjn5gev47wfdtkrprayuhv6lf2z6u] example_inputs[2]: TensorMetadata(dtype=torch.float32, shape=torch.Size([1, 4, 512, 64]), stride=(131072, 32768, 64, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[3]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[4]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[5]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[6]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[7]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[8]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[zsk3gejenkcvvwhiyk36u5zdnlrcs6wgy3pina3csuierfd2zri] example_inputs[9]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16]), stride=(16, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[hnbjjzmb63q27mbr22eubaelyb423burv27meouma6ccysmwu6g] example_inputs[10]: TensorMetadata(dtype=torch.int32, shape=torch.Size([1, 1, 16, 16]), stride=(256, 256, 16, 1), device=device(type='cuda', index=0), layout=torch.strided, memory_format=torch.contiguous_format, storage_offset=0, storage_bytes=None, requires_grad=False, is_quantized=False, is_conj=False, is_neg=False, is_inference=False, is_sparse=False, is_coalesced=None, dense_dim=None, sparse_dim=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[aot_mode]: False",
	"[lmglpn4zi7vob56n34r2j2rk7flv5xfgrcvmo7xcpirqsitygqx] fx_kwargs[boxed_forward_device_index]: BoxedDeviceIndex(value=None)",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[cpp_wrapper]: False",
	"[xq2hdkbfkbcuye6rgtypayrkhqf4cntij2dsd24rei3lsknakkf] fx_kwargs[cudagraphs]: BoxedBool(value=False)",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[extern_node_serializer]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] fx_kwargs[is_backward]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] fx_kwargs[is_inference]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] fx_kwargs[layout_opt]: None",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] fx_kwargs[static_input_idxs]: []",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inputs_to_check[0]: 0",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inputs_to_check[1]: 1",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inputs_to_check[2]: 2",
	"[kcuxe2zwm3mzv2uk6adm6iskoy35bqfv725twacrdewod2dbl5d] inputs_to_check[3]: 3",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inputs_to_check[4]: 4",
	"[qs5hilycp4ew4ivtc7m5jaxp7q4pm5slioxw3fi3ur6ei65ybz4] inputs_to_check[5]: 5",
	"[agkvbkaha53nbz3aeeuhvxjvvc4glhfjofzkg6g2qjoo2e5otcx] inputs_to_check[6]: 6",
	"[j3s5elu6itwgjafc7rzhy4whrbufl6kfmlufjhh25grt643bk5f] inputs_to_check[7]: 7",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inputs_to_check[8]: 8",
	"[qlgfiyqewrmkgqth2qm6wkq2ja5lzkapg3ypgnvoyfqqnidaoj3] inputs_to_check[9]: 9",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inputs_to_check[10]: 10",
	"[du4vyrfyozrfxcf6kk6ma7oqwatapifazeelfsawmsiu6gjdtxp] deterministic_algorithms_settings: (False, False, True)",
	"[qiptf2633zubseuei4bkisoq3not35l6lud6p23p4qmcsxiw2uq] cuda_matmul_settings: (False, True, True)",
	"[svke3c6tlftklkb3z4oku47swr6tsobagmp2dlfuxz5mzlefnlw] torch_version: <bytes>",
	"[poglqjwowp4gnkmehjby2lvdjrwuo5tbxa2gayd6smgasl2hgsd] system_info[device]: {'name': 'NVIDIA H100'}",
	"[zhk6mbgyespwiq2kqql3qkec5aaj467ll4jlebwu35uhzivsy6u] system_info[version]: {'triton': '3.0.0+dedb7bdf33dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-835d4fc33500e1accafc5c5e00f4f73d87432c114860c04b68849bf6f942b8e5-dc767c8fadcf23ea82d79e257c37d44077eae7f681cf967565fd43e9c017937b-23d635e690d670bf61798e1259674b78c0ed5ba222ab6a455f329f27a758fc2d-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-20b017e9c4d858ab05e783f77df50b86c6d6eee5d79f3f4b158562b4a54f8443-f44338a31e0534290b08653050804c3fabbde403a6d3004ae04f0c28495f0802-e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855-a979896b9c0acfd41dd953b90bdc4b10968f7c0b45a286eae3f829aaddb2bb55-da771298f7bc45d24a61f35ef51742304421df1ab49d50bf1fc510dd5a46ea4b-dfa6e0edf1d43ef41636f628897e0ef501fd2b01159a3357f5c7130be523863b-71330f394e584b0df29595d49f6ac8ac0c5503db9147090dc58ad888cebac7be-f24adfd52383f7866791ebaa5d45a5d2cc826e56ee2fd285f438e85d201fe643-a34be0d3ae4b3ac9aede195cfda42f8a0a097b2bc9642fb59673ce6b3b607f10-36130a37af1b19a0dec569aa08d30b00c74c8f02b6b632999d86dea169146792-36d42f0429aae027cb985b53b9abc616fae4dad9e0ea03953e1e9fb46d0fb9a0-e5d2cb724c08d0ef4130f3ba858d22cf21f834bfd970a5388aa6ad2a6bab91f9', 'cuda': '12.2'}",
	"[2qv36hmzaa3pkd42j2bzjmfpjvr32xx7ahdfiiepfwehvvcq45y] system_info[hash]: b2f391b9cfc799798db5e5f32606e9451cbd00c288b4ee846b49ad55396a924d",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[TYPE_CHECKING]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[enable_auto_functionalized_v2]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_progress]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[verbose_progress]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fx_graph_remote_cache]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bundle_triton_into_fx_graph_cache]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_local_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[bundled_autotune_remote_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_disable_caches]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[sleep_sec_TESTING_ONLY]: None",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[custom_op_default_layout_constraint]: needs_fixed_stride_order",
	"[pikr7bbcoixfzftsazp5ggufhdklj24babfry77bl4nuvyrrcp4] inductor_config[triton_kernel_default_layout_constraint]: needs_fixed_stride_order",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp_wrapper]: False",
	"[b4ha3ravs3qv237q65hpfqegbnoww7tf2ahcbu2i7xo6te5spqs] inductor_config[c_shim_version]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[dce]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[static_weight_shapes]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[size_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[nan_asserts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pick_loop_orders]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[inplace_buffers]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[allow_buffer_reuse]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[memory_planning]: False",
	"[x75won4jmsgeb63pcvwr2y4eteyzzdhmf5rv6xhjppie4hx2yu5] inductor_config[memory_pool]: intermediates",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_harness]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[epilogue_fusion]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[epilogue_fusion_first]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[pattern_matcher]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[b2b_gemm_pass]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_pre_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[joint_custom_post_pass]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[pre_grad_custom_pass]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_cat_fx_passes]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[efficient_conv_bn_eval_fx_passes]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[is_predispatch]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[group_fusion]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[batch_fusion]: True",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[pre_grad_fusion_options]: {}",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[post_grad_fusion_options]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_locality]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[dynamic_scale_rblock]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_fuse_int_mm_with_mul]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[use_mixed_mm]: True",
	"[zwmmbkdkarexuhbigurz5lfnhx64tht7fznecjkrvznh6rzivbv] inductor_config[fx_passes_numeric_check]: {'pre_grad': False, 'precision': 0.0001, 'num_iterations': 1, 'requires_optimizer': True}",
	"[v2td5s4lnsvyxvaevy4chx6kc5h3mm2axazbgwimqule5zrzao7] inductor_config[mixed_mm_choice]: heuristic",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[reorder_for_compute_comm_overlap]: False",
	"[ssupi7bu3rrhdpg2jyegzncu3kg3nnhklyliqvutaxgs7y7k3dx] inductor_config[reorder_for_compute_comm_overlap_passes]: ['reorder_compute_for_overlap', 'sink_waits', 'raise_comms']",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[reorder_for_peak_memory]: True",
	"[lxxtoqhcoepwfokeiibd575gnxo3uzwiv4hmpomlwkpzqz3qzsh] inductor_config[estimate_op_runtime]: default",
	"[yezuzjtg4h3jjur4jwtwiehbyixa7eonq4tqsqmwqve2lvvmrem] inductor_config[intra_node_bw]: 300",
	"[5fxczt3ciyxitdhizb7sfsgn7fhpczcqsngttnt5ot2wyctk7co] inductor_config[inter_node_bw]: 25",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_pointwise]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[max_autotune_gemm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_same_precision]: False",
	"[2y7luesktjrque3nr7qtxnum2mkbeegzdrsvkm3rvdlhqboajhx] inductor_config[max_autotune_gemm_backends]: ATEN,TRITON,CPP",
	"[uqlsbif4zxd75vt522p52txyuguieipi2lwz5g5awt56lccqk7s] inductor_config[max_autotune_conv_backends]: ATEN,TRITON",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[max_autotune_gemm_search_space]: DEFAULT",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[autotune_fallback_to_aten]: True",
	"[wft6ljqsfr3x4m7fa5zuyb7cwknky4irrxz4bjr6uzr2yiopxqj] inductor_config[unbacked_symint_fallback]: 8192",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[search_autotune_cache]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[save_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_in_subproc]: False",
	"[iglov24t7x5ruci344aer2tm6nqshi4veuw4wxlssxtu46cx76m] inductor_config[max_autotune_subproc_result_timeout_seconds]: 60.0",
	"[bh33ranllcgilhgmgr3qvygzxjm6isq5iexnfm3zx6fnr2zwlp2] inductor_config[max_autotune_subproc_graceful_timeout_seconds]: 1.0",
	"[pwoh5aypf4fxbntdvwt67rppxorqos6xr3w7qzeun6kblbfg2ga] inductor_config[max_autotune_subproc_terminate_timeout_seconds]: 2.0",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[autotune_multi_device]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_tuning]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[coordinate_descent_check_all_directions]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[coordinate_descent_search_radius]: 1",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[autoheuristic_collect]: ",
	"[jwbrgxes7vjqumngs5hyj6gn5nytv2whnppnzngvaagfmawhkkd] inductor_config[autoheuristic_use]: mixed_mm",
	"[jvchmi66fvqzlemhr5fcqorz5trfdtdalzfagtj2aolmimwqhdq] inductor_config[autoheuristic_log_path]: DEFAULT",
	"[4p2fdjlvxrcw7c7fvzm5huhtqxnro4kvkx56f7p5zyrxqkwooov] inductor_config[layout_opt_default]: 1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[layout_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_layout_optimization]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[keep_output_stride]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[warn_mix_layout]: False",
	"[lkkae3meylaixfif4thncru4hjqeaislawjoghffrbwuscaagei] inductor_config[realize_reads_threshold]: 4",
	"[rr5m5hsocoyodldz7vcvaizdwvm2rt34evmqdxvng7wz3tufvo6] inductor_config[realize_opcount_threshold]: 30",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[realize_acc_reads_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[fallback_random]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[implicit_fallbacks]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aggressive_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_fusion]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[enabled_metric_tables]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[loop_ordering_after_fusion]: False",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[score_fusion_memory_threshold]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[benchmark_epilogue_fusion]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[max_epilogue_benchmarked_choices]: 1",
	"[jykiys6ynafs3zdylwa5ggq6j655mxeh42d6mtdi22gffkrmiac] inductor_config[max_fusion_size]: 64",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[max_pointwise_cat_inputs]: 8",
	"[yttmfmxblgcbsvbokguzowcorrcxz5uunxtcvsbe6nijgcx45he] inductor_config[unroll_reductions_threshold]: 8",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[comment_origin]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[conv_1x1_as_mm]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[split_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_kernel]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[constant_and_index_propagation]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[always_keep_tensor_constants]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[assert_indirect_indexing]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[compute_all_bounds]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernels]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[benchmark_combo_kernel]: False",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernels_autotune]: 1",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[combo_kernel_allow_mixed_sizes]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[combo_kernel_foreach_dynamic_shapes]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[joint_graph_constant_folding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_index_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[emulate_precision_casts]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[is_nightly_or_source]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[developer_warnings]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[optimize_scatter_upon_const_tensor]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[global_cache_dir]: None",
	"[j6c55jha5r2sdys2rwq7uqhtleea5dgjcye7nicfgft36v7xfvp] inductor_config[kernel_name_max_ops]: 10",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[shape_padding]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[comprehensive_padding]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_channels_last]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[disable_padding_cpu]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[padding_alignment_bytes]: 128",
	"[dnnw5ks3yxrp7mwvihb2hh4tqx35ye637xt33x64kw4fvz2nyzg] inductor_config[padding_stride_threshold]: 1024",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[pad_outputs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[bw_outputs_user_visible]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[force_shape_pad]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[permute_fusion]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profiler_mark_wrapper_call]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[generate_intermediate_hooks]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[debug_ir_traceback]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth]: False",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[profile_bandwidth_regex]: ",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[profile_bandwidth_output]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[profile_bandwidth_with_do_bench_using_profiling]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[disable_cpp_codegen]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[freezing_discard_parameters]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[allow_stack_allocation]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[use_minimal_arrayref_interface]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[decompose_mem_bound_mm]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[assume_aligned_inputs]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[unsafe_ignore_unsupported_triton_autotune_args]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[check_stack_no_cycles_TESTING_ONLY]: False",
	"[sz3im5ogc6asp7g4uqocnovype63tkdexzfrniv6hn2oank3biu] inductor_config[cpp.threads]: -1",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.no_redundant_loops]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.dynamic_threads]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.simdlen]: None",
	"[g7rrnbg5yonzux3cfj5ovre5lob3ayda7qcfpxjvtwmiz4uicii] inductor_config[cpp.min_chunk_size]: 4096",
	"[c7zj4qytmety6keurs3hsh5wn7foxp3dqx4kym2ucszzcb2ngrf] inductor_config[cpp.cxx]: (None, 'g++')",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_kernel_profile]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.weight_prepack]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_relu_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.inject_log1p_bug_TESTING_ONLY]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.vec_isa_ok]: None",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[cpp.descriptive_names]: original_aten",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[cpp.max_horizontal_fusion_size]: 16",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.fallback_scatter_reduce_sum]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_unsafe_math_opt_flag]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_floating_point_contract_flag]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_tiling_heuristics]: True",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cpp.gemm_max_k_slices]: 1",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_cache_blocking]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cpp.gemm_thread_factors]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[cpp.enable_loop_tail_vec]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cpp.enable_concat_linear]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_trees]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_skip_dynamic_graphs]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.slow_path_cudagraph_asserts]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cudagraph_trees_history_recording]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.cudagraph_support_input_mutation]: True",
	"[ljdqgtysl3vdf7j6attlz5gmjg2ncihnveojfyubosplmkrjgra] inductor_config[triton.cudagraph_unexpected_rerecord_limit]: 128",
	"[tuax46wac7rfv2trf5gcps6vleo3cq44lbnrdxtprvo3ljjaddj] inductor_config[triton.cudagraph_dynamic_shape_warn_limit]: 50",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraph_sync]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cudagraphs_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.fast_path_cudagraph_asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.skip_cudagraph_warmup]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_graph]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.debug_sync_kernel]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.dense_indexing]: False",
	"[pr5nr4a7dthirgd2ljo3d2xakc63ywxugusu6mkmr6gmpeliyib] inductor_config[triton.max_tiles]: 2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.prefer_nd_tiling]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_pointwise]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.autotune_cublasLt]: True",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.autotune_at_compile_time]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_pointwise_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.tiling_prevents_reduction_fusion]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.unique_kernel_names]: True",
	"[yrty22bseefglnysuoec4ji7j2rnaggdj3g33zzj7avogwfmgdw] inductor_config[triton.descriptive_names]: original_aten",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.persistent_reductions]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.cooperative_reductions]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.force_cooperative_reductions]: False",
	"[vrl5ktomgtzox5xucd3np6vug3vyj6hwwzahqijuwpmamlv7ohi] inductor_config[triton.multi_kernel]: 0",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.divisible_by_16]: True",
	"[fv6slhtedtydps5s5u2etitscliblzcidyitqf7krsv4e23fzk6] inductor_config[triton.min_split_scan_rblock]: 256",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.store_cubin]: False",
	"[ebt2ncs4f5y7dn7btzi76mnouepvzad474tmp5iju4wiuumjl4s] inductor_config[triton.spill_threshold]: 16",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[triton.use_block_ptr]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[triton.inject_relu_bug_TESTING_ONLY]: None",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[triton.codegen_upcast_to_fp32]: True",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.output_path]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.debug_compile]: False",
	"[ngkkx5e6z7erl6da23zb2cmsctz4yvaqyameyg5hbqln4wrhh7x] inductor_config[aot_inductor.debug_intermediate_value_printer]: 0",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[aot_inductor.filtered_kernel_names]: None",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_in_spec]: ",
	"[v3hzzlv4tjgvp3pyhmzagjd25orl6n7nynoa7svlhhwk73b7u3c] inductor_config[aot_inductor.serialized_out_spec]: ",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.use_runtime_constant_folding]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.force_mmap_weights]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.package_cpp_only]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.metadata]: {}",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[aot_inductor.raise_error_on_ignored_optimization]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[aot_inductor.dump_aoti_minifier]: False",
	"[4bryyl4ahh5whyg3zwqebpwmjnx6w77nqgqbdjlowju6lkqtn7w] inductor_config[aot_inductor.presets]: {}",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.arch]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.version]: None",
	"[tvyftmtdmezlejo2xllu7awzv4pzc4vm4fub4b3gpl5jptjkosi] inductor_config[cuda.compile_opt_level]: -O1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_cuda_lto]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_ptxas_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.enable_debug_info]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.use_fast_math]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_max_profiling_configs]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cuda_cxx]: None",
	"[aghvyrrgwvxijco2pk5wzc3cgmmthrbmgxitiibxuuscxdwrjd3] inductor_config[cuda.cutlass_backend_min_gemm_size]: 1",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[cuda.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[cuda.cutlass_op_allowlist_regex]: None",
	"[lwkz5chtpji756gurqw4foijfi7zfgljtnn5nmnvdi2skpt4mgh] inductor_config[cuda.cutlass_op_denylist_regex]: pingpong",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[rocm.arch]: []",
	"[oartxnko2l7d67tzwwm2otcumaut3n4wwcfgz3o377hmcveu5ft] inductor_config[rocm.ck_supported_arch]: ['gfx90a', 'gfx940', 'gfx941', 'gfx942']",
	"[klfqjprnpfhcdurgvuikvc4rpd5ynkpk77toousr5h3u5roty6p] inductor_config[rocm.compile_opt_level]: -O2",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.is_debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.save_temps]: False",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.use_fast_math]: True",
	"[cev5uo2jlwdhw2uyzcm7vr6cl23azjfw437f5r5lskm7spucos6] inductor_config[rocm.flush_denormals]: True",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.print_kernel_resource_usage]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.rocm_home]: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.ck_dir]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.generate_test_runner]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] inductor_config[rocm.n_max_profiling_configs]: None",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[rocm.use_preselected_instances]: False",
	"[bsvfcwwoczx2rlkdz2eta6doujsymyihmi46hhwk6clrrvwcb6m] inductor_config[cpu_backend]: cpp",
	"[caw4ly2z672k6kjfahoxwpajp5idhhtrpgf3ma2clylcp7c7aid] inductor_config[cuda_backend]: triton",
	"[ljhgflgihidopsfsdcbqynv27nceykby3nutyd5jlcpq7n6e7l4] inductor_config[halide.cpu_target]: host",
	"[wx7vmsmrdpk5ue2txlywp3lj3faqmdjphs5fgg2ehzsyno7uovg] inductor_config[halide.gpu_target]: host-cuda",
	"[svgytlua5wcyeia7wq7e6zgh5tsueikrnzchmdmouvmkpfsc2zq] inductor_config[halide.scheduler_cuda]: Anderson2021",
	"[k5ogk6345jvklsnu7g2njqstiz2g6pm5wmqpgg3kasrmuqwjvl6] inductor_config[halide.scheduler_cpu]: Adams2019",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.asserts]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.debug]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[halide.scan_kernels]: False",
	"[h25wqx6vliw4j5rtzzbv6latydxyei3deyg6v7wzvnzryfktuki] inductor_config[external_matmul]: []",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.force_extern_kernel_in_multi_template]: False",
	"[esstihe2nyydk4mhzpvox3qkajyu5y5t23hk3fi2me7jn75xi3o] inductor_config[test_configs.runtime_triton_dtype_assert]: False",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_pre_pass: None",
	"[tquy2we2efmowuj4wuqzcfcfdcrkzkzmwdae6hprj7fa64jpusq] post_grad_custom_post_pass: None"
	],
	"cache_bypass_reason": null,
	"remote_cache_enabled": false,
	"local_cache_enabled": true
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.136000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "a9d90840e9080e38cc5cc8f43d64b9e7"}
	{
	"name": "compile_fx.<locals>.fw_compiler_base",
	"ts": 1733527494136361.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.138000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "9174f1070d6248c4511951edfe3f1dd1"}
	{
	"name": "create_aot_dispatcher_function",
	"ts": 1733527494138570.5,
	"args": {
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.138000 1667746 torch/_dynamo/utils.py:1327] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "32e73583977fb2c6cb9b34dd32b9a8f7"}
	{
	"name": "autograd_cache_bypass",
	"ts": 1733527490809971.2,
	"args": {
	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
	"cache_bypass_hard_exception": false,
	"key": null,
	"cache_state": "bypass",
	"components": [],
	"compile_id": "1/0"
	},
	"ph": "i",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0,
	"s": "p"
	}
V1206 15:24:54.138000 1667746 torch/_functorch/_aot_autograd/autograd_cache.py:763] {"artifact": {"name": "aotautograd_cache_bypass", "encoding": "json"}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "5601d02186053adcc1ba29fd248c1d20"}
	{"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention", "cache_bypass_hard_exception": false, "key": null, "cache_state": "bypass", "components": [], "compile_id": "1/0"}
V1206 15:24:54.139000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "cca36f8b4f9f5ea8ad866b857a634eb8"}
	{
	"name": "backend_compile",
	"ts": 1733527494139170.2,
	"args": {
	"fn_name": "OutputGraph.call_user_compiler",
	"compile_id": "1/0",
	"requires_subclass_dispatch": false,
	"dispatch_mode": "inference",
	"cache_state": "bypass",
	"cache_event_time": 1733527490809971264,
	"key": null,
	"components": [],
	"cache_bypass_reason": "Unsupported call_function target flex_attention. \n Function module: torch.ops.higher_order, \nFunction name: flex_attention",
	"remote_cache_enabled": false,
	"local_cache_enabled": true
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.153000 1667746 torch/_dynamo/guards.py:2315] {"dynamo_cpp_guards_str": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "b9091c16617d57e52682988042d2b9ea"}
	
	TREE_GUARD_MANAGER:
	+- RootGuardManager
	| +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None                           # _dynamo/output_graph.py:484 in init_ambient_guards
	| +- GLOBAL_STATE: ___check_global_state()
	| +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack()
	| +- GuardManager: source=L['k'], accessed_by=DictGetItemGuardAccessor('k')
	| | +- TYPE_MATCH: ___check_type_id(L['k'], 82181376)                          
	| | +- TENSOR_MATCH: check_tensor(L['k'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | +- NO_HASATTR: hasattr(L['k'], '_dynamo_dynamic_indices') == False         
	| | +- NO_TENSOR_ALIASING: check_no_aliasing(L['k'], L['q'], L['v'], L['block_mask'].q_indices, L['block_mask'].kv_indices, L['block_mask'].q_num_blocks, L['block_mask'].kv_num_blocks, L['block_mask'].full_q_indices, L['block_mask'].full_kv_indices, L['block_mask'].full_q_num_blocks, L['block_mask'].full_kv_num_blocks)
	| | +- GuardManager: source=L['k'].dim, accessed_by=GetAttrGuardAccessor(dim)
	| | +- GuardManager: source=L['k'].size, accessed_by=GetAttrGuardAccessor(size)
	| +- GuardManager: source=L['q'], accessed_by=DictGetItemGuardAccessor('q')
	| | +- TYPE_MATCH: ___check_type_id(L['q'], 82181376)                          
	| | +- TENSOR_MATCH: check_tensor(L['q'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | +- NO_HASATTR: hasattr(L['q'], '_dynamo_dynamic_indices') == False         
	| | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['q'].dim, accessed_by=GetAttrGuardAccessor(dim)
	| | +- GuardManager: source=L['q'].size, accessed_by=GetAttrGuardAccessor(size)
	| +- GuardManager: source=L['v'], accessed_by=DictGetItemGuardAccessor('v')
	| | +- TYPE_MATCH: ___check_type_id(L['v'], 82181376)                          
	| | +- TENSOR_MATCH: check_tensor(L['v'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[1, 4, 512, 64], stride=[131072, 32768, 64, 1])
	| | +- NO_HASATTR: hasattr(L['v'], '_dynamo_dynamic_indices') == False         
	| | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['v'].dim, accessed_by=GetAttrGuardAccessor(dim)
	| | +- GuardManager: source=L['v'].size, accessed_by=GetAttrGuardAccessor(size)
	| +- GuardManager: source=L['block_mask'], accessed_by=DictGetItemGuardAccessor('block_mask')
	| | +- TYPE_MATCH: ___check_type_id(L['block_mask'], 139679664)                
	| | +- GuardManager: source=L['block_mask'].mask_mod, accessed_by=GetAttrGuardAccessor(mask_mod)
	| | | +- GuardManager: source=L['block_mask'].mask_mod.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].mask_mod.__code__, 140062268556768)
	| | +- GuardManager: source=L['block_mask'].q_indices, accessed_by=GetAttrGuardAccessor(q_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].q_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].q_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].BLOCK_SIZE, accessed_by=GetAttrGuardAccessor(BLOCK_SIZE)
	| | | +- TYPE_MATCH: ___check_type_id(L['block_mask'].BLOCK_SIZE, 8812224)       
	| | | +- LENGTH_CHECK: len(L['block_mask'].BLOCK_SIZE) == 2                        
	| | | +- GuardManager: source=L['block_mask'].BLOCK_SIZE[0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | +- EQUALS_MATCH: L['block_mask'].BLOCK_SIZE[0] == 128                        
	| | | +- GuardManager: source=L['block_mask'].BLOCK_SIZE[1], accessed_by=TupleGetItemGuardAccessor(1)
	| | | | +- EQUALS_MATCH: L['block_mask'].BLOCK_SIZE[1] == 128                        
	| | +- GuardManager: source=L['block_mask'].kv_indices, accessed_by=GetAttrGuardAccessor(kv_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].kv_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].kv_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].q_num_blocks, accessed_by=GetAttrGuardAccessor(q_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].q_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].q_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].kv_num_blocks, accessed_by=GetAttrGuardAccessor(kv_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].kv_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].kv_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_q_indices, accessed_by=GetAttrGuardAccessor(full_q_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_q_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_q_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_kv_indices, accessed_by=GetAttrGuardAccessor(full_kv_indices)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_kv_indices, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16, 16], stride=[256, 256, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_kv_indices, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_q_num_blocks, accessed_by=GetAttrGuardAccessor(full_q_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_q_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_q_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].full_kv_num_blocks, accessed_by=GetAttrGuardAccessor(full_kv_num_blocks)
	| | | +- TENSOR_MATCH: check_tensor(L['block_mask'].full_kv_num_blocks, Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[1, 1, 16], stride=[16, 16, 1])
	| | | +- NO_HASATTR: hasattr(L['block_mask'].full_kv_num_blocks, '_dynamo_dynamic_indices') == False
	| | | +- NO_TENSOR_ALIASING
	| | +- GuardManager: source=L['block_mask'].as_tuple, accessed_by=GetAttrGuardAccessor(as_tuple)
	| | | +- GuardManager: source=L['block_mask'].as_tuple, accessed_by=FuncDefaultsGuardAccessor
	| | | | +- GuardManager: source=L['block_mask'].as_tuple.__defaults__[0], accessed_by=GetItemGuardAccessor(0)
	| | | | | +- ID_MATCH: ___check_obj_id(L['block_mask'].as_tuple.__defaults__[0], 8908032)
	| +- GuardManager: source=L['score_mod2'], accessed_by=DictGetItemGuardAccessor('score_mod2')
	| | +- GuardManager: source=L['score_mod2'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | +- ID_MATCH: ___check_obj_id(L['score_mod2'].__code__, 140062269255152)  
	| +- GuardManager: source=L['flex_attention'], accessed_by=DictGetItemGuardAccessor('flex_attention')
	| | +- GuardManager: source=L['flex_attention'].__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__code__, 139318784)    
	| | +- GuardManager: source=L['flex_attention'], accessed_by=FuncDefaultsGuardAccessor
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[2], accessed_by=GetItemGuardAccessor(2)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[2], 8822752)
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[3], accessed_by=GetItemGuardAccessor(3)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[3], 8907584)
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[4], accessed_by=GetItemGuardAccessor(4)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[4], 8907584)
	| | | +- GuardManager: source=L['flex_attention'].__defaults__[5], accessed_by=GetItemGuardAccessor(5)
	| | | | +- ID_MATCH: ___check_obj_id(L['flex_attention'].__defaults__[5], 8822752)
	| +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor
	| | +- GuardManager: source=G['_140062263790704_c5'], accessed_by=DictGetItemGuardAccessor('_140062263790704_c5')
	| | | +- GuardManager: source=G['_140062263790704_c5'].Tensor, accessed_by=GetAttrGuardAccessor(Tensor)
	| | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c5'].Tensor, 82181376)  
	| | | | +- GuardManager: source=G['_140062263790704_c5'].Tensor.__bases__, accessed_by=GetAttrGuardAccessor(__bases__)
	| | | | | +- GuardManager: source=G['_140062263790704_c5'].Tensor.__bases__[0], accessed_by=TupleGetItemGuardAccessor(0)
	| | | | | | +- ID_MATCH: ___check_obj_id(G['_140062263790704_c5'].Tensor.__bases__[0], 140062119703136)
	| | +- GuardManager: source=G['__builtins_dict___10'], accessed_by=DictGetItemGuardAccessor('__builtins_dict___10')
	| | | +- GuardManager: source=G['__builtins_dict___10']['len'], accessed_by=DictGetItemGuardAccessor('len')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['len'], 140062269592480)
	| | | +- GuardManager: source=G['__builtins_dict___10']['sum'], accessed_by=DictGetItemGuardAccessor('sum')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['sum'], 140062269593600)
	| | | +- GuardManager: source=G['__builtins_dict___10']['list'], accessed_by=DictGetItemGuardAccessor('list')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['list'], 8841312) 
	| | | +- GuardManager: source=G['__builtins_dict___10']['type'], accessed_by=DictGetItemGuardAccessor('type')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['type'], 8810240) 
	| | | +- GuardManager: source=G['__builtins_dict___10']['tuple'], accessed_by=DictGetItemGuardAccessor('tuple')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['tuple'], 8812224)
	| | | +- GuardManager: source=G['__builtins_dict___10']['object'], accessed_by=DictGetItemGuardAccessor('object')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['object'], 8810976)
	| | | +- GuardManager: source=G['__builtins_dict___10']['isinstance'], accessed_by=DictGetItemGuardAccessor('isinstance')
	| | | | +- ID_MATCH: ___check_obj_id(G['__builtins_dict___10']['isinstance'], 140062269592160)
	| | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_utils_dot__pytree')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'], 140057600329744)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].TreeSpec, accessed_by=GetAttrGuardAccessor(TreeSpec)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].TreeSpec, 86791872)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf, accessed_by=GetAttrGuardAccessor(_is_leaf)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_leaf.__code__, 140057601749808)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, accessed_by=GetAttrGuardAccessor(_LEAF_SPEC)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC, 87025712)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, accessed_by=GetAttrGuardAccessor(type)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.type, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, accessed_by=GetAttrGuardAccessor(context)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.context, 8822752)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes, accessed_by=GetAttrGuardAccessor(num_nodes)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_nodes == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves, accessed_by=GetAttrGuardAccessor(num_leaves)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_leaves == 1
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children, accessed_by=GetAttrGuardAccessor(num_children)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.num_children == 0
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, accessed_by=GetAttrGuardAccessor(children_specs)
	| | | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs, 8841312)
	| | | | | +- LENGTH_CHECK: not G['__import_torch_dot_utils_dot__pytree']._LEAF_SPEC.children_specs
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type, accessed_by=GetAttrGuardAccessor(_get_node_type)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._get_node_type.__code__, 140057601749536)
	| | | +- DictGuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES, accessed_by=GetAttrGuardAccessor(SUPPORTED_NODES)
	| | | | +- DICT_VERSION: ___dict_version(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES) == 366913
	| | | | +- KeyValueManager pair at index=1
	| | | | | +- ValueManager: GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]]
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn, accessed_by=GetAttrGuardAccessor(flatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].flatten_fn.__code__, 140057599989696)
	| | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn, accessed_by=GetAttrGuardAccessor(unflatten_fn)
	| | | | | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES[list(G['__import_torch_dot_utils_dot__pytree'].SUPPORTED_NODES.keys())[1]].unflatten_fn.__code__, 140057600267792)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper, accessed_by=GetAttrGuardAccessor(_tree_flatten_helper)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._tree_flatten_helper.__code__, 140057600001392)
	| | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance, accessed_by=GetAttrGuardAccessor(_is_namedtuple_instance)
	| | | | +- GuardManager: source=G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_utils_dot__pytree']._is_namedtuple_instance.__code__, 140057600000944)
	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_comptime'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_comptime')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_comptime'], 140057359527872)
	| | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot__dynamo_dot_decorators')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'], 140057359526192)
	| | | +- GuardManager: source=G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, accessed_by=GetAttrGuardAccessor(is_compiling)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot__dynamo_dot_decorators'].is_compiling, 140057422814624)
	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot__utils')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot__utils'], 140048551571072)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, accessed_by=GetAttrGuardAccessor(_SUPPORTED_HEAD_DIMS)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS, 8841312)
	| | | | +- LENGTH_CHECK: len(G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS) == 10
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[0], accessed_by=ListGetItemGuardAccessor(0)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[0] == 2
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[1], accessed_by=ListGetItemGuardAccessor(1)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[1] == 4
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[2], accessed_by=ListGetItemGuardAccessor(2)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[2] == 8
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[3], accessed_by=ListGetItemGuardAccessor(3)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[3] == 16
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[4], accessed_by=ListGetItemGuardAccessor(4)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[4] == 32
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5], accessed_by=ListGetItemGuardAccessor(5)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot__utils']._SUPPORTED_HEAD_DIMS[5] == 64
	| | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], accessed_by=DictGetItemGuardAccessor('__import_torch_dot_nn_dot_attention_dot_flex_attention')
	| | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'], 140048551568912)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, accessed_by=GetAttrGuardAccessor(math)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math, 140062267404384)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, accessed_by=GetAttrGuardAccessor(sqrt)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].math.sqrt, 140062267408144)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, accessed_by=GetAttrGuardAccessor(torch)
	| | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch, 140062263790704)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, accessed_by=GetAttrGuardAccessor(_dynamo)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo, 140062262057760)
	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static, accessed_by=GetAttrGuardAccessor(mark_static)
	| | | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch._dynamo.mark_static.__code__, 125680000)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, accessed_by=GetAttrGuardAccessor(compiler)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler, 140057571877776)
	| | | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, accessed_by=GetAttrGuardAccessor(is_dynamo_compiling)
	| | | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.compiler.is_dynamo_compiling, 140057572232544)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, accessed_by=GetAttrGuardAccessor(is_grad_enabled)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].torch.is_grad_enabled, 140062253115936)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device, accessed_by=GetAttrGuardAccessor(_validate_device)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_device.__code__, 140048552018544)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, accessed_by=GetAttrGuardAccessor(flex_attention_hop)
	| | | | +- TYPE_MATCH: ___check_type_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop, 96230624)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__, accessed_by=GetAttrGuardAccessor(__name__)
	| | | | | +- EQUALS_MATCH: G['__import_torch_dot_nn_dot_attention_dot_flex_attention'].flex_attention_hop.__name__ == 'flex_attention'
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim, accessed_by=GetAttrGuardAccessor(_supported_head_dim)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._supported_head_dim.__code__, 140048552864992)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim, accessed_by=GetAttrGuardAccessor(_validate_embed_dim)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_embed_dim.__code__, 139781872)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness, accessed_by=GetAttrGuardAccessor(_validate_nestedness)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_nestedness.__code__, 140048553100560)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input, accessed_by=GetAttrGuardAccessor(_validate_sdpa_input)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._validate_sdpa_input.__code__, 139823744)
	| | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options, accessed_by=GetAttrGuardAccessor(_apply_kernel_options)
	| | | | +- GuardManager: source=G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, accessed_by=GetAttrGuardAccessor(__code__)
	| | | | | +- ID_MATCH: ___check_obj_id(G['__import_torch_dot_nn_dot_attention_dot_flex_attention']._apply_kernel_options.__code__, 140048554555888)
	
V1206 15:24:54.154000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "f5553f623cc33ff3da54884bffba4841"}
	{
	"name": "entire_frame_compile",
	"ts": 1733527494154160.5,
	"args": {
	"fn_name": "_compile.compile_inner",
	"compile_id": "1/0"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}
V1206 15:24:54.156000 1667746 torch/_dynamo/utils.py:1045] {"compilation_metrics": {"pre_grad_pass_time_us": 526, "joint_graph_pass_time_us": 1028, "feature_usage": {"pytorch/remote_cache:fx_graph_memcache_version": true, "pytorch/remote_cache:bundle_triton_into_fx_graph_cache_v2": true}, "post_grad_pass_time_us": 1058, "inductor_code_gen_cumulative_compile_time_us": 3237121, "inductor_cumulative_compile_time_us": 3280008, "aot_autograd_cumulative_compile_time_us": 3332164, "dynamo_cumulative_compile_time_us": 3438591, "frame_key": "2", "co_name": "fn2", "co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py", "co_firstlineno": 465, "cache_size": 0, "accumulated_cache_size": 0, "guard_count": 91, "shape_env_guard_count": 0, "graph_op_count": 2, "graph_node_count": 16, "graph_input_count": 11, "fail_type": null, "fail_reason": null, "fail_user_frame_filename": null, "fail_user_frame_lineno": null, "non_compliant_ops": [], "compliant_custom_ops": [], "restart_reasons": [], "dynamo_time_before_restart_s": 0.0, "has_guarded_code": true, "config_suppress_errors": false, "config_inline_inbuilt_nn_modules": true, "specialize_float": false, "dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}", "is_forward": true, "dynamo_compile_time_before_restart_us": 0}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0}
V1206 15:24:54.156000 1667746 torch/_dynamo/utils.py:1288] {"chromium_event": {}, "compiled_autograd_id": null, "frame_id": 1, "frame_compile_id": 0, "attempt": 0, "has_payload": "0fdda1350847e58c2900e8c2a60a9a2b"}
	{
	"name": "dynamo",
	"ts": 1733527494156938.5,
	"args": {
	"compile_id": "1/0",
	"frame_key": "2",
	"co_name": "fn2",
	"co_filename": "/data/users/xmfan/a/pytorch/test/inductor/test_codecache.py",
	"co_firstlineno": 465,
	"cache_size": 0,
	"accumulated_cache_size": 0,
	"guard_count": 91,
	"shape_env_guard_count": 0,
	"graph_op_count": 2,
	"graph_node_count": 16,
	"graph_input_count": 11,
	"fail_type": null,
	"fail_reason": null,
	"fail_user_frame_filename": null,
	"fail_user_frame_lineno": null,
	"non_compliant_ops": [],
	"compliant_custom_ops": [],
	"restart_reasons": [],
	"dynamo_time_before_restart_s": 0.0,
	"has_guarded_code": true,
	"dynamo_config": "{\"verify_correctness\": false, \"minimum_call_count\": 1, \"dead_code_elimination\": true, \"cache_size_limit\": 8, \"accumulated_cache_size_limit\": 256, \"skip_code_recursive_on_cache_limit_hit\": true, \"fail_on_cache_limit_hit\": false, \"specialize_int\": false, \"specialize_float\": false, \"dynamic_shapes\": true, \"use_lazy_graph_module\": true, \"assume_static_by_default\": true, \"automatic_dynamic_shapes\": true, \"force_parameter_static_shapes\": true, \"force_nn_module_property_static_shapes\": true, \"allow_ignore_mark_dynamic\": false, \"guard_nn_modules\": true, \"guard_nn_modules_using_dict_tags\": true, \"suppress_errors\": false, \"replay_record_enabled\": false, \"rewrite_assert_with_torch_assert\": true, \"disable\": false, \"cprofile\": false, \"skipfiles_inline_module_allowlist\": {}, \"allowed_functions_module_string_ignorelist\": [\"torch._decomp\", \"torch.testing\", \"torch._prims\", \"torch._refs\", \"torch.distributions\"], \"capture_scalar_outputs\": false, \"capture_dynamic_output_shape_ops\": false, \"prefer_deferred_runtime_asserts_over_guards\": false, \"allow_complex_guards_as_runtime_asserts\": false, \"force_unspec_int_unbacked_size_like_on_torchrec_kjt\": false, \"optimize_ddp\": true, \"do_not_emit_runtime_asserts\": false, \"_ddp_optimization_mode\": [\"ddp_optimizer\", \"python_reducer\", \"python_reducer_without_compiled_forward\", \"no_optimization\"], \"skip_torchrec\": true, \"optimize_ddp_lazy_compile\": false, \"skip_fsdp_guards\": true, \"skip_fsdp_hooks\": true, \"skip_nnmodule_hook_guards\": true, \"skip_no_tensor_aliasing_guards_on_parameters\": true, \"raise_on_ctx_manager_usage\": true, \"raise_on_unsafe_aot_autograd\": false, \"error_on_nested_jit_trace\": true, \"error_on_nested_fx_trace\": true, \"allow_rnn\": false, \"capture_sparse_compute\": true, \"error_on_recompile\": false, \"report_guard_failures\": true, \"trace_numpy\": true, \"numpy_default_float\": \"float64\", \"numpy_default_complex\": \"complex128\", \"numpy_default_int\": \"int64\", \"use_numpy_random_stream\": false, \"enable_cpp_guard_manager\": true, \"inline_inbuilt_nn_modules\": true, \"record_compile_time_instruction_count\": false, \"cudagraph_backend_keep_input_mutation\": false, \"cudagraph_backend_support_input_mutation\": false, \"only_allow_pt2_compliant_ops\": false, \"capture_autograd_function\": true, \"capture_func_transforms\": true, \"fake_tensor_cache_enabled\": true, \"fake_tensor_cache_crosscheck_enabled\": true, \"compiled_autograd\": false, \"compiled_autograd_kwargs_override\": {}, \"enable_compiler_collectives\": false, \"automatic_dynamic_local_pgo\": false, \"automatic_dynamic_remote_pgo\": null}"
	},
	"ph": "E",
	"cat": "dynamo_timed",
	"tid": 0,
	"pid": 0
	}