torch-sys 0.24.0

// THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT BY HAND!
#include "torch_api.h"

extern "C" {
void atg___and__(tensor *, tensor self, scalar other);
void atg___and__tensor_(tensor *, tensor self, tensor other);
void atg___iand__(tensor *, tensor self, scalar other);
void atg___iand__tensor_(tensor *, tensor self, tensor other);
void atg___ilshift__(tensor *, tensor self, scalar other);
void atg___ilshift__tensor_(tensor *, tensor self, tensor other);
void atg___ior__(tensor *, tensor self, scalar other);
void atg___ior__tensor_(tensor *, tensor self, tensor other);
void atg___irshift__(tensor *, tensor self, scalar other);
void atg___irshift__tensor_(tensor *, tensor self, tensor other);
void atg___ixor__(tensor *, tensor self, scalar other);
void atg___ixor__tensor_(tensor *, tensor self, tensor other);
void atg___lshift__(tensor *, tensor self, scalar other);
void atg___lshift__scalar_out_(tensor *, tensor out, tensor self, scalar other);
void atg___lshift__tensor_(tensor *, tensor self, tensor other);
void atg___lshift__tensor_out_(tensor *, tensor out, tensor self, tensor other);
void atg___or__(tensor *, tensor self, scalar other);
void atg___or__tensor_(tensor *, tensor self, tensor other);
void atg___rshift__(tensor *, tensor self, scalar other);
void atg___rshift__scalar_out_(tensor *, tensor out, tensor self, scalar other);
void atg___rshift__tensor_(tensor *, tensor self, tensor other);
void atg___rshift__tensor_out_(tensor *, tensor out, tensor self, tensor other);
void atg___xor__(tensor *, tensor self, scalar other);
void atg___xor__tensor_(tensor *, tensor self, tensor other);
void atg__adaptive_avg_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg__adaptive_avg_pool2d_backward(tensor *, tensor grad_output, tensor self);
void atg__adaptive_avg_pool2d_backward_out(tensor *, tensor out, tensor grad_output, tensor self);
void atg__adaptive_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg__adaptive_avg_pool3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg__adaptive_avg_pool3d_backward(tensor *, tensor grad_output, tensor self);
void atg__adaptive_avg_pool3d_backward_out(tensor *, tensor out, tensor grad_output, tensor self);
void atg__adaptive_avg_pool3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg__add_batch_dim(tensor *, tensor self, int64_t batch_dim, int64_t level);
void atg__add_relu(tensor *, tensor self, tensor other);
void atg__add_relu_(tensor *, tensor self, tensor other);
void atg__add_relu_out(tensor *, tensor out, tensor self, tensor other);
void atg__add_relu_scalar(tensor *, tensor self, scalar other);
void atg__add_relu_scalar_(tensor *, tensor self, scalar other);
void atg__add_relu_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg__addmm_activation(tensor *, tensor self, tensor mat1, tensor mat2, int use_gelu);
void atg__addmm_activation_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2, int use_gelu);
void atg__aminmax(tensor *, tensor self);
void atg__aminmax_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg__aminmax_dim_out(tensor *, tensor out0, tensor out1, tensor self, int64_t dim, int keepdim);
void atg__aminmax_out(tensor *, tensor out0, tensor out1, tensor self);
void atg__amp_update_scale(tensor *, tensor self, tensor growth_tracker, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval);
void atg__amp_update_scale_(tensor *, tensor self, tensor growth_tracker, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval);
void atg__amp_update_scale_out(tensor *, tensor out, tensor self, tensor growth_tracker, tensor found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval);
void atg__assert_scalar(scalar self_scalar, char* assert_msg_ptr, int assert_msg_len);
void atg__assert_tensor_metadata(tensor a, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int dtype, int device, int8_t layout);
void atg__autocast_to_full_precision(tensor *, tensor self, int cuda_enabled, int cpu_enabled);
void atg__autocast_to_reduced_precision(tensor *, tensor self, int cuda_enabled, int cpu_enabled, int cuda_dtype, int cpu_dtype);
void atg__batch_norm_no_update(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__batch_norm_no_update_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__batch_norm_with_update(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__batch_norm_with_update_functional(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__batch_norm_with_update_out(tensor *, tensor out, tensor save_mean, tensor save_invstd, tensor reserve, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__cast_byte(tensor *, tensor self, int non_blocking);
void atg__cast_char(tensor *, tensor self, int non_blocking);
void atg__cast_double(tensor *, tensor self, int non_blocking);
void atg__cast_float(tensor *, tensor self, int non_blocking);
void atg__cast_half(tensor *, tensor self, int non_blocking);
void atg__cast_int(tensor *, tensor self, int non_blocking);
void atg__cast_long(tensor *, tensor self, int non_blocking);
void atg__cast_short(tensor *, tensor self, int non_blocking);
void atg__cdist_backward(tensor *, tensor grad, tensor x1, tensor x2, double p, tensor cdist);
void atg__cdist_backward_out(tensor *, tensor out, tensor grad, tensor x1, tensor x2, double p, tensor cdist);
void atg__cholesky_solve_helper(tensor *, tensor self, tensor A, int upper);
void atg__cholesky_solve_helper_out(tensor *, tensor out, tensor self, tensor A, int upper);
void atg__chunk_cat(tensor *, tensor *tensors_data, int tensors_len, int64_t dim, int64_t num_chunks);
void atg__chunk_cat_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim, int64_t num_chunks);
void atg__coalesce(tensor *, tensor self);
void atg__coalesce_out(tensor *, tensor out, tensor self);
void atg__coalesced(tensor *, tensor self, int coalesced);
void atg__coalesced_(tensor *, tensor self, int coalesced);
void atg__coalesced_out(tensor *, tensor out, tensor self, int coalesced);
void atg__compute_linear_combination(tensor *, tensor input, tensor coefficients);
void atg__compute_linear_combination_out(tensor *, tensor out, tensor input, tensor coefficients);
void atg__conj(tensor *, tensor self);
void atg__conj_copy(tensor *, tensor self);
void atg__conj_copy_out(tensor *, tensor out, tensor self);
void atg__conj_physical(tensor *, tensor self);
void atg__conj_physical_out(tensor *, tensor out, tensor self);
void atg__conv_depthwise2d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg__conv_depthwise2d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg__convert_indices_from_coo_to_csr(tensor *, tensor self, int64_t size, int out_int32);
void atg__convert_indices_from_coo_to_csr_out(tensor *, tensor out, tensor self, int64_t size, int out_int32);
void atg__convert_indices_from_csr_to_coo(tensor *, tensor crow_indices, tensor col_indices, int out_int32, int transpose);
void atg__convert_indices_from_csr_to_coo_out(tensor *, tensor out, tensor crow_indices, tensor col_indices, int out_int32, int transpose);
void atg__convert_weight_to_int4pack(tensor *, tensor self, int64_t innerKTiles);
void atg__convert_weight_to_int4pack_for_cpu(tensor *, tensor self, int64_t innerKTiles);
void atg__convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled, int allow_tf32);
void atg__convolution_deprecated(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled);
void atg__convolution_mode(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg__convolution_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups, int benchmark, int deterministic, int cudnn_enabled, int allow_tf32);
void atg__copy_from(tensor *, tensor self, tensor dst, int non_blocking);
void atg__copy_from_and_resize(tensor *, tensor self, tensor dst);
void atg__copy_from_and_resize_out(tensor *, tensor out, tensor self, tensor dst);
void atg__copy_from_out(tensor *, tensor out, tensor self, tensor dst, int non_blocking);
void atg__cslt_compress(tensor *, tensor input);
void atg__cslt_sparse_mm(tensor *, tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result, int64_t alg_id, int64_t split_k, int64_t split_k_mode);
int64_t atg__cslt_sparse_mm_search(tensor compressed_A, tensor dense_B, tensor bias, tensor alpha, int out_dtype, int transpose_result);
void atg__ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int zero_infinity);
void atg__ctc_loss_backward(tensor *, tensor grad, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity);
void atg__ctc_loss_backward_out(tensor *, tensor out, tensor grad, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity);
void atg__ctc_loss_backward_tensor(tensor *, tensor grad, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, tensor neg_log_likelihood, tensor log_alpha, int64_t blank, int zero_infinity);
void atg__ctc_loss_out(tensor *, tensor out0, tensor out1, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int zero_infinity);
void atg__ctc_loss_tensor(tensor *, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int zero_infinity);
void atg__ctc_loss_tensor_out(tensor *, tensor out0, tensor out1, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int zero_infinity);
void atg__cudnn_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor philox_seed, tensor philox_offset, tensor attn_bias, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, double scale_v, uint8_t scale_null);
void atg__cudnn_ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int deterministic, int zero_infinity);
void atg__cudnn_ctc_loss_out(tensor *, tensor out0, tensor out1, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int deterministic, int zero_infinity);
void atg__cudnn_ctc_loss_tensor(tensor *, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int deterministic, int zero_infinity);
void atg__cudnn_init_dropout_state(tensor *, double dropout, int train, int64_t dropout_seed, int options_kind, int options_device);
void atg__cudnn_init_dropout_state_out(tensor *, tensor out, double dropout, int train, int64_t dropout_seed);
void atg__cudnn_rnn(tensor *, tensor input, tensor *weight_data, int weight_len, int64_t weight_stride0, tensor weight_buf, tensor hx, tensor cx, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, int batch_first, double dropout, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, tensor dropout_state);
void atg__cudnn_rnn_flatten_weight(tensor *, tensor *weight_arr_data, int weight_arr_len, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, int batch_first, int bidirectional);
void atg__cudnn_rnn_flatten_weight_out(tensor *, tensor out, tensor *weight_arr_data, int weight_arr_len, int64_t weight_stride0, int64_t input_size, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, int batch_first, int bidirectional);
void atg__cudnn_rnn_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor out4, tensor input, tensor *weight_data, int weight_len, int64_t weight_stride0, tensor weight_buf, tensor hx, tensor cx, int64_t mode, int64_t hidden_size, int64_t proj_size, int64_t num_layers, int batch_first, double dropout, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, tensor dropout_state);
int64_t atg__debug_has_internal_overlap(tensor self);
void atg__dim_arange(tensor *, tensor like, int64_t dim);
int64_t atg__dimi(tensor self);
int64_t atg__dimv(tensor self);
void atg__dirichlet_grad(tensor *, tensor x, tensor alpha, tensor total);
void atg__dirichlet_grad_out(tensor *, tensor out, tensor x, tensor alpha, tensor total);
void atg__dyn_quant_matmul_4bit(tensor *, tensor inp, tensor packed_weights, int64_t block_size, int64_t in_features, int64_t out_features);
void atg__dyn_quant_pack_4bit_weight(tensor *, tensor weights, tensor scales_zeros, tensor bias, int64_t block_size, int64_t in_features, int64_t out_features);
void atg__efficient_attention_backward(tensor *, tensor grad_out_, tensor query, tensor key, tensor value, tensor bias, tensor out, tensor cu_seqlens_q, tensor cu_seqlens_k, int64_t max_seqlen_q, int64_t max_seqlen_k, tensor logsumexp, double dropout_p, tensor philox_seed, tensor philox_offset, int64_t custom_mask_type, int bias_requires_grad, double scale_v, uint8_t scale_null, int64_t num_splits_key_v, uint8_t num_splits_key_null, int64_t window_size_v, uint8_t window_size_null, int shared_storage_dqdkdv);
void atg__efficientzerotensor(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__efficientzerotensor_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg__embedding_bag(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
void atg__embedding_bag_backward(tensor *, tensor grad, tensor indices, tensor offsets, tensor offset2bag, tensor bag_size, tensor maximum_indices, int64_t num_weights, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int64_t padding_idx);
void atg__embedding_bag_dense_backward(tensor *, tensor grad, tensor indices, tensor offset2bag, tensor bag_size, tensor maximum_indices, int64_t num_weights, int scale_grad_by_freq, int64_t mode, tensor per_sample_weights, int64_t padding_idx);
void atg__embedding_bag_dense_backward_out(tensor *, tensor out, tensor grad, tensor indices, tensor offset2bag, tensor bag_size, tensor maximum_indices, int64_t num_weights, int scale_grad_by_freq, int64_t mode, tensor per_sample_weights, int64_t padding_idx);
void atg__embedding_bag_forward_only(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
void atg__embedding_bag_forward_only_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
void atg__embedding_bag_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx);
void atg__embedding_bag_per_sample_weights_backward(tensor *, tensor grad, tensor weight, tensor indices, tensor offsets, tensor offset2bag, int64_t mode, int64_t padding_idx);
void atg__embedding_bag_per_sample_weights_backward_out(tensor *, tensor out, tensor grad, tensor weight, tensor indices, tensor offsets, tensor offset2bag, int64_t mode, int64_t padding_idx);
void atg__embedding_bag_sparse_backward(tensor *, tensor grad, tensor indices, tensor offsets, tensor offset2bag, tensor bag_size, int64_t num_weights, int scale_grad_by_freq, int64_t mode, tensor per_sample_weights, int64_t padding_idx);
void atg__empty_affine_quantized(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device, double scale, int64_t zero_point);
void atg__empty_affine_quantized_out(tensor *, tensor out, int64_t *size_data, int size_len, double scale, int64_t zero_point);
void atg__empty_per_channel_affine_quantized(tensor *, int64_t *size_data, int size_len, tensor scales, tensor zero_points, int64_t axis, int options_kind, int options_device);
void atg__empty_per_channel_affine_quantized_out(tensor *, tensor out, int64_t *size_data, int size_len, tensor scales, tensor zero_points, int64_t axis);
void atg__euclidean_dist(tensor *, tensor x1, tensor x2);
void atg__euclidean_dist_out(tensor *, tensor out, tensor x1, tensor x2);
void atg__fake_quantize_learnable_per_channel_affine(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_learnable_per_channel_affine_backward(tensor *, tensor grad, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_learnable_per_channel_affine_out(tensor *, tensor out, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_learnable_per_tensor_affine(tensor *, tensor self, tensor scale, tensor zero_point, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_learnable_per_tensor_affine_backward(tensor *, tensor grad, tensor self, tensor scale, tensor zero_point, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_learnable_per_tensor_affine_out(tensor *, tensor out, tensor self, tensor scale, tensor zero_point, int64_t quant_min, int64_t quant_max, double grad_factor);
void atg__fake_quantize_per_tensor_affine_cachemask_tensor_qparams(tensor *, tensor self, tensor scale, tensor zero_point, tensor fake_quant_enabled, int64_t quant_min, int64_t quant_max);
void atg__fake_quantize_per_tensor_affine_cachemask_tensor_qparams_out(tensor *, tensor out0, tensor out1, tensor self, tensor scale, tensor zero_point, tensor fake_quant_enabled, int64_t quant_min, int64_t quant_max);
void atg__fft_c2c(tensor *, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int forward);
void atg__fft_c2c_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int forward);
void atg__fft_c2r(tensor *, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int64_t last_dim_size);
void atg__fft_c2r_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int64_t last_dim_size);
void atg__fft_r2c(tensor *, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int onesided);
void atg__fft_r2c_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int64_t normalization, int onesided);
void atg__fill_mem_eff_dropout_mask_(tensor *, tensor self, double dropout_p, int64_t seed, int64_t offset);
void atg__flash_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, tensor rng_state, tensor unused, double scale_v, uint8_t scale_null, int64_t window_size_left_v, uint8_t window_size_left_null, int64_t window_size_right_v, uint8_t window_size_right_null);
void atg__foobar(tensor *, tensor self, int arg1, int arg2, int arg3);
void atg__foobar_out(tensor *, tensor out, tensor self, int arg1, int arg2, int arg3);
void atg__functional_assert_async(tensor *, tensor self, char* assert_msg_ptr, int assert_msg_len, tensor dep_token);
void atg__functional_assert_scalar(tensor *, scalar self_scalar, char* assert_msg_ptr, int assert_msg_len, tensor dep_token);
void atg__functional_sym_constrain_range(tensor *, scalar size, int64_t min_v, uint8_t min_null, int64_t max_v, uint8_t max_null, tensor dep_token);
void atg__functional_sym_constrain_range_for_size(tensor *, scalar size, int64_t min_v, uint8_t min_null, int64_t max_v, uint8_t max_null, tensor dep_token);
void atg__fused_dropout(tensor *, tensor self, double p);
void atg__fused_dropout_out(tensor *, tensor out0, tensor out1, tensor self, double p);
void atg__fused_moving_avg_obs_fq_helper(tensor *, tensor self, tensor observer_on, tensor fake_quant_on, tensor running_min, tensor running_max, tensor scale, tensor zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, int per_row_fake_quant, int symmetric_quant);
void atg__fused_moving_avg_obs_fq_helper_functional(tensor *, tensor self, tensor observer_on, tensor fake_quant_on, tensor running_min, tensor running_max, tensor scale, tensor zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, int per_row_fake_quant, int symmetric_quant);
void atg__fused_moving_avg_obs_fq_helper_out(tensor *, tensor out0, tensor out1, tensor self, tensor observer_on, tensor fake_quant_on, tensor running_min, tensor running_max, tensor scale, tensor zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, int per_row_fake_quant, int symmetric_quant);
void atg__fused_rms_norm(tensor *, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, double eps_v, uint8_t eps_null);
int64_t atg__fused_sdp_choice(tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, double scale_v, uint8_t scale_null, int enable_gqa);
void atg__fw_primal(tensor *, tensor self, int64_t level);
void atg__fw_primal_copy(tensor *, tensor self, int64_t level);
void atg__fw_primal_copy_out(tensor *, tensor out, tensor self, int64_t level);
void atg__gather_sparse_backward(tensor *, tensor self, int64_t dim, tensor index, tensor grad);
void atg__grid_sampler_2d_cpu_fallback(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg__grid_sampler_2d_cpu_fallback_backward(tensor *, tensor grad_output, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg__grid_sampler_2d_cpu_fallback_out(tensor *, tensor out, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg__grouped_mm(tensor *, tensor self, tensor mat2, tensor offs, tensor bias, int out_dtype);
int atg__has_compatible_shallow_copy_type(tensor self, tensor from);
int atg__has_same_storage_numel(tensor self, tensor other);
tensor *atg__histogramdd_bin_edges(tensor self, int64_t *bins_data, int bins_len, double *range_data, int range_len, tensor weight, int density);
void atg__histogramdd_bin_edges_out(tensor *out_data, int out_len, tensor self, int64_t *bins_data, int bins_len, double *range_data, int range_len, tensor weight, int density);
void atg__histogramdd_from_bin_cts(tensor *, tensor self, int64_t *bins_data, int bins_len, double *range_data, int range_len, tensor weight, int density);
void atg__histogramdd_from_bin_cts_out(tensor *, tensor out, tensor self, int64_t *bins_data, int bins_len, double *range_data, int range_len, tensor weight, int density);
void atg__histogramdd_from_bin_tensors(tensor *, tensor self, tensor *bins_data, int bins_len, tensor weight, int density);
void atg__histogramdd_from_bin_tensors_out(tensor *, tensor out, tensor self, tensor *bins_data, int bins_len, tensor weight, int density);
void atg__index_put_impl(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate, int unsafe);
void atg__index_put_impl_(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate, int unsafe);
void atg__index_put_impl_out(tensor *, tensor out, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate, int unsafe);
void atg__indices(tensor *, tensor self);
void atg__indices_copy(tensor *, tensor self);
void atg__indices_copy_out(tensor *, tensor out, tensor self);
void atg__int_mm(tensor *, tensor self, tensor mat2);
void atg__int_mm_out(tensor *, tensor out, tensor self, tensor mat2);
void atg__is_all_true(tensor *, tensor self);
void atg__is_any_true(tensor *, tensor self);
int atg__is_zerotensor(tensor self);
void atg__lazy_clone(tensor *, tensor self);
void atg__linalg_check_errors(tensor info, char* api_name_ptr, int api_name_len, int is_matrix);
void atg__linalg_det(tensor *, tensor A);
void atg__linalg_det_result(tensor *, tensor result, tensor LU, tensor pivots, tensor A);
void atg__linalg_eigh(tensor *, tensor A, char* UPLO_ptr, int UPLO_len, int compute_v);
void atg__linalg_eigh_eigenvalues(tensor *, tensor eigenvalues, tensor eigenvectors, tensor A, char* UPLO_ptr, int UPLO_len, int compute_v);
void atg__linalg_eigvals(tensor *, tensor self);
void atg__linalg_slogdet(tensor *, tensor A);
void atg__linalg_slogdet_sign(tensor *, tensor sign, tensor logabsdet, tensor LU, tensor pivots, tensor A);
void atg__linalg_solve_ex(tensor *, tensor A, tensor B, int left, int check_errors);
void atg__linalg_solve_ex_result(tensor *, tensor result, tensor LU, tensor pivots, tensor info, tensor A, tensor B, int left, int check_errors);
void atg__linalg_svd(tensor *, tensor A, int full_matrices, int compute_uv, char* driver_ptr, int driver_len);
void atg__linalg_svd_u(tensor *, tensor U, tensor S, tensor Vh, tensor A, int full_matrices, int compute_uv, char* driver_ptr, int driver_len);
void atg__log_softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__log_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__log_softmax_backward_data_out(tensor *, tensor out, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__log_softmax_out(tensor *, tensor out, tensor self, int64_t dim, int half_to_float);
void atg__logcumsumexp(tensor *, tensor self, int64_t dim);
void atg__logcumsumexp_out(tensor *, tensor out, tensor self, int64_t dim);
void atg__lstm_mps(tensor *, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg__lstm_mps_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor out4, tensor out5, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg__lu_with_info(tensor *, tensor self, int pivot, int check_errors);
void atg__make_dep_token(tensor *, int options_kind, int options_device);
void atg__make_dual(tensor *, tensor primal, tensor tangent, int64_t level);
void atg__make_dual_copy(tensor *, tensor primal, tensor tangent, int64_t level);
void atg__make_dual_copy_out(tensor *, tensor out, tensor primal, tensor tangent, int64_t level);
void atg__make_per_channel_quantized_tensor(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis);
void atg__make_per_channel_quantized_tensor_out(tensor *, tensor out, tensor self, tensor scale, tensor zero_point, int64_t axis);
void atg__make_per_tensor_quantized_tensor(tensor *, tensor self, double scale, int64_t zero_point);
void atg__make_per_tensor_quantized_tensor_out(tensor *, tensor out, tensor self, double scale, int64_t zero_point);
void atg__masked_scale(tensor *, tensor self, tensor mask, double scale);
void atg__masked_scale_out(tensor *, tensor out, tensor self, tensor mask, double scale);
void atg__masked_softmax(tensor *, tensor self, tensor mask, int64_t dim_v, uint8_t dim_null, int64_t mask_type_v, uint8_t mask_type_null);
void atg__masked_softmax_backward(tensor *, tensor grad_output, tensor output, tensor mask, int64_t dim_v, uint8_t dim_null);
void atg__masked_softmax_backward_out(tensor *, tensor out, tensor grad_output, tensor output, tensor mask, int64_t dim_v, uint8_t dim_null);
void atg__masked_softmax_out(tensor *, tensor out, tensor self, tensor mask, int64_t dim_v, uint8_t dim_null, int64_t mask_type_v, uint8_t mask_type_null);
void atg__mixed_dtypes_linear(tensor *, tensor input, tensor weight, tensor scale, tensor bias, char* activation_ptr, int activation_len);
void atg__mkldnn_reshape(tensor *, tensor self, int64_t *shape_data, int shape_len);
void atg__mkldnn_reshape_out(tensor *, tensor out, tensor self, int64_t *shape_data, int shape_len);
void atg__mkldnn_transpose(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg__mkldnn_transpose_(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg__mkldnn_transpose_out(tensor *, tensor out, tensor self, int64_t dim0, int64_t dim1);
void atg__mps_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg__mps_convolution_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg__mps_convolution_transpose(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg__mps_convolution_transpose_out(tensor *, tensor out, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg__native_batch_norm_legit(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg__native_batch_norm_legit_functional(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg__native_batch_norm_legit_no_stats(tensor *, tensor input, tensor weight, tensor bias, int training, double momentum, double eps);
void atg__native_batch_norm_legit_no_stats_out(tensor *, tensor out, tensor save_mean, tensor save_invstd, tensor input, tensor weight, tensor bias, int training, double momentum, double eps);
void atg__native_batch_norm_legit_no_training(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__native_batch_norm_legit_no_training_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, double momentum, double eps);
void atg__native_batch_norm_legit_out(tensor *, tensor out, tensor save_mean, tensor save_invstd, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg__native_multi_head_attention(tensor *, tensor query, tensor key, tensor value, int64_t embed_dim, int64_t num_head, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, tensor mask, int need_weights, int average_attn_weights, int64_t mask_type_v, uint8_t mask_type_null);
void atg__native_multi_head_attention_out(tensor *, tensor out0, tensor out1, tensor query, tensor key, tensor value, int64_t embed_dim, int64_t num_head, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, tensor mask, int need_weights, int average_attn_weights, int64_t mask_type_v, uint8_t mask_type_null);
void atg__neg_view(tensor *, tensor self);
void atg__neg_view_copy(tensor *, tensor self);
void atg__neg_view_copy_out(tensor *, tensor out, tensor self);
void atg__nested_compute_contiguous_strides_offsets(tensor *, tensor nested_size);
void atg__nested_from_padded(tensor *, tensor padded, tensor cpu_nested_shape_example, int fuse_transform_0213);
void atg__nested_from_padded_and_nested_example(tensor *, tensor padded, tensor nt_example);
void atg__nested_from_padded_and_nested_example_out(tensor *, tensor out, tensor padded, tensor nt_example);
void atg__nested_from_padded_out(tensor *, tensor out, tensor padded, tensor cpu_nested_shape_example, int fuse_transform_0213);
void atg__nested_from_padded_tensor(tensor *, tensor padded, tensor offsets, tensor dummy, int64_t ragged_idx, tensor min_seqlen, tensor max_seqlen, int64_t sum_S_v, uint8_t sum_S_null);
void atg__nested_get_jagged_dummy(tensor *, tensor any);
void atg__nested_get_lengths(tensor *, tensor self);
void atg__nested_get_max_seqlen(tensor *, tensor self);
void atg__nested_get_min_seqlen(tensor *, tensor self);
void atg__nested_get_offsets(tensor *, tensor self);
int64_t atg__nested_get_ragged_idx(tensor self);
void atg__nested_get_values(tensor *, tensor self);
void atg__nested_get_values_copy(tensor *, tensor self);
void atg__nested_get_values_copy_out(tensor *, tensor out, tensor self);
void atg__nested_select_backward(tensor *, tensor grad_output, tensor self, int64_t dim, int64_t index);
void atg__nested_sum_backward(tensor *, tensor grad, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg__nested_view_from_buffer(tensor *, tensor self, tensor nested_size, tensor nested_strides, tensor offsets);
void atg__nested_view_from_buffer_copy(tensor *, tensor self, tensor nested_size, tensor nested_strides, tensor offsets);
void atg__nested_view_from_buffer_copy_out(tensor *, tensor out, tensor self, tensor nested_size, tensor nested_strides, tensor offsets);
void atg__nested_view_from_jagged(tensor *, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx, tensor min_seqlen, tensor max_seqlen);
void atg__nested_view_from_jagged_copy(tensor *, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx, tensor min_seqlen, tensor max_seqlen);
void atg__nested_view_from_jagged_copy_out(tensor *, tensor out, tensor self, tensor offsets, tensor dummy, tensor lengths, int64_t ragged_idx, tensor min_seqlen, tensor max_seqlen);
void atg__new_zeros_with_same_feature_meta(tensor *, tensor self, tensor other, int64_t self_num_batch_dims);
void atg__new_zeros_with_same_feature_meta_out(tensor *, tensor out, tensor self, tensor other, int64_t self_num_batch_dims);
int atg__nnpack_available();
void atg__nnpack_spatial_convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg__nnpack_spatial_convolution_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
int64_t atg__nnz(tensor self);
void atg__pack_padded_sequence(tensor *, tensor input, tensor lengths, int batch_first);
void atg__pack_padded_sequence_backward(tensor *, tensor grad, int64_t *input_size_data, int input_size_len, tensor batch_sizes, int batch_first);
void atg__pack_padded_sequence_out(tensor *, tensor out0, tensor out1, tensor input, tensor lengths, int batch_first);
void atg__pad_circular(tensor *, tensor self, int64_t *pad_data, int pad_len);
void atg__pad_enum(tensor *, tensor self, int64_t *pad_data, int pad_len, int64_t mode, double value_v, uint8_t value_null);
void atg__pad_packed_sequence(tensor *, tensor data, tensor batch_sizes, int batch_first, scalar padding_value, int64_t total_length);
void atg__pdist_backward(tensor *, tensor grad, tensor self, double p, tensor pdist);
void atg__pdist_backward_out(tensor *, tensor out, tensor grad, tensor self, double p, tensor pdist);
void atg__pin_memory(tensor *, tensor self, int device);
void atg__pin_memory_out(tensor *, tensor out, tensor self, int device);
void atg__prelu_kernel(tensor *, tensor self, tensor weight);
void atg__prelu_kernel_backward(tensor *, tensor grad_output, tensor self, tensor weight);
void atg__print(char* s_ptr, int s_len);
void atg__propagate_xla_data(tensor input, tensor output);
void atg__remove_batch_dim(tensor *, tensor self, int64_t level, int64_t batch_size, int64_t out_dim);
void atg__reshape_alias(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
void atg__reshape_alias_copy(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
void atg__reshape_alias_copy_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
void atg__reshape_copy(tensor *, tensor self, int64_t *size_data, int size_len);
void atg__reshape_from_tensor(tensor *, tensor self, tensor shape);
void atg__resize_output(tensor *, tensor self, int64_t *size_data, int size_len, int device);
void atg__resize_output_(tensor *, tensor self, int64_t *size_data, int size_len, int device);
void atg__resize_output_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int device);
void atg__rowwise_prune(tensor *, tensor weight, tensor mask, int compressed_indices_dtype);
void atg__safe_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg__sample_dirichlet(tensor *, tensor self);
void atg__sample_dirichlet_out(tensor *, tensor out, tensor self);
void atg__saturate_weight_to_fp16(tensor *, tensor weight);
void atg__scaled_dot_product_attention_math(tensor *, tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, tensor dropout_mask, double scale_v, uint8_t scale_null, int enable_gqa);
void atg__scaled_dot_product_attention_math_for_mps(tensor *, tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, tensor dropout_mask, double scale_v, uint8_t scale_null);
void atg__scaled_dot_product_cudnn_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor philox_seed, tensor philox_offset, tensor attn_bias, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, double scale_v, uint8_t scale_null);
void atg__scaled_dot_product_efficient_attention(tensor *, tensor query, tensor key, tensor value, tensor attn_bias, int compute_log_sumexp, double dropout_p, int is_causal, double scale_v, uint8_t scale_null);
void atg__scaled_dot_product_flash_attention_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, tensor cum_seq_q, tensor cum_seq_k, int64_t max_q, int64_t max_k, double dropout_p, int is_causal, tensor philox_seed, tensor philox_offset, double scale_v, uint8_t scale_null);
void atg__scaled_dot_product_flash_attention_for_cpu(tensor *, tensor query, tensor key, tensor value, double dropout_p, int is_causal, tensor attn_mask, double scale_v, uint8_t scale_null);
void atg__scaled_dot_product_flash_attention_for_cpu_backward(tensor *, tensor grad_out, tensor query, tensor key, tensor value, tensor out, tensor logsumexp, double dropout_p, int is_causal, tensor attn_mask, double scale_v, uint8_t scale_null);
void atg__scaled_grouped_mm(tensor *, tensor self, tensor mat2, tensor scale_a, tensor scale_b, tensor offs, tensor bias, tensor scale_result, int out_dtype, int use_fast_accum);
void atg__scaled_grouped_mm_v2(tensor *, tensor self, tensor mat2, tensor *scale_a_data, int scale_a_len, int64_t *recipe_a_data, int recipe_a_len, int64_t *swizzle_a_data, int swizzle_a_len, tensor *scale_b_data, int scale_b_len, int64_t *recipe_b_data, int recipe_b_len, int64_t *swizzle_b_data, int swizzle_b_len, tensor offs, tensor bias, int out_dtype, int64_t *contraction_dim_data, int contraction_dim_len, int use_fast_accum);
void atg__scaled_mm(tensor *, tensor self, tensor mat2, tensor scale_a, tensor scale_b, tensor bias, tensor scale_result, int out_dtype, int use_fast_accum);
void atg__scaled_mm_out(tensor *, tensor out, tensor self, tensor mat2, tensor scale_a, tensor scale_b, tensor bias, tensor scale_result, int out_dtype, int use_fast_accum);
void atg__scaled_mm_v2(tensor *, tensor self, tensor mat2, tensor *scale_a_data, int scale_a_len, int64_t *recipe_a_data, int recipe_a_len, int64_t *swizzle_a_data, int swizzle_a_len, tensor *scale_b_data, int scale_b_len, int64_t *recipe_b_data, int recipe_b_len, int64_t *swizzle_b_data, int swizzle_b_len, tensor bias, int out_dtype, int64_t *contraction_dim_data, int contraction_dim_len, int use_fast_accum);
void atg__scaled_mm_v2_out(tensor *, tensor out, tensor self, tensor mat2, tensor *scale_a_data, int scale_a_len, int64_t *recipe_a_data, int recipe_a_len, int64_t *swizzle_a_data, int swizzle_a_len, tensor *scale_b_data, int scale_b_len, int64_t *recipe_b_data, int recipe_b_len, int64_t *swizzle_b_data, int swizzle_b_len, tensor bias, int out_dtype, int64_t *contraction_dim_data, int contraction_dim_len, int use_fast_accum);
void atg__scatter_reduce(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
void atg__scatter_reduce_(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
void atg__scatter_reduce_two_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len, int include_self);
void atg__segment_reduce_backward(tensor *, tensor grad, tensor output, tensor data, char* reduce_ptr, int reduce_len, tensor lengths, tensor offsets, int64_t axis, scalar initial);
void atg__segment_reduce_backward_out(tensor *, tensor out, tensor grad, tensor output, tensor data, char* reduce_ptr, int reduce_len, tensor lengths, tensor offsets, int64_t axis, scalar initial);
void atg__shape_as_tensor(tensor *, tensor self);
void atg__slow_conv2d_backward(tensor *, tensor grad_input, tensor grad_weight, tensor grad_bias, tensor grad_output, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg__sobol_engine_draw(tensor *, tensor quasi, int64_t n, tensor sobolstate, int64_t dimension, int64_t num_generated, int dtype);
void atg__sobol_engine_ff_(tensor *, tensor self, int64_t n, tensor sobolstate, int64_t dimension, int64_t num_generated);
void atg__sobol_engine_initialize_state_(tensor *, tensor self, int64_t dimension);
void atg__sobol_engine_scramble_(tensor *, tensor self, tensor ltm, int64_t dimension);
void atg__softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__softmax_backward_data_out(tensor *, tensor grad_input, tensor grad_output, tensor output, int64_t dim, int input_dtype);
void atg__softmax_out(tensor *, tensor out, tensor self, int64_t dim, int half_to_float);
void atg__sparse_addmm(tensor *, tensor self, tensor mat1, tensor mat2);
void atg__sparse_addmm_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2);
void atg__sparse_broadcast_to(tensor *, tensor self, int64_t *size_data, int size_len);
void atg__sparse_broadcast_to_copy(tensor *, tensor self, int64_t *size_data, int size_len);
void atg__sparse_broadcast_to_copy_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg__sparse_bsc_tensor_unsafe(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_bsr_tensor_unsafe(tensor *, tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_compressed_tensor_unsafe(tensor *, tensor compressed_indices, tensor plain_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_compressed_tensor_with_dims(tensor *, int64_t nnz, int64_t dense_dim, int64_t *size_data, int size_len, int64_t *blocksize_data, int blocksize_len, int index_dtype, int options_kind, int options_device);
void atg__sparse_coo_tensor_unsafe(tensor *, tensor indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device, int is_coalesced);
void atg__sparse_coo_tensor_with_dims(tensor *, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_coo_tensor_with_dims_and_tensors(tensor *, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len, tensor indices, tensor values, int options_kind, int options_device, int is_coalesced);
void atg__sparse_coo_tensor_with_dims_and_tensors_out(tensor *, tensor out, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len, tensor indices, tensor values, int is_coalesced);
void atg__sparse_coo_tensor_with_dims_out(tensor *, tensor out, int64_t sparse_dim, int64_t dense_dim, int64_t *size_data, int size_len);
void atg__sparse_csc_tensor_unsafe(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_csr_prod(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg__sparse_csr_prod_dim_dtype_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg__sparse_csr_sum(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg__sparse_csr_sum_dim_dtype_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg__sparse_csr_tensor_unsafe(tensor *, tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg__sparse_log_softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__sparse_log_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__sparse_log_softmax_backward_data_out(tensor *, tensor out, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__sparse_log_softmax_int(tensor *, tensor self, int64_t dim, int dtype);
void atg__sparse_log_softmax_out(tensor *, tensor out, tensor self, int64_t dim, int half_to_float);
void atg__sparse_mask_projection(tensor *, tensor self, tensor mask, int accumulate_matches);
void atg__sparse_mask_projection_out(tensor *, tensor out, tensor self, tensor mask, int accumulate_matches);
void atg__sparse_mm(tensor *, tensor sparse, tensor dense);
void atg__sparse_mm_reduce(tensor *, tensor sparse, tensor dense, char* reduce_ptr, int reduce_len);
void atg__sparse_mm_reduce_impl(tensor *, tensor self, tensor other, char* reduce_ptr, int reduce_len);
void atg__sparse_semi_structured_apply(tensor *, tensor input, tensor thread_masks);
void atg__sparse_semi_structured_apply_dense(tensor *, tensor input, tensor thread_masks);
void atg__sparse_semi_structured_linear(tensor *, tensor input, tensor weight, tensor meta, tensor bias, char* activation_ptr, int activation_len, int out_dtype);
void atg__sparse_semi_structured_mm(tensor *, tensor mat1, tensor mat1_meta, tensor mat2, int out_dtype);
void atg__sparse_semi_structured_tile(tensor *, tensor input, char* algorithm_ptr, int algorithm_len, int use_cutlass);
void atg__sparse_softmax(tensor *, tensor self, int64_t dim, int half_to_float);
void atg__sparse_softmax_backward_data(tensor *, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__sparse_softmax_backward_data_out(tensor *, tensor out, tensor grad_output, tensor output, int64_t dim, tensor self);
void atg__sparse_softmax_int(tensor *, tensor self, int64_t dim, int dtype);
void atg__sparse_softmax_out(tensor *, tensor out, tensor self, int64_t dim, int half_to_float);
void atg__sparse_sparse_matmul(tensor *, tensor self, tensor other);
void atg__sparse_sparse_matmul_out(tensor *, tensor out, tensor self, tensor other);
void atg__sparse_sum(tensor *, tensor self);
void atg__sparse_sum_backward(tensor *, tensor grad, tensor self, int64_t *dim_data, int dim_len);
void atg__sparse_sum_backward_out(tensor *, tensor out, tensor grad, tensor self, int64_t *dim_data, int dim_len);
void atg__sparse_sum_dim(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg__sparse_sum_dim_dtype(tensor *, tensor self, int64_t *dim_data, int dim_len, int dtype);
void atg__sparse_sum_dim_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len);
void atg__sparse_sum_dtype(tensor *, tensor self, int dtype);
void atg__spdiags(tensor *, tensor diagonals, tensor offsets, int64_t *shape_data, int shape_len, int8_t layout);
void atg__spdiags_out(tensor *, tensor out, tensor diagonals, tensor offsets, int64_t *shape_data, int shape_len, int8_t layout);
void atg__spsolve(tensor *, tensor A, tensor B, int left);
void atg__stack(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg__stack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
void atg__standard_gamma(tensor *, tensor self);
void atg__standard_gamma_grad(tensor *, tensor self, tensor output);
void atg__standard_gamma_grad_out(tensor *, tensor out, tensor self, tensor output);
void atg__standard_gamma_out(tensor *, tensor out, tensor self);
void atg__test_ambiguous_defaults(tensor *, tensor dummy, int64_t a, int64_t b);
void atg__test_ambiguous_defaults_b(tensor *, tensor dummy, int64_t a, char* b_ptr, int b_len);
void atg__test_autograd_multiple_dispatch(tensor *, tensor self);
void atg__test_autograd_multiple_dispatch_fullcoverage_out(tensor *, tensor out, tensor self);
void atg__test_autograd_multiple_dispatch_ntonly(tensor *, tensor self, int b);
void atg__test_autograd_multiple_dispatch_view(tensor *, tensor self);
void atg__test_autograd_multiple_dispatch_view_copy(tensor *, tensor self);
void atg__test_autograd_multiple_dispatch_view_copy_out(tensor *, tensor out, tensor self);
void atg__test_check_tensor(tensor *, tensor self);
void atg__test_functorch_fallback(tensor *, tensor self, tensor other);
void atg__test_functorch_fallback_out(tensor *, tensor out, tensor self, tensor other);
void atg__test_optional_filled_intlist(tensor *, tensor values, int64_t *addends_data, int addends_len);
void atg__test_optional_filled_intlist_out(tensor *, tensor out, tensor values, int64_t *addends_data, int addends_len);
void atg__test_optional_floatlist(tensor *, tensor values, double *addends_data, int addends_len);
void atg__test_optional_floatlist_out(tensor *, tensor out, tensor values, double *addends_data, int addends_len);
void atg__test_optional_intlist(tensor *, tensor values, int64_t *addends_data, int addends_len);
void atg__test_optional_intlist_out(tensor *, tensor out, tensor values, int64_t *addends_data, int addends_len);
void atg__test_parallel_materialize(tensor *, tensor self, int64_t num_parallel, int skip_first);
void atg__test_serialization_subcmul(tensor *, tensor self, tensor other);
void atg__test_string_default(tensor *, tensor dummy, char* a_ptr, int a_len, char* b_ptr, int b_len);
void atg__test_warn_in_autograd(tensor *, tensor self);
void atg__test_warn_in_autograd_out(tensor *, tensor out, tensor self);
void atg__to_copy(tensor *, tensor self, int options_kind, int options_device, int non_blocking);
void atg__to_copy_out(tensor *, tensor out, tensor self, int non_blocking);
tensor *atg__to_cpu(tensor *tensors_data, int tensors_len);
void atg__to_dense(tensor *, tensor self, int dtype, int masked_grad);
void atg__to_dense_out(tensor *, tensor out, tensor self, int dtype, int masked_grad);
void atg__to_sparse(tensor *, tensor self, int8_t layout, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_bsc(tensor *, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_bsc_out(tensor *, tensor out, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_bsr(tensor *, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_bsr_out(tensor *, tensor out, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_csc(tensor *, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_csc_out(tensor *, tensor out, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_csr(tensor *, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_csr_out(tensor *, tensor out, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_out(tensor *, tensor out, tensor self, int8_t layout, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg__to_sparse_semi_structured(tensor *, tensor dense);
void atg__to_sparse_sparse_dim(tensor *, tensor self, int64_t sparse_dim);
void atg__to_sparse_sparse_dim_out(tensor *, tensor out, tensor self, int64_t sparse_dim);
void atg__transform_bias_rescale_qkv(tensor *, tensor qkv, tensor qkv_bias, int64_t num_heads);
void atg__transform_bias_rescale_qkv_out(tensor *, tensor out0, tensor out1, tensor out2, tensor qkv, tensor qkv_bias, int64_t num_heads);
void atg__transformer_encoder_layer_fwd(tensor *, tensor src, int64_t embed_dim, int64_t num_heads, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, int use_gelu, int norm_first, double eps, tensor norm_weight_1, tensor norm_bias_1, tensor norm_weight_2, tensor norm_bias_2, tensor ffn_weight_1, tensor ffn_bias_1, tensor ffn_weight_2, tensor ffn_bias_2, tensor mask, int64_t mask_type_v, uint8_t mask_type_null);
void atg__transformer_encoder_layer_fwd_out(tensor *, tensor out, tensor src, int64_t embed_dim, int64_t num_heads, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, int use_gelu, int norm_first, double eps, tensor norm_weight_1, tensor norm_bias_1, tensor norm_weight_2, tensor norm_bias_2, tensor ffn_weight_1, tensor ffn_bias_1, tensor ffn_weight_2, tensor ffn_bias_2, tensor mask, int64_t mask_type_v, uint8_t mask_type_null);
void atg__trilinear(tensor *, tensor i1, tensor i2, tensor i3, int64_t *expand1_data, int expand1_len, int64_t *expand2_data, int expand2_len, int64_t *expand3_data, int expand3_len, int64_t *sumdim_data, int sumdim_len, int64_t unroll_dim);
void atg__trilinear_out(tensor *, tensor out, tensor i1, tensor i2, tensor i3, int64_t *expand1_data, int expand1_len, int64_t *expand2_data, int expand2_len, int64_t *expand3_data, int expand3_len, int64_t *sumdim_data, int sumdim_len, int64_t unroll_dim);
void atg__triton_multi_head_attention(tensor *, tensor query, tensor key, tensor value, int64_t embed_dim, int64_t num_head, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, tensor mask);
void atg__triton_multi_head_attention_out(tensor *, tensor out, tensor query, tensor key, tensor value, int64_t embed_dim, int64_t num_head, tensor qkv_weight, tensor qkv_bias, tensor proj_weight, tensor proj_bias, tensor mask);
void atg__triton_scaled_dot_attention(tensor *, tensor q, tensor k, tensor v, double dropout_p);
void atg__triton_scaled_dot_attention_out(tensor *, tensor out, tensor q, tensor k, tensor v, double dropout_p);
void atg__unique(tensor *, tensor self, int sorted, int return_inverse);
void atg__unique2(tensor *, tensor self, int sorted, int return_inverse, int return_counts);
void atg__unique2_out(tensor *, tensor out0, tensor out1, tensor out2, tensor self, int sorted, int return_inverse, int return_counts);
void atg__unique_out(tensor *, tensor out0, tensor out1, tensor self, int sorted, int return_inverse);
void atg__unpack_dual(tensor *, tensor dual, int64_t level);
void atg__unsafe_index(tensor *, tensor self, tensor *indices_data, int indices_len);
void atg__unsafe_index_put(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate);
void atg__unsafe_masked_index(tensor *, tensor self, tensor mask, tensor *indices_data, int indices_len, scalar fill);
void atg__unsafe_masked_index_put_accumulate(tensor *, tensor self, tensor mask, tensor *indices_data, int indices_len, tensor values);
void atg__unsafe_view(tensor *, tensor self, int64_t *size_data, int size_len);
void atg__unsafe_view_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg__upsample_bicubic2d_aa(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bicubic2d_aa_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
void atg__upsample_bilinear2d_aa(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_bilinear2d_aa_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
void atg__upsample_nearest_exact1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg__upsample_nearest_exact1d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
void atg__upsample_nearest_exact2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact2d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
void atg__upsample_nearest_exact3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg__upsample_nearest_exact3d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
int atg__use_cudnn_ctc_loss(tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank);
int atg__use_cudnn_ctc_loss_tensor(tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank);
int atg__use_cudnn_rnn_flatten_weight();
int atg__use_miopen_ctc_loss(tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank);
int atg__use_miopen_ctc_loss_tensor(tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank);
void atg__validate_compressed_sparse_indices(int is_crow, tensor compressed_idx, tensor plain_idx, int64_t cdim, int64_t dim, int64_t nnz);
void atg__validate_sparse_bsc_tensor_args(tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int check_pinning);
void atg__validate_sparse_bsr_tensor_args(tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int check_pinning);
void atg__validate_sparse_compressed_tensor_args(tensor compressed_indices, tensor plain_indices, tensor values, int64_t *size_data, int size_len, int8_t layout, int check_pinning);
void atg__validate_sparse_csc_tensor_args(tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int check_pinning);
void atg__validate_sparse_csr_tensor_args(tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int check_pinning);
void atg__values(tensor *, tensor self);
void atg__values_copy(tensor *, tensor self);
void atg__values_copy_out(tensor *, tensor out, tensor self);
int64_t atg__version(tensor self);
void atg__weight_int4pack_mm(tensor *, tensor self, tensor mat2, int64_t qGroupSize, tensor qScaleAndZeros);
void atg__weight_int4pack_mm_for_cpu(tensor *, tensor self, tensor mat2, int64_t qGroupSize, tensor qScaleAndZeros);
void atg__weight_int4pack_mm_with_scales_and_zeros(tensor *, tensor self, tensor mat2, int64_t qGroupSize, tensor qScale, tensor qZeros);
void atg__weight_int8pack_mm(tensor *, tensor self, tensor mat2, tensor scales);
void atg__weight_norm(tensor *, tensor v, tensor g, int64_t dim);
void atg__weight_norm_differentiable_backward(tensor *, tensor grad_w, tensor saved_v, tensor saved_g, tensor saved_norms, int64_t dim);
void atg__weight_norm_interface(tensor *, tensor v, tensor g, int64_t dim);
void atg__weight_norm_interface_backward(tensor *, tensor grad_w, tensor saved_v, tensor saved_g, tensor saved_norms, int64_t dim);
void atg__weight_norm_interface_backward_out(tensor *, tensor out0, tensor out1, tensor grad_w, tensor saved_v, tensor saved_g, tensor saved_norms, int64_t dim);
void atg__weight_norm_interface_out(tensor *, tensor out0, tensor out1, tensor v, tensor g, int64_t dim);
void atg__wrapped_linear_prepack(tensor *, tensor weight, tensor weight_scale, tensor weight_zero_point, tensor bias);
void atg__wrapped_quantized_linear_prepacked(tensor *, tensor input, tensor input_scale, tensor input_zero_point, tensor packed_weight, tensor output_scale, tensor output_zero_point, int64_t out_channel);
void atg_abs(tensor *, tensor self);
void atg_abs_(tensor *, tensor self);
void atg_abs_out(tensor *, tensor out, tensor self);
void atg_absolute(tensor *, tensor self);
void atg_absolute_(tensor *, tensor self);
void atg_absolute_out(tensor *, tensor out, tensor self);
void atg_acos(tensor *, tensor self);
void atg_acos_(tensor *, tensor self);
void atg_acos_out(tensor *, tensor out, tensor self);
void atg_acosh(tensor *, tensor self);
void atg_acosh_(tensor *, tensor self);
void atg_acosh_out(tensor *, tensor out, tensor self);
void atg_adaptive_avg_pool1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_avg_pool3d_backward(tensor *, tensor grad_input, tensor grad_output, tensor self);
void atg_adaptive_avg_pool3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_max_pool1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_max_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_max_pool2d_backward(tensor *, tensor grad_output, tensor self, tensor indices);
void atg_adaptive_max_pool2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor indices);
void atg_adaptive_max_pool2d_out(tensor *, tensor out, tensor indices, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_max_pool3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_adaptive_max_pool3d_backward(tensor *, tensor grad_output, tensor self, tensor indices);
void atg_adaptive_max_pool3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor indices);
void atg_adaptive_max_pool3d_out(tensor *, tensor out, tensor indices, tensor self, int64_t *output_size_data, int output_size_len);
void atg_add(tensor *, tensor self, tensor other);
void atg_add_(tensor *, tensor self, tensor other);
void atg_add_out(tensor *, tensor out, tensor self, tensor other);
void atg_add_scalar(tensor *, tensor self, scalar other);
void atg_add_scalar_(tensor *, tensor self, scalar other);
void atg_add_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_addbmm(tensor *, tensor self, tensor batch1, tensor batch2);
void atg_addbmm_(tensor *, tensor self, tensor batch1, tensor batch2);
void atg_addbmm_out(tensor *, tensor out, tensor self, tensor batch1, tensor batch2);
void atg_addcdiv(tensor *, tensor self, tensor tensor1, tensor tensor2);
void atg_addcdiv_(tensor *, tensor self, tensor tensor1, tensor tensor2);
void atg_addcdiv_out(tensor *, tensor out, tensor self, tensor tensor1, tensor tensor2);
void atg_addcmul(tensor *, tensor self, tensor tensor1, tensor tensor2);
void atg_addcmul_(tensor *, tensor self, tensor tensor1, tensor tensor2);
void atg_addcmul_out(tensor *, tensor out, tensor self, tensor tensor1, tensor tensor2);
void atg_addmm(tensor *, tensor self, tensor mat1, tensor mat2);
void atg_addmm_(tensor *, tensor self, tensor mat1, tensor mat2);
void atg_addmm_dtype(tensor *, tensor self, tensor mat1, tensor mat2, int out_dtype);
void atg_addmm_dtype_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2, int out_dtype);
void atg_addmm_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2);
void atg_addmv(tensor *, tensor self, tensor mat, tensor vec);
void atg_addmv_(tensor *, tensor self, tensor mat, tensor vec);
void atg_addmv_out(tensor *, tensor out, tensor self, tensor mat, tensor vec);
void atg_addr(tensor *, tensor self, tensor vec1, tensor vec2);
void atg_addr_(tensor *, tensor self, tensor vec1, tensor vec2);
void atg_addr_out(tensor *, tensor out, tensor self, tensor vec1, tensor vec2);
void atg_adjoint(tensor *, tensor self);
void atg_affine_grid_generator(tensor *, tensor theta, int64_t *size_data, int size_len, int align_corners);
void atg_affine_grid_generator_backward(tensor *, tensor grad, int64_t *size_data, int size_len, int align_corners);
void atg_affine_grid_generator_out(tensor *, tensor out, tensor theta, int64_t *size_data, int size_len, int align_corners);
void atg_alias(tensor *, tensor self);
void atg_alias_copy(tensor *, tensor self);
void atg_alias_copy_out(tensor *, tensor out, tensor self);
void atg_align_as(tensor *, tensor self, tensor other);
tensor *atg_align_tensors(tensor *tensors_data, int tensors_len);
void atg_all(tensor *, tensor self);
void atg_all_all_out(tensor *, tensor out, tensor self);
void atg_all_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_all_dims(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_all_dims_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_all_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim);
int atg_allclose(tensor self, tensor other, double rtol, double atol, int equal_nan);
void atg_alpha_dropout(tensor *, tensor input, double p, int train);
void atg_alpha_dropout_(tensor *, tensor self, double p, int train);
void atg_amax(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_amax_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_amin(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_amin_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_aminmax(tensor *, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_aminmax_out(tensor *, tensor min, tensor max, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_angle(tensor *, tensor self);
void atg_angle_out(tensor *, tensor out, tensor self);
void atg_any(tensor *, tensor self);
void atg_any_all_out(tensor *, tensor out, tensor self);
void atg_any_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_any_dims(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_any_dims_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_any_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim);
void atg_arange(tensor *, scalar end, int options_kind, int options_device);
void atg_arange_start(tensor *, scalar start, scalar end, int options_kind, int options_device);
void atg_arange_start_step(tensor *, scalar start, scalar end, scalar step, int options_kind, int options_device);
void atg_arccos(tensor *, tensor self);
void atg_arccos_(tensor *, tensor self);
void atg_arccos_out(tensor *, tensor out, tensor self);
void atg_arccosh(tensor *, tensor self);
void atg_arccosh_(tensor *, tensor self);
void atg_arccosh_out(tensor *, tensor out, tensor self);
void atg_arcsin(tensor *, tensor self);
void atg_arcsin_(tensor *, tensor self);
void atg_arcsin_out(tensor *, tensor out, tensor self);
void atg_arcsinh(tensor *, tensor self);
void atg_arcsinh_(tensor *, tensor self);
void atg_arcsinh_out(tensor *, tensor out, tensor self);
void atg_arctan(tensor *, tensor self);
void atg_arctan2(tensor *, tensor self, tensor other);
void atg_arctan2_(tensor *, tensor self, tensor other);
void atg_arctan2_out(tensor *, tensor out, tensor self, tensor other);
void atg_arctan_(tensor *, tensor self);
void atg_arctan_out(tensor *, tensor out, tensor self);
void atg_arctanh(tensor *, tensor self);
void atg_arctanh_(tensor *, tensor self);
void atg_arctanh_out(tensor *, tensor out, tensor self);
void atg_argmax(tensor *, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argmax_out(tensor *, tensor out, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argmin(tensor *, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argmin_out(tensor *, tensor out, tensor self, int64_t dim_v, uint8_t dim_null, int keepdim);
void atg_argsort(tensor *, tensor self, int64_t dim, int descending);
void atg_argsort_stable(tensor *, tensor self, int stable, int64_t dim, int descending);
void atg_argsort_stable_out(tensor *, tensor out, tensor self, int stable, int64_t dim, int descending);
void atg_argwhere(tensor *, tensor self);
void atg_as_strided(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_copy(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_copy_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_scatter(tensor *, tensor self, tensor src, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_as_strided_scatter_out(tensor *, tensor out, tensor self, tensor src, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int64_t storage_offset_v, uint8_t storage_offset_null);
void atg_asin(tensor *, tensor self);
void atg_asin_(tensor *, tensor self);
void atg_asin_out(tensor *, tensor out, tensor self);
void atg_asinh(tensor *, tensor self);
void atg_asinh_(tensor *, tensor self);
void atg_asinh_out(tensor *, tensor out, tensor self);
void atg_atan(tensor *, tensor self);
void atg_atan2(tensor *, tensor self, tensor other);
void atg_atan2_(tensor *, tensor self, tensor other);
void atg_atan2_out(tensor *, tensor out, tensor self, tensor other);
void atg_atan_(tensor *, tensor self);
void atg_atan_out(tensor *, tensor out, tensor self);
void atg_atanh(tensor *, tensor self);
void atg_atanh_(tensor *, tensor self);
void atg_atanh_out(tensor *, tensor out, tensor self);
void atg_atleast_1d(tensor *, tensor self);
tensor *atg_atleast_1d_sequence(tensor *tensors_data, int tensors_len);
void atg_atleast_2d(tensor *, tensor self);
tensor *atg_atleast_2d_sequence(tensor *tensors_data, int tensors_len);
void atg_atleast_3d(tensor *, tensor self);
tensor *atg_atleast_3d_sequence(tensor *tensors_data, int tensors_len);
void atg_avg_pool1d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad);
void atg_avg_pool1d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad);
void atg_avg_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool2d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_avg_pool3d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int ceil_mode, int count_include_pad, int64_t divisor_override_v, uint8_t divisor_override_null);
void atg_baddbmm(tensor *, tensor self, tensor batch1, tensor batch2, scalar beta, scalar alpha);
void atg_baddbmm_(tensor *, tensor self, tensor batch1, tensor batch2);
void atg_baddbmm_dtype(tensor *, tensor self, tensor batch1, tensor batch2, int out_dtype, scalar beta, scalar alpha);
void atg_baddbmm_dtype_out(tensor *, tensor out, tensor self, tensor batch1, tensor batch2, int out_dtype);
void atg_baddbmm_out(tensor *, tensor out, tensor self, tensor batch1, tensor batch2);
void atg_bartlett_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_bartlett_window_out(tensor *, tensor out, int64_t window_length);
void atg_bartlett_window_periodic(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_bartlett_window_periodic_out(tensor *, tensor out, int64_t window_length, int periodic);
void atg_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps, int cudnn_enabled);
void atg_batch_norm_backward_elemt(tensor *, tensor grad_out, tensor input, tensor mean, tensor invstd, tensor weight, tensor sum_dy, tensor sum_dy_xmu, tensor count);
void atg_batch_norm_backward_elemt_out(tensor *, tensor out, tensor grad_out, tensor input, tensor mean, tensor invstd, tensor weight, tensor sum_dy, tensor sum_dy_xmu, tensor count);
void atg_batch_norm_backward_reduce(tensor *, tensor grad_out, tensor input, tensor mean, tensor invstd, tensor weight, int input_g, int weight_g, int bias_g);
void atg_batch_norm_backward_reduce_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor grad_out, tensor input, tensor mean, tensor invstd, tensor weight, int input_g, int weight_g, int bias_g);
void atg_batch_norm_elemt(tensor *, tensor input, tensor weight, tensor bias, tensor mean, tensor invstd, double eps);
void atg_batch_norm_elemt_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, tensor mean, tensor invstd, double eps);
void atg_batch_norm_gather_stats(tensor *, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, int64_t count);
void atg_batch_norm_gather_stats_out(tensor *, tensor out0, tensor out1, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, int64_t count);
void atg_batch_norm_gather_stats_with_counts(tensor *, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, tensor counts);
void atg_batch_norm_gather_stats_with_counts_out(tensor *, tensor out0, tensor out1, tensor input, tensor mean, tensor invstd, tensor running_mean, tensor running_var, double momentum, double eps, tensor counts);
void atg_batch_norm_stats(tensor *, tensor input, double eps);
void atg_batch_norm_stats_out(tensor *, tensor out0, tensor out1, tensor input, double eps);
void atg_batch_norm_update_stats(tensor *, tensor input, tensor running_mean, tensor running_var, double momentum);
void atg_batch_norm_update_stats_out(tensor *, tensor out0, tensor out1, tensor input, tensor running_mean, tensor running_var, double momentum);
void atg_bernoulli(tensor *, tensor self);
void atg_bernoulli_(tensor *, tensor self, tensor p);
void atg_bernoulli_float_(tensor *, tensor self, double p);
void atg_bernoulli_p(tensor *, tensor self, double p);
void atg_bernoulli_tensor(tensor *, tensor self, tensor p);
void atg_bilinear(tensor *, tensor input1, tensor input2, tensor weight, tensor bias);
void atg_binary_cross_entropy(tensor *, tensor self, tensor target, tensor weight, int64_t reduction);
void atg_binary_cross_entropy_backward(tensor *, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction);
void atg_binary_cross_entropy_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction);
void atg_binary_cross_entropy_out(tensor *, tensor out, tensor self, tensor target, tensor weight, int64_t reduction);
void atg_binary_cross_entropy_with_logits(tensor *, tensor self, tensor target, tensor weight, tensor pos_weight, int64_t reduction);
void atg_binary_cross_entropy_with_logits_out(tensor *, tensor out, tensor self, tensor target, tensor weight, tensor pos_weight, int64_t reduction);
void atg_bincount(tensor *, tensor self, tensor weights, int64_t minlength);
void atg_bincount_out(tensor *, tensor out, tensor self, tensor weights, int64_t minlength);
void atg_binomial(tensor *, tensor count, tensor prob);
void atg_binomial_out(tensor *, tensor out, tensor count, tensor prob);
void atg_bitwise_and(tensor *, tensor self, scalar other);
void atg_bitwise_and_(tensor *, tensor self, scalar other);
void atg_bitwise_and_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_bitwise_and_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_bitwise_and_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_bitwise_and_tensor(tensor *, tensor self, tensor other);
void atg_bitwise_and_tensor_(tensor *, tensor self, tensor other);
void atg_bitwise_and_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_bitwise_left_shift(tensor *, tensor self, tensor other);
void atg_bitwise_left_shift_(tensor *, tensor self, tensor other);
void atg_bitwise_left_shift_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_bitwise_left_shift_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_bitwise_left_shift_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_bitwise_left_shift_tensor_scalar(tensor *, tensor self, scalar other);
void atg_bitwise_left_shift_tensor_scalar_(tensor *, tensor self, scalar other);
void atg_bitwise_left_shift_tensor_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_bitwise_not(tensor *, tensor self);
void atg_bitwise_not_(tensor *, tensor self);
void atg_bitwise_not_out(tensor *, tensor out, tensor self);
void atg_bitwise_or(tensor *, tensor self, scalar other);
void atg_bitwise_or_(tensor *, tensor self, scalar other);
void atg_bitwise_or_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_bitwise_or_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_bitwise_or_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_bitwise_or_tensor(tensor *, tensor self, tensor other);
void atg_bitwise_or_tensor_(tensor *, tensor self, tensor other);
void atg_bitwise_or_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_bitwise_right_shift(tensor *, tensor self, tensor other);
void atg_bitwise_right_shift_(tensor *, tensor self, tensor other);
void atg_bitwise_right_shift_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_bitwise_right_shift_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_bitwise_right_shift_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_bitwise_right_shift_tensor_scalar(tensor *, tensor self, scalar other);
void atg_bitwise_right_shift_tensor_scalar_(tensor *, tensor self, scalar other);
void atg_bitwise_right_shift_tensor_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_bitwise_xor(tensor *, tensor self, scalar other);
void atg_bitwise_xor_(tensor *, tensor self, scalar other);
void atg_bitwise_xor_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_bitwise_xor_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_bitwise_xor_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_bitwise_xor_tensor(tensor *, tensor self, tensor other);
void atg_bitwise_xor_tensor_(tensor *, tensor self, tensor other);
void atg_bitwise_xor_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_blackman_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_blackman_window_out(tensor *, tensor out, int64_t window_length);
void atg_blackman_window_periodic(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_blackman_window_periodic_out(tensor *, tensor out, int64_t window_length, int periodic);
void atg_block_diag(tensor *, tensor *tensors_data, int tensors_len);
void atg_block_diag_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_bmm(tensor *, tensor self, tensor mat2);
void atg_bmm_dtype(tensor *, tensor self, tensor mat2, int out_dtype);
void atg_bmm_dtype_out(tensor *, tensor out, tensor self, tensor mat2, int out_dtype);
void atg_bmm_out(tensor *, tensor out, tensor self, tensor mat2);
tensor *atg_broadcast_tensors(tensor *tensors_data, int tensors_len);
void atg_broadcast_to(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_bucketize(tensor *, tensor self, tensor boundaries, int out_int32, int right);
void atg_bucketize_scalar(tensor *, scalar self_scalar, tensor boundaries, int out_int32, int right);
void atg_bucketize_scalar_out(tensor *, tensor out, scalar self_scalar, tensor boundaries, int out_int32, int right);
void atg_bucketize_tensor_out(tensor *, tensor out, tensor self, tensor boundaries, int out_int32, int right);
int atg_can_cast(int from_, int to);
void atg_cartesian_prod(tensor *, tensor *tensors_data, int tensors_len);
void atg_cat(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_cat_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_cauchy(tensor *, tensor self, double median, double sigma);
void atg_cauchy_(tensor *, tensor self, double median, double sigma);
void atg_cauchy_out(tensor *, tensor out, tensor self, double median, double sigma);
void atg_ccol_indices(tensor *, tensor self);
void atg_ccol_indices_copy(tensor *, tensor self);
void atg_ccol_indices_copy_out(tensor *, tensor out, tensor self);
void atg_cdist(tensor *, tensor x1, tensor x2, double p, int64_t compute_mode_v, uint8_t compute_mode_null);
void atg_ceil(tensor *, tensor self);
void atg_ceil_(tensor *, tensor self);
void atg_ceil_out(tensor *, tensor out, tensor self);
void atg_celu(tensor *, tensor self);
void atg_celu_(tensor *, tensor self);
void atg_celu_out(tensor *, tensor out, tensor self);
void atg_chain_matmul(tensor *, tensor *matrices_data, int matrices_len);
void atg_chain_matmul_out(tensor *, tensor out, tensor *matrices_data, int matrices_len);
void atg_chalf(tensor *, tensor self);
void atg_channel_shuffle(tensor *, tensor self, int64_t groups);
void atg_channel_shuffle_out(tensor *, tensor out, tensor self, int64_t groups);
void atg_cholesky(tensor *, tensor self, int upper);
void atg_cholesky_inverse(tensor *, tensor self, int upper);
void atg_cholesky_inverse_out(tensor *, tensor out, tensor self, int upper);
void atg_cholesky_out(tensor *, tensor out, tensor self, int upper);
void atg_cholesky_solve(tensor *, tensor self, tensor input2, int upper);
void atg_cholesky_solve_out(tensor *, tensor out, tensor self, tensor input2, int upper);
void atg_choose_qparams_optimized(tensor *, tensor input, int64_t numel, int64_t n_bins, double ratio, int64_t bit_width);
tensor *atg_chunk(tensor self, int64_t chunks, int64_t dim);
void atg_clamp(tensor *, tensor self, scalar min, scalar max);
void atg_clamp_(tensor *, tensor self, scalar min, scalar max);
void atg_clamp_max(tensor *, tensor self, scalar max);
void atg_clamp_max_(tensor *, tensor self, scalar max);
void atg_clamp_max_out(tensor *, tensor out, tensor self, scalar max);
void atg_clamp_max_tensor(tensor *, tensor self, tensor max);
void atg_clamp_max_tensor_(tensor *, tensor self, tensor max);
void atg_clamp_max_tensor_out(tensor *, tensor out, tensor self, tensor max);
void atg_clamp_min(tensor *, tensor self, scalar min);
void atg_clamp_min_(tensor *, tensor self, scalar min);
void atg_clamp_min_out(tensor *, tensor out, tensor self, scalar min);
void atg_clamp_min_tensor(tensor *, tensor self, tensor min);
void atg_clamp_min_tensor_(tensor *, tensor self, tensor min);
void atg_clamp_min_tensor_out(tensor *, tensor out, tensor self, tensor min);
void atg_clamp_out(tensor *, tensor out, tensor self, scalar min, scalar max);
void atg_clamp_tensor(tensor *, tensor self, tensor min, tensor max);
void atg_clamp_tensor_(tensor *, tensor self, tensor min, tensor max);
void atg_clamp_tensor_out(tensor *, tensor out, tensor self, tensor min, tensor max);
void atg_clip(tensor *, tensor self, scalar min, scalar max);
void atg_clip_(tensor *, tensor self, scalar min, scalar max);
void atg_clip_out(tensor *, tensor out, tensor self, scalar min, scalar max);
void atg_clip_tensor(tensor *, tensor self, tensor min, tensor max);
void atg_clip_tensor_(tensor *, tensor self, tensor min, tensor max);
void atg_clip_tensor_out(tensor *, tensor out, tensor self, tensor min, tensor max);
void atg_clone(tensor *, tensor out, tensor self);
void atg_coalesce(tensor *, tensor self);
void atg_col2im(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_col2im_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_col_indices(tensor *, tensor self);
void atg_col_indices_copy(tensor *, tensor self);
void atg_col_indices_copy_out(tensor *, tensor out, tensor self);
void atg_column_stack(tensor *, tensor *tensors_data, int tensors_len);
void atg_column_stack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_combinations(tensor *, tensor self, int64_t r, int with_replacement);
void atg_complex(tensor *, tensor real, tensor imag);
void atg_complex_out(tensor *, tensor out, tensor real, tensor imag);
void atg_concat(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_concat_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_concatenate(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_concatenate_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_conj(tensor *, tensor self);
void atg_conj_physical(tensor *, tensor self);
void atg_conj_physical_(tensor *, tensor self);
void atg_conj_physical_out(tensor *, tensor out, tensor self);
void atg_constant_pad_nd(tensor *, tensor self, int64_t *pad_data, int pad_len);
void atg_constant_pad_nd_out(tensor *, tensor out, tensor self, int64_t *pad_data, int pad_len);
void atg_contiguous(tensor *, tensor self);
void atg_conv1d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv1d_padding(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv2d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv2d_padding(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv3d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv3d_padding(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, char* padding_ptr, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_conv_depthwise3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_conv_depthwise3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_conv_tbc(tensor *, tensor self, tensor weight, tensor bias, int64_t pad);
void atg_conv_tbc_backward(tensor *, tensor self, tensor input, tensor weight, tensor bias, int64_t pad);
void atg_conv_tbc_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t pad);
void atg_conv_transpose1d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t groups, int64_t *dilation_data, int dilation_len);
void atg_conv_transpose2d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t groups, int64_t *dilation_data, int dilation_len);
void atg_conv_transpose3d(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t groups, int64_t *dilation_data, int dilation_len);
void atg_convolution(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups);
void atg_convolution_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups);
void atg_convolution_overrideable(tensor *, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups);
void atg_convolution_overrideable_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int transposed, int64_t *output_padding_data, int output_padding_len, int64_t groups);
void atg_copy_sparse_to_sparse(tensor *, tensor self, tensor src, int non_blocking);
void atg_copy_sparse_to_sparse_(tensor *, tensor self, tensor src, int non_blocking);
void atg_copy_sparse_to_sparse_out(tensor *, tensor out, tensor self, tensor src, int non_blocking);
void atg_copysign(tensor *, tensor self, tensor other);
void atg_copysign_(tensor *, tensor self, tensor other);
void atg_copysign_out(tensor *, tensor out, tensor self, tensor other);
void atg_copysign_scalar(tensor *, tensor self, scalar other);
void atg_copysign_scalar_(tensor *, tensor self, scalar other);
void atg_copysign_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_corrcoef(tensor *, tensor self);
void atg_cos(tensor *, tensor self);
void atg_cos_(tensor *, tensor self);
void atg_cos_out(tensor *, tensor out, tensor self);
void atg_cosh(tensor *, tensor self);
void atg_cosh_(tensor *, tensor self);
void atg_cosh_out(tensor *, tensor out, tensor self);
void atg_cosine_embedding_loss(tensor *, tensor input1, tensor input2, tensor target, double margin, int64_t reduction);
void atg_cosine_similarity(tensor *, tensor x1, tensor x2, int64_t dim, double eps);
void atg_count_nonzero(tensor *, tensor self, int64_t dim_v, uint8_t dim_null);
void atg_count_nonzero_dim_intlist(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_count_nonzero_dim_intlist_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len);
void atg_count_nonzero_out(tensor *, tensor out, tensor self, int64_t dim_v, uint8_t dim_null);
void atg_cov(tensor *, tensor self, int64_t correction, tensor fweights, tensor aweights);
void atg_cross(tensor *, tensor self, tensor other, int64_t dim_v, uint8_t dim_null);
void atg_cross_entropy_loss(tensor *, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, double label_smoothing);
void atg_cross_out(tensor *, tensor out, tensor self, tensor other, int64_t dim_v, uint8_t dim_null);
void atg_crow_indices(tensor *, tensor self);
void atg_crow_indices_copy(tensor *, tensor self);
void atg_crow_indices_copy_out(tensor *, tensor out, tensor self);
void atg_ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int64_t reduction, int zero_infinity);
void atg_ctc_loss_tensor(tensor *, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int64_t reduction, int zero_infinity);
void atg_cudnn_affine_grid_generator(tensor *, tensor theta, int64_t n, int64_t C, int64_t H, int64_t W);
void atg_cudnn_affine_grid_generator_backward(tensor *, tensor grad, int64_t n, int64_t C, int64_t H, int64_t W);
void atg_cudnn_affine_grid_generator_backward_out(tensor *, tensor out, tensor grad, int64_t n, int64_t C, int64_t H, int64_t W);
void atg_cudnn_affine_grid_generator_out(tensor *, tensor out, tensor theta, int64_t n, int64_t C, int64_t H, int64_t W);
void atg_cudnn_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double exponential_average_factor, double epsilon);
void atg_cudnn_batch_norm_backward(tensor *, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon, tensor reserveSpace);
void atg_cudnn_batch_norm_backward_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon, tensor reserveSpace);
void atg_cudnn_batch_norm_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double exponential_average_factor, double epsilon);
void atg_cudnn_convolution(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_add_relu(tensor *, tensor self, tensor weight, tensor z, scalar alpha, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_cudnn_convolution_add_relu_out(tensor *, tensor out, tensor self, tensor weight, tensor z, scalar alpha, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_cudnn_convolution_out(tensor *, tensor out, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_relu(tensor *, tensor self, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_cudnn_convolution_relu_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_cudnn_convolution_transpose(tensor *, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_convolution_transpose_out(tensor *, tensor out, tensor self, tensor weight, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic, int allow_tf32);
void atg_cudnn_grid_sampler(tensor *, tensor self, tensor grid);
void atg_cudnn_grid_sampler_backward(tensor *, tensor self, tensor grid, tensor grad_output);
void atg_cudnn_grid_sampler_backward_out(tensor *, tensor out0, tensor out1, tensor self, tensor grid, tensor grad_output);
void atg_cudnn_grid_sampler_out(tensor *, tensor out, tensor self, tensor grid);
int atg_cudnn_is_acceptable(tensor self);
void atg_cummax(tensor *, tensor self, int64_t dim);
void atg_cummax_out(tensor *, tensor values, tensor indices, tensor self, int64_t dim);
void atg_cummaxmin_backward(tensor *, tensor grad, tensor input, tensor indices, int64_t dim);
void atg_cummin(tensor *, tensor self, int64_t dim);
void atg_cummin_out(tensor *, tensor values, tensor indices, tensor self, int64_t dim);
void atg_cumprod(tensor *, tensor self, int64_t dim, int dtype);
void atg_cumprod_(tensor *, tensor self, int64_t dim, int dtype);
void atg_cumprod_backward(tensor *, tensor grad, tensor input, int64_t dim, tensor output);
void atg_cumprod_out(tensor *, tensor out, tensor self, int64_t dim, int dtype);
void atg_cumsum(tensor *, tensor self, int64_t dim, int dtype);
void atg_cumsum_(tensor *, tensor self, int64_t dim, int dtype);
void atg_cumsum_out(tensor *, tensor out, tensor self, int64_t dim, int dtype);
void atg_cumulative_trapezoid(tensor *, tensor y, int64_t dim);
void atg_cumulative_trapezoid_x(tensor *, tensor y, tensor x, int64_t dim);
void atg_data(tensor *, tensor self);
void atg_deg2rad(tensor *, tensor self);
void atg_deg2rad_(tensor *, tensor self);
void atg_deg2rad_out(tensor *, tensor out, tensor self);
int64_t atg_dense_dim(tensor self);
void atg_dequantize(tensor *, tensor self);
void atg_dequantize_self_out(tensor *, tensor out, tensor self);
tensor *atg_dequantize_tensors(tensor *tensors_data, int tensors_len);
void atg_dequantize_tensors_out(tensor *out_data, int out_len, tensor *tensors_data, int tensors_len);
void atg_det(tensor *, tensor self);
void atg_detach(tensor *, tensor self);
void atg_detach_(tensor *, tensor self);
void atg_detach_copy(tensor *, tensor self);
void atg_detach_copy_out(tensor *, tensor out, tensor self);
void atg_diag(tensor *, tensor self, int64_t diagonal);
void atg_diag_embed(tensor *, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diag_embed_out(tensor *, tensor out, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diag_out(tensor *, tensor out, tensor self, int64_t diagonal);
void atg_diagflat(tensor *, tensor self, int64_t offset);
void atg_diagonal(tensor *, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_backward(tensor *, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_backward_out(tensor *, tensor out, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_copy(tensor *, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_copy_out(tensor *, tensor out, tensor self, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_scatter(tensor *, tensor self, tensor src, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diagonal_scatter_out(tensor *, tensor out, tensor self, tensor src, int64_t offset, int64_t dim1, int64_t dim2);
void atg_diff(tensor *, tensor self, int64_t n, int64_t dim, tensor prepend, tensor append);
void atg_diff_out(tensor *, tensor out, tensor self, int64_t n, int64_t dim, tensor prepend, tensor append);
void atg_digamma(tensor *, tensor self);
void atg_digamma_(tensor *, tensor self);
void atg_digamma_out(tensor *, tensor out, tensor self);
void atg_dist(tensor *, tensor self, tensor other);
void atg_dist_out(tensor *, tensor out, tensor self, tensor other);
void atg_div(tensor *, tensor self, tensor other);
void atg_div_(tensor *, tensor self, tensor other);
void atg_div_out(tensor *, tensor out, tensor self, tensor other);
void atg_div_out_mode(tensor *, tensor out, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_div_scalar(tensor *, tensor self, scalar other);
void atg_div_scalar_(tensor *, tensor self, scalar other);
void atg_div_scalar_mode(tensor *, tensor self, scalar other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_div_scalar_mode_(tensor *, tensor self, scalar other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_div_scalar_mode_out(tensor *, tensor out, tensor self, scalar other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_div_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_div_tensor_mode(tensor *, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_div_tensor_mode_(tensor *, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_divide(tensor *, tensor self, tensor other);
void atg_divide_(tensor *, tensor self, tensor other);
void atg_divide_out(tensor *, tensor out, tensor self, tensor other);
void atg_divide_out_mode(tensor *, tensor out, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_divide_scalar(tensor *, tensor self, scalar other);
void atg_divide_scalar_(tensor *, tensor self, scalar other);
void atg_divide_scalar_mode(tensor *, tensor self, scalar other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_divide_scalar_mode_(tensor *, tensor self, scalar other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_divide_tensor_mode(tensor *, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_divide_tensor_mode_(tensor *, tensor self, tensor other, char* rounding_mode_ptr, int rounding_mode_len);
void atg_dot(tensor *, tensor self, tensor tensor);
void atg_dot_out(tensor *, tensor out, tensor self, tensor tensor);
void atg_dropout(tensor *, tensor input, double p, int train);
void atg_dropout_(tensor *, tensor self, double p, int train);
tensor *atg_dsplit(tensor self, int64_t sections);
tensor *atg_dsplit_array(tensor self, int64_t *indices_data, int indices_len);
void atg_dstack(tensor *, tensor *tensors_data, int tensors_len);
void atg_dstack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_einsum(tensor *, char* equation_ptr, int equation_len, tensor *tensors_data, int tensors_len, int64_t *path_data, int path_len);
void atg_elu(tensor *, tensor self);
void atg_elu_(tensor *, tensor self);
void atg_elu_backward(tensor *, tensor grad_output, scalar alpha, scalar scale, scalar input_scale, int is_result, tensor self_or_result);
void atg_elu_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, scalar alpha, scalar scale, scalar input_scale, int is_result, tensor self_or_result);
void atg_elu_out(tensor *, tensor out, tensor self);
void atg_embedding(tensor *, tensor weight, tensor indices, int64_t padding_idx, int scale_grad_by_freq, int sparse);
void atg_embedding_backward(tensor *, tensor grad, tensor indices, int64_t num_weights, int64_t padding_idx, int scale_grad_by_freq, int sparse);
void atg_embedding_bag(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset);
void atg_embedding_bag_padding_idx(tensor *, tensor weight, tensor indices, tensor offsets, int scale_grad_by_freq, int64_t mode, int sparse, tensor per_sample_weights, int include_last_offset, int64_t padding_idx_v, uint8_t padding_idx_null);
void atg_embedding_dense_backward(tensor *, tensor grad_output, tensor indices, int64_t num_weights, int64_t padding_idx, int scale_grad_by_freq);
void atg_embedding_dense_backward_out(tensor *, tensor out, tensor grad_output, tensor indices, int64_t num_weights, int64_t padding_idx, int scale_grad_by_freq);
void atg_embedding_out(tensor *, tensor out, tensor weight, tensor indices, int64_t padding_idx, int scale_grad_by_freq, int sparse);
void atg_embedding_renorm(tensor *, tensor self, tensor indices, double max_norm, double norm_type);
void atg_embedding_renorm_(tensor *, tensor self, tensor indices, double max_norm, double norm_type);
void atg_embedding_renorm_out(tensor *, tensor out, tensor self, tensor indices, double max_norm, double norm_type);
void atg_embedding_sparse_backward(tensor *, tensor grad, tensor indices, int64_t num_weights, int64_t padding_idx, int scale_grad_by_freq);
void atg_empty(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_empty_like(tensor *, tensor self);
void atg_empty_like_out(tensor *, tensor out, tensor self);
void atg_empty_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_empty_permuted(tensor *, int64_t *size_data, int size_len, int64_t *physical_layout_data, int physical_layout_len, int options_kind, int options_device);
void atg_empty_permuted_out(tensor *, tensor out, int64_t *size_data, int size_len, int64_t *physical_layout_data, int physical_layout_len);
void atg_empty_quantized(tensor *, int64_t *size_data, int size_len, tensor qtensor, int options_kind, int options_device);
void atg_empty_quantized_out(tensor *, tensor out, int64_t *size_data, int size_len, tensor qtensor);
void atg_empty_strided(tensor *, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int options_kind, int options_device);
void atg_empty_strided_out(tensor *, tensor out, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
void atg_eq(tensor *, tensor self, scalar other);
void atg_eq_(tensor *, tensor self, scalar other);
void atg_eq_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_eq_tensor(tensor *, tensor self, tensor other);
void atg_eq_tensor_(tensor *, tensor self, tensor other);
void atg_eq_tensor_out(tensor *, tensor out, tensor self, tensor other);
int atg_equal(tensor self, tensor other);
void atg_erf(tensor *, tensor self);
void atg_erf_(tensor *, tensor self);
void atg_erf_out(tensor *, tensor out, tensor self);
void atg_erfc(tensor *, tensor self);
void atg_erfc_(tensor *, tensor self);
void atg_erfc_out(tensor *, tensor out, tensor self);
void atg_erfinv(tensor *, tensor self);
void atg_erfinv_(tensor *, tensor self);
void atg_erfinv_out(tensor *, tensor out, tensor self);
void atg_exp(tensor *, tensor self);
void atg_exp2(tensor *, tensor self);
void atg_exp2_(tensor *, tensor self);
void atg_exp2_out(tensor *, tensor out, tensor self);
void atg_exp_(tensor *, tensor self);
void atg_exp_out(tensor *, tensor out, tensor self);
void atg_expand(tensor *, tensor self, int64_t *size_data, int size_len, int implicit);
void atg_expand_as(tensor *, tensor self, tensor other);
void atg_expand_copy(tensor *, tensor self, int64_t *size_data, int size_len, int implicit);
void atg_expand_copy_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int implicit);
void atg_expm1(tensor *, tensor self);
void atg_expm1_(tensor *, tensor self);
void atg_expm1_out(tensor *, tensor out, tensor self);
void atg_exponential(tensor *, tensor self, double lambd);
void atg_exponential_(tensor *, tensor self, double lambd);
void atg_exponential_out(tensor *, tensor out, tensor self, double lambd);
void atg_eye(tensor *, int64_t n, int options_kind, int options_device);
void atg_eye_m(tensor *, int64_t n, int64_t m, int options_kind, int options_device);
void atg_eye_m_out(tensor *, tensor out, int64_t n, int64_t m);
void atg_eye_out(tensor *, tensor out, int64_t n);
void atg_fake_quantize_per_channel_affine(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_channel_affine_cachemask(tensor *, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_channel_affine_cachemask_backward(tensor *, tensor grad, tensor mask);
void atg_fake_quantize_per_channel_affine_cachemask_out(tensor *, tensor out0, tensor out1, tensor self, tensor scale, tensor zero_point, int64_t axis, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_tensor_affine(tensor *, tensor self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_tensor_affine_cachemask(tensor *, tensor self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_tensor_affine_cachemask_backward(tensor *, tensor grad, tensor mask);
void atg_fake_quantize_per_tensor_affine_cachemask_out(tensor *, tensor out0, tensor out1, tensor self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max);
void atg_fake_quantize_per_tensor_affine_tensor_qparams(tensor *, tensor self, tensor scale, tensor zero_point, int64_t quant_min, int64_t quant_max);
void atg_feature_alpha_dropout(tensor *, tensor input, double p, int train);
void atg_feature_alpha_dropout_(tensor *, tensor self, double p, int train);
void atg_feature_dropout(tensor *, tensor input, double p, int train);
void atg_feature_dropout_(tensor *, tensor self, double p, int train);
void atg_fft_fft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_fft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_fft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_fft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_fftfreq(tensor *, int64_t n, double d, int options_kind, int options_device);
void atg_fft_fftfreq_out(tensor *, tensor out, int64_t n, double d);
void atg_fft_fftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_fftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_fftshift(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_fft_hfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_hfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_hfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_hfft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_hfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_hfftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ifft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ifftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ifftshift(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_fft_ihfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ihfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ihfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ihfft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_ihfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_ihfftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_irfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_irfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_irfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_irfft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_irfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_irfftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_rfft(tensor *, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_rfft2(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_rfft2_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_rfft_out(tensor *, tensor out, tensor self, int64_t n_v, uint8_t n_null, int64_t dim, char* norm_ptr, int norm_len);
void atg_fft_rfftfreq(tensor *, int64_t n, double d, int options_kind, int options_device);
void atg_fft_rfftfreq_out(tensor *, tensor out, int64_t n, double d);
void atg_fft_rfftn(tensor *, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fft_rfftn_out(tensor *, tensor out, tensor self, int64_t *s_data, int s_len, int64_t *dim_data, int dim_len, char* norm_ptr, int norm_len);
void atg_fill(tensor *, tensor self, scalar value);
void atg_fill_(tensor *, tensor self, scalar value);
void atg_fill_diagonal_(tensor *, tensor self, scalar fill_value, int wrap);
void atg_fill_scalar_out(tensor *, tensor out, tensor self, scalar value);
void atg_fill_tensor(tensor *, tensor self, tensor value);
void atg_fill_tensor_(tensor *, tensor self, tensor value);
void atg_fill_tensor_out(tensor *, tensor out, tensor self, tensor value);
void atg_fix(tensor *, tensor self);
void atg_fix_(tensor *, tensor self);
void atg_fix_out(tensor *, tensor out, tensor self);
void atg_flatten(tensor *, tensor self, int64_t start_dim, int64_t end_dim);
void atg_flatten_dense_tensors(tensor *, tensor *tensors_data, int tensors_len);
void atg_flip(tensor *, tensor self, int64_t *dims_data, int dims_len);
void atg_flip_out(tensor *, tensor out, tensor self, int64_t *dims_data, int dims_len);
void atg_fliplr(tensor *, tensor self);
void atg_flipud(tensor *, tensor self);
void atg_float_power(tensor *, tensor self, tensor exponent);
void atg_float_power_(tensor *, tensor self, scalar exponent);
void atg_float_power_scalar(tensor *, scalar self_scalar, tensor exponent);
void atg_float_power_scalar_out(tensor *, tensor out, scalar self_scalar, tensor exponent);
void atg_float_power_tensor_(tensor *, tensor self, tensor exponent);
void atg_float_power_tensor_scalar(tensor *, tensor self, scalar exponent);
void atg_float_power_tensor_scalar_out(tensor *, tensor out, tensor self, scalar exponent);
void atg_float_power_tensor_tensor_out(tensor *, tensor out, tensor self, tensor exponent);
void atg_floor(tensor *, tensor self);
void atg_floor_(tensor *, tensor self);
void atg_floor_divide(tensor *, tensor self, tensor other);
void atg_floor_divide_(tensor *, tensor self, tensor other);
void atg_floor_divide_out(tensor *, tensor out, tensor self, tensor other);
void atg_floor_divide_scalar(tensor *, tensor self, scalar other);
void atg_floor_divide_scalar_(tensor *, tensor self, scalar other);
void atg_floor_divide_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_floor_out(tensor *, tensor out, tensor self);
void atg_fmax(tensor *, tensor self, tensor other);
void atg_fmax_out(tensor *, tensor out, tensor self, tensor other);
void atg_fmin(tensor *, tensor self, tensor other);
void atg_fmin_out(tensor *, tensor out, tensor self, tensor other);
void atg_fmod(tensor *, tensor self, scalar other);
void atg_fmod_(tensor *, tensor self, scalar other);
void atg_fmod_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_fmod_tensor(tensor *, tensor self, tensor other);
void atg_fmod_tensor_(tensor *, tensor self, tensor other);
void atg_fmod_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_frac(tensor *, tensor self);
void atg_frac_(tensor *, tensor self);
void atg_frac_out(tensor *, tensor out, tensor self);
void atg_fractional_max_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor random_samples);
void atg_fractional_max_pool2d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor indices);
void atg_fractional_max_pool2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor indices);
void atg_fractional_max_pool2d_output(tensor *, tensor output, tensor indices, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor random_samples);
void atg_fractional_max_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor random_samples);
void atg_fractional_max_pool3d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor indices);
void atg_fractional_max_pool3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor indices);
void atg_fractional_max_pool3d_output(tensor *, tensor output, tensor indices, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *output_size_data, int output_size_len, tensor random_samples);
void atg_frexp(tensor *, tensor self);
void atg_frexp_tensor_out(tensor *, tensor mantissa, tensor exponent, tensor self);
void atg_frobenius_norm(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_frobenius_norm_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_from_file(tensor *, char* filename_ptr, int filename_len, int shared, int64_t size_v, uint8_t size_null, int options_kind, int options_device);
void atg_from_file_out(tensor *, tensor out, char* filename_ptr, int filename_len, int shared, int64_t size_v, uint8_t size_null);
void atg_full(tensor *, int64_t *size_data, int size_len, scalar fill_value, int options_kind, int options_device);
void atg_full_like(tensor *, tensor self, scalar fill_value);
void atg_full_like_out(tensor *, tensor out, tensor self, scalar fill_value);
void atg_full_out(tensor *, tensor out, int64_t *size_data, int size_len, scalar fill_value);
void atg_fused_moving_avg_obs_fake_quant(tensor *, tensor self, tensor observer_on, tensor fake_quant_on, tensor running_min, tensor running_max, tensor scale, tensor zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, int per_row_fake_quant, int symmetric_quant);
void atg_gather(tensor *, tensor self, int64_t dim, tensor index, int sparse_grad);
void atg_gather_backward(tensor *, tensor grad, tensor self, int64_t dim, tensor index, int sparse_grad);
void atg_gather_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, int sparse_grad);
void atg_gcd(tensor *, tensor self, tensor other);
void atg_gcd_(tensor *, tensor self, tensor other);
void atg_gcd_out(tensor *, tensor out, tensor self, tensor other);
void atg_ge(tensor *, tensor self, scalar other);
void atg_ge_(tensor *, tensor self, scalar other);
void atg_ge_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_ge_tensor(tensor *, tensor self, tensor other);
void atg_ge_tensor_(tensor *, tensor self, tensor other);
void atg_ge_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_gelu(tensor *, tensor self, char* approximate_ptr, int approximate_len);
void atg_gelu_(tensor *, tensor self, char* approximate_ptr, int approximate_len);
void atg_gelu_backward(tensor *, tensor grad_output, tensor self, char* approximate_ptr, int approximate_len);
void atg_gelu_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, char* approximate_ptr, int approximate_len);
void atg_gelu_out(tensor *, tensor out, tensor self, char* approximate_ptr, int approximate_len);
void atg_geometric(tensor *, tensor self, double p);
void atg_geometric_(tensor *, tensor self, double p);
void atg_geometric_out(tensor *, tensor out, tensor self, double p);
void atg_geqrf(tensor *, tensor self);
void atg_geqrf_a(tensor *, tensor a, tensor tau, tensor self);
void atg_ger(tensor *, tensor self, tensor vec2);
void atg_ger_out(tensor *, tensor out, tensor self, tensor vec2);
void atg_glu(tensor *, tensor self, int64_t dim);
void atg_glu_backward(tensor *, tensor grad_output, tensor self, int64_t dim);
void atg_glu_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t dim);
void atg_glu_backward_jvp(tensor *, tensor grad_x, tensor grad_glu, tensor x, tensor dgrad_glu, tensor dx, int64_t dim);
void atg_glu_backward_jvp_out(tensor *, tensor out, tensor grad_x, tensor grad_glu, tensor x, tensor dgrad_glu, tensor dx, int64_t dim);
void atg_glu_jvp(tensor *, tensor glu, tensor x, tensor dx, int64_t dim);
void atg_glu_jvp_out(tensor *, tensor out, tensor glu, tensor x, tensor dx, int64_t dim);
void atg_glu_out(tensor *, tensor out, tensor self, int64_t dim);
void atg_grad(tensor *, tensor self);
void atg_greater(tensor *, tensor self, scalar other);
void atg_greater_(tensor *, tensor self, scalar other);
void atg_greater_equal(tensor *, tensor self, scalar other);
void atg_greater_equal_(tensor *, tensor self, scalar other);
void atg_greater_equal_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_greater_equal_tensor(tensor *, tensor self, tensor other);
void atg_greater_equal_tensor_(tensor *, tensor self, tensor other);
void atg_greater_equal_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_greater_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_greater_tensor(tensor *, tensor self, tensor other);
void atg_greater_tensor_(tensor *, tensor self, tensor other);
void atg_greater_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_grid_sampler(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_2d(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_2d_out(tensor *, tensor out, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_3d(tensor *, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_grid_sampler_3d_out(tensor *, tensor out, tensor input, tensor grid, int64_t interpolation_mode, int64_t padding_mode, int align_corners);
void atg_group_norm(tensor *, tensor input, int64_t num_groups, tensor weight, tensor bias, double eps, int cudnn_enabled);
void atg_gru(tensor *, tensor input, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg_gru_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh);
void atg_gru_data(tensor *, tensor data, tensor batch_sizes, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional);
void atg_gt(tensor *, tensor self, scalar other);
void atg_gt_(tensor *, tensor self, scalar other);
void atg_gt_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_gt_tensor(tensor *, tensor self, tensor other);
void atg_gt_tensor_(tensor *, tensor self, tensor other);
void atg_gt_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_hamming_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_hamming_window_out(tensor *, tensor out, int64_t window_length);
void atg_hamming_window_periodic(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_hamming_window_periodic_alpha(tensor *, int64_t window_length, int periodic, double alpha, int options_kind, int options_device);
void atg_hamming_window_periodic_alpha_beta(tensor *, int64_t window_length, int periodic, double alpha, double beta, int options_kind, int options_device);
void atg_hamming_window_periodic_alpha_beta_out(tensor *, tensor out, int64_t window_length, int periodic, double alpha, double beta);
void atg_hamming_window_periodic_alpha_out(tensor *, tensor out, int64_t window_length, int periodic, double alpha);
void atg_hamming_window_periodic_out(tensor *, tensor out, int64_t window_length, int periodic);
void atg_hann_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_hann_window_out(tensor *, tensor out, int64_t window_length);
void atg_hann_window_periodic(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_hann_window_periodic_out(tensor *, tensor out, int64_t window_length, int periodic);
void atg_hardshrink(tensor *, tensor self);
void atg_hardshrink_backward(tensor *, tensor grad_out, tensor self, scalar lambd);
void atg_hardshrink_backward_grad_input(tensor *, tensor grad_input, tensor grad_out, tensor self, scalar lambd);
void atg_hardshrink_out(tensor *, tensor out, tensor self);
void atg_hardsigmoid(tensor *, tensor self);
void atg_hardsigmoid_(tensor *, tensor self);
void atg_hardsigmoid_backward(tensor *, tensor grad_output, tensor self);
void atg_hardsigmoid_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self);
void atg_hardsigmoid_out(tensor *, tensor out, tensor self);
void atg_hardswish(tensor *, tensor self);
void atg_hardswish_(tensor *, tensor self);
void atg_hardswish_backward(tensor *, tensor grad_output, tensor self);
void atg_hardswish_backward_out(tensor *, tensor out, tensor grad_output, tensor self);
void atg_hardswish_out(tensor *, tensor out, tensor self);
void atg_hardtanh(tensor *, tensor self);
void atg_hardtanh_(tensor *, tensor self);
void atg_hardtanh_backward(tensor *, tensor grad_output, tensor self, scalar min_val, scalar max_val);
void atg_hardtanh_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar min_val, scalar max_val);
void atg_hardtanh_out(tensor *, tensor out, tensor self);
void atg_hash_tensor(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int64_t mode);
void atg_hash_tensor_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int64_t mode);
void atg_heaviside(tensor *, tensor self, tensor values);
void atg_heaviside_(tensor *, tensor self, tensor values);
void atg_heaviside_out(tensor *, tensor out, tensor self, tensor values);
void atg_hinge_embedding_loss(tensor *, tensor self, tensor target, double margin, int64_t reduction);
void atg_histc(tensor *, tensor self, int64_t bins);
void atg_histc_out(tensor *, tensor out, tensor self, int64_t bins);
void atg_histogram(tensor *, tensor self, tensor bins, tensor weight, int density);
void atg_histogram_bin_ct(tensor *, tensor self, int64_t bins, double *range_data, int range_len, tensor weight, int density);
void atg_histogram_bin_ct_out(tensor *, tensor hist, tensor bin_edges, tensor self, int64_t bins, double *range_data, int range_len, tensor weight, int density);
void atg_histogram_bins_tensor_out(tensor *, tensor hist, tensor bin_edges, tensor self, tensor bins, tensor weight, int density);
tensor *atg_hsplit(tensor self, int64_t sections);
tensor *atg_hsplit_array(tensor self, int64_t *indices_data, int indices_len);
void atg_hspmm(tensor *, tensor mat1, tensor mat2);
void atg_hspmm_out(tensor *, tensor out, tensor mat1, tensor mat2);
void atg_hstack(tensor *, tensor *tensors_data, int tensors_len);
void atg_hstack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_huber_loss(tensor *, tensor self, tensor target, int64_t reduction, double delta);
void atg_huber_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction, double delta);
void atg_huber_loss_backward_out(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction, double delta);
void atg_huber_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction, double delta);
void atg_hypot(tensor *, tensor self, tensor other);
void atg_hypot_(tensor *, tensor self, tensor other);
void atg_hypot_out(tensor *, tensor out, tensor self, tensor other);
void atg_i0(tensor *, tensor self);
void atg_i0_(tensor *, tensor self);
void atg_i0_out(tensor *, tensor out, tensor self);
void atg_igamma(tensor *, tensor self, tensor other);
void atg_igamma_(tensor *, tensor self, tensor other);
void atg_igamma_out(tensor *, tensor out, tensor self, tensor other);
void atg_igammac(tensor *, tensor self, tensor other);
void atg_igammac_(tensor *, tensor self, tensor other);
void atg_igammac_out(tensor *, tensor out, tensor self, tensor other);
void atg_im2col(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_im2col_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *dilation_data, int dilation_len, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len);
void atg_imag(tensor *, tensor self);
void atg_index(tensor *, tensor self, tensor *indices_data, int indices_len);
void atg_index_add(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_add_(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_add_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_copy(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_copy_(tensor *, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_copy_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor source);
void atg_index_fill(tensor *, tensor self, int64_t dim, tensor index, scalar value);
void atg_index_fill_(tensor *, tensor self, int64_t dim, tensor index, scalar value);
void atg_index_fill_int_scalar_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, scalar value);
void atg_index_fill_int_tensor(tensor *, tensor self, int64_t dim, tensor index, tensor value);
void atg_index_fill_int_tensor_(tensor *, tensor self, int64_t dim, tensor index, tensor value);
void atg_index_fill_int_tensor_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor value);
void atg_index_put(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate);
void atg_index_put_(tensor *, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate);
void atg_index_put_out(tensor *, tensor out, tensor self, tensor *indices_data, int indices_len, tensor values, int accumulate);
void atg_index_reduce(tensor *, tensor self, int64_t dim, tensor index, tensor source, char* reduce_ptr, int reduce_len, int include_self);
void atg_index_reduce_(tensor *, tensor self, int64_t dim, tensor index, tensor source, char* reduce_ptr, int reduce_len, int include_self);
void atg_index_reduce_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor source, char* reduce_ptr, int reduce_len, int include_self);
void atg_index_select(tensor *, tensor self, int64_t dim, tensor index);
void atg_index_select_backward(tensor *, tensor grad, int64_t *self_sizes_data, int self_sizes_len, int64_t dim, tensor index);
void atg_index_select_out(tensor *, tensor out, tensor self, int64_t dim, tensor index);
void atg_index_tensor_out(tensor *, tensor out, tensor self, tensor *indices_data, int indices_len);
void atg_indices(tensor *, tensor self);
void atg_indices_copy(tensor *, tensor self);
void atg_indices_copy_out(tensor *, tensor out, tensor self);
void atg_infinitely_differentiable_gelu_backward(tensor *, tensor grad, tensor self);
void atg_inner(tensor *, tensor self, tensor other);
void atg_inner_out(tensor *, tensor out, tensor self, tensor other);
void atg_instance_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int use_input_stats, double momentum, double eps, int cudnn_enabled);
void atg_int_repr(tensor *, tensor self);
void atg_int_repr_out(tensor *, tensor out, tensor self);
void atg_inverse(tensor *, tensor self);
void atg_inverse_out(tensor *, tensor out, tensor self);
int atg_is_coalesced(tensor self);
int atg_is_complex(tensor self);
int atg_is_conj(tensor self);
int atg_is_distributed(tensor self);
int atg_is_floating_point(tensor self);
int atg_is_inference(tensor self);
int atg_is_leaf(tensor self);
int atg_is_neg(tensor self);
int atg_is_nonzero(tensor self);
int atg_is_pinned(tensor self, int device);
int atg_is_same_size(tensor self, tensor other);
int atg_is_set_to(tensor self, tensor tensor);
int atg_is_signed(tensor self);
int atg_is_vulkan_available();
void atg_isclose(tensor *, tensor self, tensor other, double rtol, double atol, int equal_nan);
void atg_isfinite(tensor *, tensor self);
void atg_isin(tensor *, tensor elements, tensor test_elements, int assume_unique, int invert);
void atg_isin_scalar_tensor(tensor *, scalar element, tensor test_elements, int assume_unique, int invert);
void atg_isin_scalar_tensor_out(tensor *, tensor out, scalar element, tensor test_elements, int assume_unique, int invert);
void atg_isin_tensor_scalar(tensor *, tensor elements, scalar test_element, int assume_unique, int invert);
void atg_isin_tensor_scalar_out(tensor *, tensor out, tensor elements, scalar test_element, int assume_unique, int invert);
void atg_isin_tensor_tensor_out(tensor *, tensor out, tensor elements, tensor test_elements, int assume_unique, int invert);
void atg_isinf(tensor *, tensor self);
void atg_isinf_out(tensor *, tensor out, tensor self);
void atg_isnan(tensor *, tensor self);
void atg_isnan_out(tensor *, tensor out, tensor self);
void atg_isneginf(tensor *, tensor self);
void atg_isneginf_out(tensor *, tensor out, tensor self);
void atg_isposinf(tensor *, tensor self);
void atg_isposinf_out(tensor *, tensor out, tensor self);
void atg_isreal(tensor *, tensor self);
void atg_istft(tensor *, tensor self, int64_t n_fft, int64_t hop_length_v, uint8_t hop_length_null, int64_t win_length_v, uint8_t win_length_null, tensor window, int center, int normalized, int onesided, int64_t length_v, uint8_t length_null, int return_complex);
void atg_kaiser_window(tensor *, int64_t window_length, int options_kind, int options_device);
void atg_kaiser_window_beta(tensor *, int64_t window_length, int periodic, double beta, int options_kind, int options_device);
void atg_kaiser_window_beta_out(tensor *, tensor out, int64_t window_length, int periodic, double beta);
void atg_kaiser_window_out(tensor *, tensor out, int64_t window_length);
void atg_kaiser_window_periodic(tensor *, int64_t window_length, int periodic, int options_kind, int options_device);
void atg_kaiser_window_periodic_out(tensor *, tensor out, int64_t window_length, int periodic);
void atg_kl_div(tensor *, tensor self, tensor target, int64_t reduction, int log_target);
void atg_kron(tensor *, tensor self, tensor other);
void atg_kron_out(tensor *, tensor out, tensor self, tensor other);
void atg_kthvalue(tensor *, tensor self, int64_t k, int64_t dim, int keepdim);
void atg_kthvalue_values(tensor *, tensor values, tensor indices, tensor self, int64_t k, int64_t dim, int keepdim);
void atg_l1_loss(tensor *, tensor self, tensor target, int64_t reduction);
void atg_layer_norm(tensor *, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, tensor bias, double eps, int cudnn_enable);
void atg_lcm(tensor *, tensor self, tensor other);
void atg_lcm_(tensor *, tensor self, tensor other);
void atg_lcm_out(tensor *, tensor out, tensor self, tensor other);
void atg_ldexp(tensor *, tensor self, tensor other);
void atg_ldexp_(tensor *, tensor self, tensor other);
void atg_ldexp_out(tensor *, tensor out, tensor self, tensor other);
void atg_le(tensor *, tensor self, scalar other);
void atg_le_(tensor *, tensor self, scalar other);
void atg_le_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_le_tensor(tensor *, tensor self, tensor other);
void atg_le_tensor_(tensor *, tensor self, tensor other);
void atg_le_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_leaky_relu(tensor *, tensor self);
void atg_leaky_relu_(tensor *, tensor self);
void atg_leaky_relu_backward(tensor *, tensor grad_output, tensor self, scalar negative_slope, int self_is_result);
void atg_leaky_relu_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar negative_slope, int self_is_result);
void atg_leaky_relu_out(tensor *, tensor out, tensor self);
void atg_lerp(tensor *, tensor self, tensor end, scalar weight);
void atg_lerp_(tensor *, tensor self, tensor end, scalar weight);
void atg_lerp_scalar_out(tensor *, tensor out, tensor self, tensor end, scalar weight);
void atg_lerp_tensor(tensor *, tensor self, tensor end, tensor weight);
void atg_lerp_tensor_(tensor *, tensor self, tensor end, tensor weight);
void atg_lerp_tensor_out(tensor *, tensor out, tensor self, tensor end, tensor weight);
void atg_less(tensor *, tensor self, scalar other);
void atg_less_(tensor *, tensor self, scalar other);
void atg_less_equal(tensor *, tensor self, scalar other);
void atg_less_equal_(tensor *, tensor self, scalar other);
void atg_less_equal_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_less_equal_tensor(tensor *, tensor self, tensor other);
void atg_less_equal_tensor_(tensor *, tensor self, tensor other);
void atg_less_equal_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_less_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_less_tensor(tensor *, tensor self, tensor other);
void atg_less_tensor_(tensor *, tensor self, tensor other);
void atg_less_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_lgamma(tensor *, tensor self);
void atg_lgamma_(tensor *, tensor self);
void atg_lgamma_out(tensor *, tensor out, tensor self);
void atg_lift(tensor *, tensor self);
void atg_lift_fresh(tensor *, tensor self);
void atg_lift_fresh_copy(tensor *, tensor self);
void atg_lift_fresh_copy_out(tensor *, tensor out, tensor self);
void atg_lift_out(tensor *, tensor out, tensor self);
void atg_linalg_cholesky(tensor *, tensor self, int upper);
void atg_linalg_cholesky_ex(tensor *, tensor self, int upper, int check_errors);
void atg_linalg_cholesky_ex_l(tensor *, tensor L, tensor info, tensor self, int upper, int check_errors);
void atg_linalg_cholesky_out(tensor *, tensor out, tensor self, int upper);
void atg_linalg_cond(tensor *, tensor self, scalar p);
void atg_linalg_cond_out(tensor *, tensor out, tensor self, scalar p);
void atg_linalg_cond_p_str(tensor *, tensor self, char* p_ptr, int p_len);
void atg_linalg_cond_p_str_out(tensor *, tensor out, tensor self, char* p_ptr, int p_len);
void atg_linalg_cross(tensor *, tensor self, tensor other, int64_t dim);
void atg_linalg_cross_out(tensor *, tensor out, tensor self, tensor other, int64_t dim);
void atg_linalg_det(tensor *, tensor A);
void atg_linalg_det_out(tensor *, tensor out, tensor A);
void atg_linalg_diagonal(tensor *, tensor A, int64_t offset, int64_t dim1, int64_t dim2);
void atg_linalg_eig(tensor *, tensor self);
void atg_linalg_eig_out(tensor *, tensor eigenvalues, tensor eigenvectors, tensor self);
void atg_linalg_eigh(tensor *, tensor self, char* UPLO_ptr, int UPLO_len);
void atg_linalg_eigh_eigvals(tensor *, tensor eigvals, tensor eigvecs, tensor self, char* UPLO_ptr, int UPLO_len);
void atg_linalg_eigvals(tensor *, tensor self);
void atg_linalg_eigvals_out(tensor *, tensor out, tensor self);
void atg_linalg_eigvalsh(tensor *, tensor self, char* UPLO_ptr, int UPLO_len);
void atg_linalg_eigvalsh_out(tensor *, tensor out, tensor self, char* UPLO_ptr, int UPLO_len);
void atg_linalg_householder_product(tensor *, tensor input, tensor tau);
void atg_linalg_householder_product_out(tensor *, tensor out, tensor input, tensor tau);
void atg_linalg_inv(tensor *, tensor A);
void atg_linalg_inv_ex(tensor *, tensor A, int check_errors);
void atg_linalg_inv_ex_inverse(tensor *, tensor inverse, tensor info, tensor A, int check_errors);
void atg_linalg_inv_out(tensor *, tensor out, tensor A);
void atg_linalg_ldl_factor(tensor *, tensor self, int hermitian);
void atg_linalg_ldl_factor_ex(tensor *, tensor self, int hermitian, int check_errors);
void atg_linalg_ldl_factor_ex_out(tensor *, tensor LD, tensor pivots, tensor info, tensor self, int hermitian, int check_errors);
void atg_linalg_ldl_factor_out(tensor *, tensor LD, tensor pivots, tensor self, int hermitian);
void atg_linalg_ldl_solve(tensor *, tensor LD, tensor pivots, tensor B, int hermitian);
void atg_linalg_ldl_solve_out(tensor *, tensor out, tensor LD, tensor pivots, tensor B, int hermitian);
void atg_linalg_lstsq(tensor *, tensor self, tensor b, double rcond_v, uint8_t rcond_null, char* driver_ptr, int driver_len);
void atg_linalg_lstsq_out(tensor *, tensor solution, tensor residuals, tensor rank, tensor singular_values, tensor self, tensor b, double rcond_v, uint8_t rcond_null, char* driver_ptr, int driver_len);
void atg_linalg_lu(tensor *, tensor A, int pivot);
void atg_linalg_lu_factor(tensor *, tensor A, int pivot);
void atg_linalg_lu_factor_ex(tensor *, tensor A, int pivot, int check_errors);
void atg_linalg_lu_factor_ex_out(tensor *, tensor LU, tensor pivots, tensor info, tensor A, int pivot, int check_errors);
void atg_linalg_lu_factor_out(tensor *, tensor LU, tensor pivots, tensor A, int pivot);
void atg_linalg_lu_out(tensor *, tensor P, tensor L, tensor U, tensor A, int pivot);
void atg_linalg_lu_solve(tensor *, tensor LU, tensor pivots, tensor B, int left, int adjoint);
void atg_linalg_lu_solve_out(tensor *, tensor out, tensor LU, tensor pivots, tensor B, int left, int adjoint);
void atg_linalg_matmul(tensor *, tensor self, tensor other);
void atg_linalg_matmul_out(tensor *, tensor out, tensor self, tensor other);
void atg_linalg_matrix_exp(tensor *, tensor self);
void atg_linalg_matrix_exp_out(tensor *, tensor out, tensor self);
void atg_linalg_matrix_power(tensor *, tensor self, int64_t n);
void atg_linalg_matrix_power_out(tensor *, tensor out, tensor self, int64_t n);
void atg_linalg_matrix_rank(tensor *, tensor self, double tol, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_float(tensor *, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_float_out(tensor *, tensor out, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_tensor(tensor *, tensor input, tensor atol, tensor rtol, int hermitian);
void atg_linalg_matrix_rank_atol_rtol_tensor_out(tensor *, tensor out, tensor input, tensor atol, tensor rtol, int hermitian);
void atg_linalg_matrix_rank_out(tensor *, tensor out, tensor self, double tol, int hermitian);
void atg_linalg_matrix_rank_out_tol_tensor(tensor *, tensor out, tensor input, tensor tol, int hermitian);
void atg_linalg_matrix_rank_tol_tensor(tensor *, tensor input, tensor tol, int hermitian);
void atg_linalg_multi_dot(tensor *, tensor *tensors_data, int tensors_len);
void atg_linalg_multi_dot_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_linalg_norm(tensor *, tensor self, scalar ord, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_norm_ord_str(tensor *, tensor self, char* ord_ptr, int ord_len, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_norm_ord_str_out(tensor *, tensor out, tensor self, char* ord_ptr, int ord_len, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_norm_out(tensor *, tensor out, tensor self, scalar ord, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_linalg_pinv(tensor *, tensor self, double rcond, int hermitian);
void atg_linalg_pinv_atol_rtol_float(tensor *, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_pinv_atol_rtol_float_out(tensor *, tensor out, tensor self, double atol_v, uint8_t atol_null, double rtol_v, uint8_t rtol_null, int hermitian);
void atg_linalg_pinv_atol_rtol_tensor(tensor *, tensor self, tensor atol, tensor rtol, int hermitian);
void atg_linalg_pinv_atol_rtol_tensor_out(tensor *, tensor out, tensor self, tensor atol, tensor rtol, int hermitian);
void atg_linalg_pinv_out(tensor *, tensor out, tensor self, double rcond, int hermitian);
void atg_linalg_pinv_out_rcond_tensor(tensor *, tensor out, tensor self, tensor rcond, int hermitian);
void atg_linalg_pinv_rcond_tensor(tensor *, tensor self, tensor rcond, int hermitian);
void atg_linalg_qr(tensor *, tensor A, char* mode_ptr, int mode_len);
void atg_linalg_qr_out(tensor *, tensor Q, tensor R, tensor A, char* mode_ptr, int mode_len);
void atg_linalg_slogdet(tensor *, tensor A);
void atg_linalg_slogdet_out(tensor *, tensor sign, tensor logabsdet, tensor A);
void atg_linalg_solve(tensor *, tensor A, tensor B, int left);
void atg_linalg_solve_ex(tensor *, tensor A, tensor B, int left, int check_errors);
void atg_linalg_solve_ex_out(tensor *, tensor result, tensor info, tensor A, tensor B, int left, int check_errors);
void atg_linalg_solve_out(tensor *, tensor out, tensor A, tensor B, int left);
void atg_linalg_solve_triangular(tensor *, tensor self, tensor B, int upper, int left, int unitriangular);
void atg_linalg_solve_triangular_out(tensor *, tensor out, tensor self, tensor B, int upper, int left, int unitriangular);
void atg_linalg_svd(tensor *, tensor A, int full_matrices, char* driver_ptr, int driver_len);
void atg_linalg_svd_u(tensor *, tensor U, tensor S, tensor Vh, tensor A, int full_matrices, char* driver_ptr, int driver_len);
void atg_linalg_svdvals(tensor *, tensor A, char* driver_ptr, int driver_len);
void atg_linalg_svdvals_out(tensor *, tensor out, tensor A, char* driver_ptr, int driver_len);
void atg_linalg_tensorinv(tensor *, tensor self, int64_t ind);
void atg_linalg_tensorinv_out(tensor *, tensor out, tensor self, int64_t ind);
void atg_linalg_tensorsolve(tensor *, tensor self, tensor other, int64_t *dims_data, int dims_len);
void atg_linalg_tensorsolve_out(tensor *, tensor out, tensor self, tensor other, int64_t *dims_data, int dims_len);
void atg_linalg_vander(tensor *, tensor x, int64_t n_v, uint8_t n_null);
void atg_linalg_vecdot(tensor *, tensor x, tensor y, int64_t dim);
void atg_linalg_vecdot_out(tensor *, tensor out, tensor x, tensor y, int64_t dim);
void atg_linear(tensor *, tensor input, tensor weight, tensor bias);
void atg_linear_out(tensor *, tensor out, tensor input, tensor weight, tensor bias);
void atg_linspace(tensor *, scalar start, scalar end, int64_t steps, int options_kind, int options_device);
void atg_linspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps);
void atg_linspace_scalar_tensor(tensor *, scalar start, tensor end, int64_t steps, int options_kind, int options_device);
void atg_linspace_scalar_tensor_out(tensor *, tensor out, scalar start, tensor end, int64_t steps);
void atg_linspace_tensor_scalar(tensor *, tensor start, scalar end, int64_t steps, int options_kind, int options_device);
void atg_linspace_tensor_scalar_out(tensor *, tensor out, tensor start, scalar end, int64_t steps);
void atg_linspace_tensor_tensor(tensor *, tensor start, tensor end, int64_t steps, int options_kind, int options_device);
void atg_linspace_tensor_tensor_out(tensor *, tensor out, tensor start, tensor end, int64_t steps);
void atg_log(tensor *, tensor self);
void atg_log10(tensor *, tensor self);
void atg_log10_(tensor *, tensor self);
void atg_log10_out(tensor *, tensor out, tensor self);
void atg_log1p(tensor *, tensor self);
void atg_log1p_(tensor *, tensor self);
void atg_log1p_out(tensor *, tensor out, tensor self);
void atg_log2(tensor *, tensor self);
void atg_log2_(tensor *, tensor self);
void atg_log2_out(tensor *, tensor out, tensor self);
void atg_log_(tensor *, tensor self);
void atg_log_normal(tensor *, tensor self, double mean, double std);
void atg_log_normal_(tensor *, tensor self, double mean, double std);
void atg_log_normal_out(tensor *, tensor out, tensor self, double mean, double std);
void atg_log_out(tensor *, tensor out, tensor self);
void atg_log_sigmoid(tensor *, tensor self);
void atg_log_sigmoid_backward(tensor *, tensor grad_output, tensor self, tensor buffer);
void atg_log_sigmoid_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor buffer);
void atg_log_sigmoid_out(tensor *, tensor out, tensor self);
void atg_log_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_log_softmax_int_out(tensor *, tensor out, tensor self, int64_t dim, int dtype);
void atg_logaddexp(tensor *, tensor self, tensor other);
void atg_logaddexp2(tensor *, tensor self, tensor other);
void atg_logaddexp2_out(tensor *, tensor out, tensor self, tensor other);
void atg_logaddexp_out(tensor *, tensor out, tensor self, tensor other);
void atg_logcumsumexp(tensor *, tensor self, int64_t dim);
void atg_logcumsumexp_out(tensor *, tensor out, tensor self, int64_t dim);
void atg_logdet(tensor *, tensor self);
void atg_logical_and(tensor *, tensor self, tensor other);
void atg_logical_and_(tensor *, tensor self, tensor other);
void atg_logical_and_out(tensor *, tensor out, tensor self, tensor other);
void atg_logical_not(tensor *, tensor self);
void atg_logical_not_(tensor *, tensor self);
void atg_logical_not_out(tensor *, tensor out, tensor self);
void atg_logical_or(tensor *, tensor self, tensor other);
void atg_logical_or_(tensor *, tensor self, tensor other);
void atg_logical_or_out(tensor *, tensor out, tensor self, tensor other);
void atg_logical_xor(tensor *, tensor self, tensor other);
void atg_logical_xor_(tensor *, tensor self, tensor other);
void atg_logical_xor_out(tensor *, tensor out, tensor self, tensor other);
void atg_logit(tensor *, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_(tensor *, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_backward(tensor *, tensor grad_output, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, double eps_v, uint8_t eps_null);
void atg_logit_out(tensor *, tensor out, tensor self, double eps_v, uint8_t eps_null);
void atg_logspace(tensor *, scalar start, scalar end, int64_t steps, double base, int options_kind, int options_device);
void atg_logspace_out(tensor *, tensor out, scalar start, scalar end, int64_t steps, double base);
void atg_logspace_scalar_tensor(tensor *, scalar start, tensor end, int64_t steps, double base, int options_kind, int options_device);
void atg_logspace_scalar_tensor_out(tensor *, tensor out, scalar start, tensor end, int64_t steps, double base);
void atg_logspace_tensor_scalar(tensor *, tensor start, scalar end, int64_t steps, double base, int options_kind, int options_device);
void atg_logspace_tensor_scalar_out(tensor *, tensor out, tensor start, scalar end, int64_t steps, double base);
void atg_logspace_tensor_tensor(tensor *, tensor start, tensor end, int64_t steps, double base, int options_kind, int options_device);
void atg_logspace_tensor_tensor_out(tensor *, tensor out, tensor start, tensor end, int64_t steps, double base);
void atg_logsumexp(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_logsumexp_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_lstm(tensor *, tensor input, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg_lstm_cell(tensor *, tensor input, tensor *hx_data, int hx_len, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh);
void atg_lstm_data(tensor *, tensor data, tensor batch_sizes, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional);
void atg_lstm_mps_backward(tensor out0, tensor *out1_data, int out1_len, tensor *out2_data, int out2_len, tensor grad_y, tensor grad_hy, tensor grad_cy, tensor z_state, tensor cell_state_fwd, tensor input, tensor layersOutputs, tensor *hx_data, int hx_len, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg_lt(tensor *, tensor self, scalar other);
void atg_lt_(tensor *, tensor self, scalar other);
void atg_lt_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_lt_tensor(tensor *, tensor self, tensor other);
void atg_lt_tensor_(tensor *, tensor self, tensor other);
void atg_lt_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_lu_solve(tensor *, tensor self, tensor LU_data, tensor LU_pivots);
void atg_lu_solve_out(tensor *, tensor out, tensor self, tensor LU_data, tensor LU_pivots);
void atg_lu_unpack(tensor *, tensor LU_data, tensor LU_pivots, int unpack_data, int unpack_pivots);
void atg_lu_unpack_out(tensor *, tensor P, tensor L, tensor U, tensor LU_data, tensor LU_pivots, int unpack_data, int unpack_pivots);
void atg_margin_ranking_loss(tensor *, tensor input1, tensor input2, tensor target, double margin, int64_t reduction);
void atg_masked_fill(tensor *, tensor self, tensor mask, scalar value);
void atg_masked_fill_(tensor *, tensor self, tensor mask, scalar value);
void atg_masked_fill_scalar_out(tensor *, tensor out, tensor self, tensor mask, scalar value);
void atg_masked_fill_tensor(tensor *, tensor self, tensor mask, tensor value);
void atg_masked_fill_tensor_(tensor *, tensor self, tensor mask, tensor value);
void atg_masked_fill_tensor_out(tensor *, tensor out, tensor self, tensor mask, tensor value);
void atg_masked_scatter(tensor *, tensor self, tensor mask, tensor source);
void atg_masked_scatter_(tensor *, tensor self, tensor mask, tensor source);
void atg_masked_scatter_backward(tensor *, tensor grad_output, tensor mask, int64_t *sizes_data, int sizes_len);
void atg_masked_scatter_out(tensor *, tensor out, tensor self, tensor mask, tensor source);
void atg_masked_select(tensor *, tensor self, tensor mask);
void atg_masked_select_backward(tensor *, tensor grad, tensor input, tensor mask);
void atg_masked_select_out(tensor *, tensor out, tensor self, tensor mask);
void atg_matmul(tensor *, tensor self, tensor other);
void atg_matmul_out(tensor *, tensor out, tensor self, tensor other);
void atg_matrix_exp(tensor *, tensor self);
void atg_matrix_exp_backward(tensor *, tensor self, tensor grad);
void atg_matrix_h(tensor *, tensor self);
void atg_matrix_power(tensor *, tensor self, int64_t n);
void atg_matrix_power_out(tensor *, tensor out, tensor self, int64_t n);
void atg_max(tensor *, tensor self);
void atg_max_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_max_dim_max(tensor *, tensor max, tensor max_values, tensor self, int64_t dim, int keepdim);
void atg_max_other(tensor *, tensor self, tensor other);
void atg_max_out(tensor *, tensor out, tensor self, tensor other);
void atg_max_pool1d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool1d_with_indices(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool2d_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool2d_backward_out(tensor *, tensor out, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool2d_with_indices(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool2d_with_indices_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode, tensor indices);
void atg_max_pool2d_with_indices_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode, tensor indices);
void atg_max_pool2d_with_indices_out(tensor *, tensor out, tensor indices, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool3d_with_indices(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_pool3d_with_indices_backward(tensor *, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode, tensor indices);
void atg_max_pool3d_with_indices_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode, tensor indices);
void atg_max_pool3d_with_indices_out(tensor *, tensor out, tensor indices, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_max_unary_out(tensor *, tensor out, tensor self);
void atg_max_unpool2d(tensor *, tensor self, tensor indices, int64_t *output_size_data, int output_size_len);
void atg_max_unpool2d_out(tensor *, tensor out, tensor self, tensor indices, int64_t *output_size_data, int output_size_len);
void atg_max_unpool3d(tensor *, tensor self, tensor indices, int64_t *output_size_data, int output_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_max_unpool3d_out(tensor *, tensor out, tensor self, tensor indices, int64_t *output_size_data, int output_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_maximum(tensor *, tensor self, tensor other);
void atg_maximum_out(tensor *, tensor out, tensor self, tensor other);
void atg_mean(tensor *, tensor self, int dtype);
void atg_mean_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_mean_dtype_out(tensor *, tensor out, tensor self, int dtype);
void atg_mean_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_median(tensor *, tensor self);
void atg_median_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_median_dim_values(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int keepdim);
void atg_median_out(tensor *, tensor out, tensor self);
tensor *atg_meshgrid(tensor *tensors_data, int tensors_len);
tensor *atg_meshgrid_indexing(tensor *tensors_data, int tensors_len, char* indexing_ptr, int indexing_len);
void atg_mh(tensor *, tensor self);
void atg_min(tensor *, tensor self);
void atg_min_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_min_dim_min(tensor *, tensor min, tensor min_indices, tensor self, int64_t dim, int keepdim);
void atg_min_other(tensor *, tensor self, tensor other);
void atg_min_out(tensor *, tensor out, tensor self, tensor other);
void atg_min_unary_out(tensor *, tensor out, tensor self);
void atg_minimum(tensor *, tensor self, tensor other);
void atg_minimum_out(tensor *, tensor out, tensor self, tensor other);
void atg_miopen_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double exponential_average_factor, double epsilon);
void atg_miopen_batch_norm_backward(tensor *, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon);
void atg_miopen_batch_norm_backward_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, tensor grad_output, tensor weight, tensor running_mean, tensor running_var, tensor save_mean, tensor save_var, double epsilon);
void atg_miopen_batch_norm_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double exponential_average_factor, double epsilon);
void atg_miopen_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_add_relu(tensor *, tensor self, tensor weight, tensor z, scalar alpha, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_miopen_convolution_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_relu(tensor *, tensor self, tensor weight, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_miopen_convolution_transpose(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_convolution_transpose_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_ctc_loss(tensor *, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int deterministic, int zero_infinity);
void atg_miopen_ctc_loss_out(tensor *, tensor out0, tensor out1, tensor log_probs, tensor targets, int64_t *input_lengths_data, int input_lengths_len, int64_t *target_lengths_data, int target_lengths_len, int64_t blank, int deterministic, int zero_infinity);
void atg_miopen_ctc_loss_tensor(tensor *, tensor log_probs, tensor targets, tensor input_lengths, tensor target_lengths, int64_t blank, int deterministic, int zero_infinity);
void atg_miopen_depthwise_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_depthwise_convolution_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int benchmark, int deterministic);
void atg_miopen_rnn(tensor *, tensor input, tensor *weight_data, int weight_len, int64_t weight_stride0, tensor hx, tensor cx, int64_t mode, int64_t hidden_size, int64_t num_layers, int batch_first, double dropout, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, tensor dropout_state);
void atg_miopen_rnn_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor out4, tensor input, tensor *weight_data, int weight_len, int64_t weight_stride0, tensor hx, tensor cx, int64_t mode, int64_t hidden_size, int64_t num_layers, int batch_first, double dropout, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, tensor dropout_state);
void atg_mish(tensor *, tensor self);
void atg_mish_(tensor *, tensor self);
void atg_mish_backward(tensor *, tensor grad_output, tensor self);
void atg_mish_out(tensor *, tensor out, tensor self);
void atg_mkldnn_adaptive_avg_pool2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len);
void atg_mkldnn_adaptive_avg_pool2d_backward(tensor *, tensor grad_output, tensor self);
void atg_mkldnn_adaptive_avg_pool2d_backward_out(tensor *, tensor out, tensor grad_output, tensor self);
void atg_mkldnn_adaptive_avg_pool2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len);
void atg_mkldnn_convolution(tensor *, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_mkldnn_convolution_out(tensor *, tensor out, tensor self, tensor weight, tensor bias, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups);
void atg_mkldnn_linear(tensor *, tensor self, tensor weight, tensor bias);
void atg_mkldnn_linear_backward_input(tensor *, int64_t *input_size_data, int input_size_len, tensor grad_output, tensor weight);
void atg_mkldnn_linear_backward_input_out(tensor *, tensor out, int64_t *input_size_data, int input_size_len, tensor grad_output, tensor weight);
void atg_mkldnn_linear_backward_weights(tensor *, tensor grad_output, tensor input, tensor weight, int bias_defined);
void atg_mkldnn_linear_backward_weights_out(tensor *, tensor out0, tensor out1, tensor grad_output, tensor input, tensor weight, int bias_defined);
void atg_mkldnn_linear_out(tensor *, tensor out, tensor self, tensor weight, tensor bias);
void atg_mkldnn_max_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool2d_backward(tensor *, tensor grad_output, tensor output, tensor input, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool2d_backward_out(tensor *, tensor out, tensor grad_output, tensor output, tensor input, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool2d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool3d_backward(tensor *, tensor grad_output, tensor output, tensor input, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool3d_backward_out(tensor *, tensor out, tensor grad_output, tensor output, tensor input, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_max_pool3d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_mkldnn_reorder_conv2d_weight(tensor *, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int64_t *input_size_data, int input_size_len);
void atg_mkldnn_reorder_conv2d_weight_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int64_t *input_size_data, int input_size_len);
void atg_mkldnn_reorder_conv3d_weight(tensor *, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int64_t *input_size_data, int input_size_len);
void atg_mkldnn_reorder_conv3d_weight_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len, int64_t *stride_data, int stride_len, int64_t *dilation_data, int dilation_len, int64_t groups, int64_t *input_size_data, int input_size_len);
void atg_mkldnn_rnn_layer(tensor *, tensor input, tensor weight0, tensor weight1, tensor weight2, tensor weight3, tensor hx_, tensor cx_, int reverse, int64_t *batch_sizes_data, int batch_sizes_len, int64_t mode, int64_t hidden_size, int64_t num_layers, int has_biases, int bidirectional, int batch_first, int train);
void atg_mkldnn_rnn_layer_backward(tensor *, tensor input, tensor weight1, tensor weight2, tensor weight3, tensor weight4, tensor hx_, tensor cx_tmp, tensor output, tensor hy_, tensor cy_, tensor grad_output, tensor grad_hy, tensor grad_cy, int reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, int has_biases, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, int batch_first, tensor workspace);
void atg_mkldnn_rnn_layer_backward_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor out4, tensor out5, tensor out6, tensor input, tensor weight1, tensor weight2, tensor weight3, tensor weight4, tensor hx_, tensor cx_tmp, tensor output, tensor hy_, tensor cy_, tensor grad_output, tensor grad_hy, tensor grad_cy, int reverse, int64_t mode, int64_t hidden_size, int64_t num_layers, int has_biases, int train, int bidirectional, int64_t *batch_sizes_data, int batch_sizes_len, int batch_first, tensor workspace);
void atg_mkldnn_rnn_layer_out(tensor *, tensor out0, tensor out1, tensor out2, tensor out3, tensor input, tensor weight0, tensor weight1, tensor weight2, tensor weight3, tensor hx_, tensor cx_, int reverse, int64_t *batch_sizes_data, int batch_sizes_len, int64_t mode, int64_t hidden_size, int64_t num_layers, int has_biases, int bidirectional, int batch_first, int train);
void atg_mm(tensor *, tensor self, tensor mat2);
void atg_mm_dtype(tensor *, tensor self, tensor mat2, int out_dtype);
void atg_mm_dtype_out(tensor *, tensor out, tensor self, tensor mat2, int out_dtype);
void atg_mm_out(tensor *, tensor out, tensor self, tensor mat2);
void atg_mode(tensor *, tensor self, int64_t dim, int keepdim);
void atg_mode_values(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int keepdim);
void atg_moveaxis(tensor *, tensor self, int64_t *source_data, int source_len, int64_t *destination_data, int destination_len);
void atg_moveaxis_int(tensor *, tensor self, int64_t source, int64_t destination);
void atg_movedim(tensor *, tensor self, int64_t *source_data, int source_len, int64_t *destination_data, int destination_len);
void atg_movedim_int(tensor *, tensor self, int64_t source, int64_t destination);
void atg_mse_loss(tensor *, tensor self, tensor target, int64_t reduction);
void atg_mse_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_mse_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_mse_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
void atg_msort(tensor *, tensor self);
void atg_msort_out(tensor *, tensor out, tensor self);
void atg_mt(tensor *, tensor self);
void atg_mul(tensor *, tensor self, tensor other);
void atg_mul_(tensor *, tensor self, tensor other);
void atg_mul_out(tensor *, tensor out, tensor self, tensor other);
void atg_mul_scalar(tensor *, tensor self, scalar other);
void atg_mul_scalar_(tensor *, tensor self, scalar other);
void atg_mul_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_multi_margin_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, scalar p, scalar margin, tensor weight, int64_t reduction);
void atg_multi_margin_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, scalar p, scalar margin, tensor weight, int64_t reduction);
void atg_multilabel_margin_loss(tensor *, tensor self, tensor target, int64_t reduction);
void atg_multilabel_margin_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction, tensor is_target);
void atg_multilabel_margin_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction, tensor is_target);
void atg_multilabel_margin_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
void atg_multinomial(tensor *, tensor self, int64_t num_samples, int replacement);
void atg_multinomial_out(tensor *, tensor out, tensor self, int64_t num_samples, int replacement);
void atg_multiply(tensor *, tensor self, tensor other);
void atg_multiply_(tensor *, tensor self, tensor other);
void atg_multiply_out(tensor *, tensor out, tensor self, tensor other);
void atg_multiply_scalar(tensor *, tensor self, scalar other);
void atg_multiply_scalar_(tensor *, tensor self, scalar other);
void atg_mv(tensor *, tensor self, tensor vec);
void atg_mv_out(tensor *, tensor out, tensor self, tensor vec);
void atg_mvlgamma(tensor *, tensor self, int64_t p);
void atg_mvlgamma_(tensor *, tensor self, int64_t p);
void atg_mvlgamma_out(tensor *, tensor out, tensor self, int64_t p);
void atg_nan_to_num(tensor *, tensor self, double nan_v, uint8_t nan_null, double posinf_v, uint8_t posinf_null, double neginf_v, uint8_t neginf_null);
void atg_nan_to_num_(tensor *, tensor self, double nan_v, uint8_t nan_null, double posinf_v, uint8_t posinf_null, double neginf_v, uint8_t neginf_null);
void atg_nan_to_num_out(tensor *, tensor out, tensor self, double nan_v, uint8_t nan_null, double posinf_v, uint8_t posinf_null, double neginf_v, uint8_t neginf_null);
void atg_nanmean(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_nanmean_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_nanmedian(tensor *, tensor self);
void atg_nanmedian_dim(tensor *, tensor self, int64_t dim, int keepdim);
void atg_nanmedian_dim_values(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int keepdim);
void atg_nanmedian_out(tensor *, tensor out, tensor self);
void atg_nanquantile(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_out(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_scalar(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nanquantile_scalar_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_nansum(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_nansum_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_narrow(tensor *, tensor self, int64_t dim, int64_t start, int64_t length);
void atg_narrow_copy(tensor *, tensor self, int64_t dim, int64_t start, int64_t length);
void atg_narrow_copy_out(tensor *, tensor out, tensor self, int64_t dim, int64_t start, int64_t length);
void atg_narrow_tensor(tensor *, tensor self, int64_t dim, tensor start, int64_t length);
void atg_native_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg_native_batch_norm_out(tensor *, tensor out, tensor save_mean, tensor save_invstd, tensor input, tensor weight, tensor bias, tensor running_mean, tensor running_var, int training, double momentum, double eps);
void atg_native_channel_shuffle(tensor *, tensor self, int64_t groups);
void atg_native_dropout(tensor *, tensor input, double p, int train);
void atg_native_dropout_backward(tensor *, tensor grad_output, tensor mask, double scale);
void atg_native_dropout_backward_out(tensor *, tensor out, tensor grad_output, tensor mask, double scale);
void atg_native_dropout_out(tensor *, tensor out0, tensor out1, tensor input, double p, int train);
void atg_native_group_norm(tensor *, tensor input, tensor weight, tensor bias, int64_t n, int64_t C, int64_t HxW, int64_t group, double eps);
void atg_native_group_norm_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, tensor weight, tensor bias, int64_t n, int64_t C, int64_t HxW, int64_t group, double eps);
void atg_native_layer_norm(tensor *, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, tensor bias, double eps);
void atg_native_layer_norm_out(tensor *, tensor out0, tensor out1, tensor out2, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, tensor bias, double eps);
void atg_native_norm(tensor *, tensor self);
void atg_native_norm_out(tensor *, tensor out, tensor self);
void atg_native_norm_scalaropt_dim_dtype(tensor *, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_native_norm_scalaropt_dim_dtype_out(tensor *, tensor out, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_ne(tensor *, tensor self, scalar other);
void atg_ne_(tensor *, tensor self, scalar other);
void atg_ne_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_ne_tensor(tensor *, tensor self, tensor other);
void atg_ne_tensor_(tensor *, tensor self, tensor other);
void atg_ne_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_neg(tensor *, tensor self);
void atg_neg_(tensor *, tensor self);
void atg_neg_out(tensor *, tensor out, tensor self);
void atg_negative(tensor *, tensor self);
void atg_negative_(tensor *, tensor self);
void atg_negative_out(tensor *, tensor out, tensor self);
void atg_nested_to_padded_tensor(tensor *, tensor self, double padding, int64_t *output_size_data, int output_size_len);
void atg_new_empty(tensor *, tensor self, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_new_empty_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg_new_empty_strided(tensor *, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len, int options_kind, int options_device);
void atg_new_empty_strided_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
void atg_new_full(tensor *, tensor self, int64_t *size_data, int size_len, scalar fill_value, int options_kind, int options_device);
void atg_new_full_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, scalar fill_value);
void atg_new_ones(tensor *, tensor self, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_new_ones_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg_new_zeros(tensor *, tensor self, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_new_zeros_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg_nextafter(tensor *, tensor self, tensor other);
void atg_nextafter_(tensor *, tensor self, tensor other);
void atg_nextafter_out(tensor *, tensor out, tensor self, tensor other);
void atg_nll_loss(tensor *, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nll_loss2d(tensor *, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nll_loss2d_backward(tensor *, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, tensor total_weight);
void atg_nll_loss2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, tensor total_weight);
void atg_nll_loss2d_out(tensor *, tensor out, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nll_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, tensor total_weight);
void atg_nll_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index, tensor total_weight);
void atg_nll_loss_nd(tensor *, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nll_loss_out(tensor *, tensor out, tensor self, tensor target, tensor weight, int64_t reduction, int64_t ignore_index);
void atg_nonzero(tensor *, tensor self);
tensor *atg_nonzero_numpy(tensor self);
void atg_nonzero_out(tensor *, tensor out, tensor self);
void atg_nonzero_static(tensor *, tensor self, int64_t size, int64_t fill_value);
void atg_nonzero_static_out(tensor *, tensor out, tensor self, int64_t size, int64_t fill_value);
void atg_norm(tensor *, tensor self);
void atg_norm_dtype_out(tensor *, tensor out, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_norm_except_dim(tensor *, tensor v, int64_t pow, int64_t dim);
void atg_norm_out(tensor *, tensor out, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim);
void atg_norm_scalar_out(tensor *, tensor out, tensor self);
void atg_norm_scalaropt_dim(tensor *, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim);
void atg_norm_scalaropt_dim_dtype(tensor *, tensor self, scalar p, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_norm_scalaropt_dtype(tensor *, tensor self, scalar p, int dtype);
void atg_norm_scalaropt_dtype_out(tensor *, tensor out, tensor self, scalar p, int dtype);
void atg_normal_(tensor *, tensor self, double mean, double std);
void atg_normal_functional(tensor *, tensor self, double mean, double std);
void atg_not_equal(tensor *, tensor self, scalar other);
void atg_not_equal_(tensor *, tensor self, scalar other);
void atg_not_equal_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_not_equal_tensor(tensor *, tensor self, tensor other);
void atg_not_equal_tensor_(tensor *, tensor self, tensor other);
void atg_not_equal_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_nuclear_norm(tensor *, tensor self, int keepdim);
void atg_nuclear_norm_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_nuclear_norm_dim_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_nuclear_norm_out(tensor *, tensor out, tensor self, int keepdim);
void atg_numpy_t(tensor *, tensor self);
void atg_one_hot(tensor *, tensor self, int64_t num_classes);
void atg_ones(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_ones_like(tensor *, tensor self);
void atg_ones_like_out(tensor *, tensor out, tensor self);
void atg_ones_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_orgqr(tensor *, tensor self, tensor input2);
void atg_orgqr_out(tensor *, tensor out, tensor self, tensor input2);
void atg_ormqr(tensor *, tensor self, tensor input2, tensor input3, int left, int transpose);
void atg_ormqr_out(tensor *, tensor out, tensor self, tensor input2, tensor input3, int left, int transpose);
void atg_outer(tensor *, tensor self, tensor vec2);
void atg_outer_out(tensor *, tensor out, tensor self, tensor vec2);
int64_t atg_output_nr(tensor self);
void atg_pad(tensor *, tensor self, int64_t *pad_data, int pad_len, char* mode_ptr, int mode_len, double value_v, uint8_t value_null);
void atg_pad_sequence(tensor *, tensor *sequences_data, int sequences_len, int batch_first, double padding_value, char* padding_side_ptr, int padding_side_len);
void atg_pairwise_distance(tensor *, tensor x1, tensor x2, double p, double eps, int keepdim);
void atg_pdist(tensor *, tensor self, double p);
void atg_permute(tensor *, tensor self, int64_t *dims_data, int dims_len);
void atg_permute_copy(tensor *, tensor self, int64_t *dims_data, int dims_len);
void atg_permute_copy_out(tensor *, tensor out, tensor self, int64_t *dims_data, int dims_len);
void atg_pin_memory(tensor *, tensor self, int device);
void atg_pinverse(tensor *, tensor self, double rcond);
void atg_pixel_shuffle(tensor *, tensor self, int64_t upscale_factor);
void atg_pixel_shuffle_out(tensor *, tensor out, tensor self, int64_t upscale_factor);
void atg_pixel_unshuffle(tensor *, tensor self, int64_t downscale_factor);
void atg_pixel_unshuffle_out(tensor *, tensor out, tensor self, int64_t downscale_factor);
void atg_poisson(tensor *, tensor self);
void atg_poisson_nll_loss(tensor *, tensor input, tensor target, int log_input, int full, double eps, int64_t reduction);
void atg_poisson_out(tensor *, tensor out, tensor self);
void atg_polar(tensor *, tensor abs, tensor angle);
void atg_polar_out(tensor *, tensor out, tensor abs, tensor angle);
void atg_polygamma(tensor *, int64_t n, tensor self);
void atg_polygamma_(tensor *, tensor self, int64_t n);
void atg_polygamma_out(tensor *, tensor out, int64_t n, tensor self);
void atg_positive(tensor *, tensor self);
void atg_pow(tensor *, tensor self, tensor exponent);
void atg_pow_(tensor *, tensor self, scalar exponent);
void atg_pow_scalar(tensor *, scalar self_scalar, tensor exponent);
void atg_pow_scalar_out(tensor *, tensor out, scalar self_scalar, tensor exponent);
void atg_pow_tensor_(tensor *, tensor self, tensor exponent);
void atg_pow_tensor_scalar(tensor *, tensor self, scalar exponent);
void atg_pow_tensor_scalar_out(tensor *, tensor out, tensor self, scalar exponent);
void atg_pow_tensor_tensor_out(tensor *, tensor out, tensor self, tensor exponent);
void atg_prelu(tensor *, tensor self, tensor weight);
void atg_prod(tensor *, tensor self, int dtype);
void atg_prod_dim_int(tensor *, tensor self, int64_t dim, int keepdim, int dtype);
void atg_prod_int_out(tensor *, tensor out, tensor self, int64_t dim, int keepdim, int dtype);
void atg_prod_out(tensor *, tensor out, tensor self, int dtype);
void atg_put(tensor *, tensor self, tensor index, tensor source, int accumulate);
void atg_put_(tensor *, tensor self, tensor index, tensor source, int accumulate);
void atg_put_out(tensor *, tensor out, tensor self, tensor index, tensor source, int accumulate);
int64_t atg_q_per_channel_axis(tensor self);
void atg_q_per_channel_scales(tensor *, tensor self);
void atg_q_per_channel_scales_out(tensor *, tensor out, tensor self);
void atg_q_per_channel_zero_points(tensor *, tensor self);
void atg_q_per_channel_zero_points_out(tensor *, tensor out, tensor self);
double atg_q_scale(tensor self);
int64_t atg_q_zero_point(tensor self);
void atg_qr(tensor *, tensor self, int some);
void atg_qr_q(tensor *, tensor Q, tensor R, tensor self, int some);
void atg_quantile(tensor *, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_out(tensor *, tensor out, tensor self, tensor q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_scalar(tensor *, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantile_scalar_out(tensor *, tensor out, tensor self, double q, int64_t dim_v, uint8_t dim_null, int keepdim, char* interpolation_ptr, int interpolation_len);
void atg_quantize_per_channel(tensor *, tensor self, tensor scales, tensor zero_points, int64_t axis, int dtype);
void atg_quantize_per_channel_out(tensor *, tensor out, tensor self, tensor scales, tensor zero_points, int64_t axis, int dtype);
void atg_quantize_per_tensor(tensor *, tensor self, double scale, int64_t zero_point, int dtype);
void atg_quantize_per_tensor_dynamic(tensor *, tensor self, int dtype, int reduce_range);
void atg_quantize_per_tensor_dynamic_out(tensor *, tensor out, tensor self, int dtype, int reduce_range);
void atg_quantize_per_tensor_out(tensor *, tensor out, tensor self, double scale, int64_t zero_point, int dtype);
void atg_quantize_per_tensor_tensor_qparams(tensor *, tensor self, tensor scale, tensor zero_point, int dtype);
void atg_quantize_per_tensor_tensor_qparams_out(tensor *, tensor out, tensor self, tensor scale, tensor zero_point, int dtype);
tensor *atg_quantize_per_tensor_tensors(tensor *tensors_data, int tensors_len, tensor scales, tensor zero_points, int dtype);
void atg_quantize_per_tensor_tensors_out(tensor *out_data, int out_len, tensor *tensors_data, int tensors_len, tensor scales, tensor zero_points, int dtype);
void atg_quantized_batch_norm(tensor *, tensor input, tensor weight, tensor bias, tensor mean, tensor var, double eps, double output_scale, int64_t output_zero_point);
void atg_quantized_batch_norm_out(tensor *, tensor out, tensor input, tensor weight, tensor bias, tensor mean, tensor var, double eps, double output_scale, int64_t output_zero_point);
void atg_quantized_gru_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_quantized_lstm_cell(tensor *, tensor input, tensor *hx_data, int hx_len, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_quantized_max_pool1d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool1d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool2d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool2d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool3d(tensor *, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_max_pool3d_out(tensor *, tensor out, tensor self, int64_t *kernel_size_data, int kernel_size_len, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len, int ceil_mode);
void atg_quantized_rnn_relu_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_quantized_rnn_tanh_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh, tensor packed_ih, tensor packed_hh, tensor col_offsets_ih, tensor col_offsets_hh, scalar scale_ih, scalar scale_hh, scalar zero_point_ih, scalar zero_point_hh);
void atg_rad2deg(tensor *, tensor self);
void atg_rad2deg_(tensor *, tensor self);
void atg_rad2deg_out(tensor *, tensor out, tensor self);
void atg_rand(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_rand_like(tensor *, tensor self);
void atg_rand_like_out(tensor *, tensor out, tensor self);
void atg_rand_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_randint(tensor *, int64_t high, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_randint_like(tensor *, tensor self, int64_t high);
void atg_randint_like_low_dtype(tensor *, tensor self, int64_t low, int64_t high);
void atg_randint_like_low_dtype_out(tensor *, tensor out, tensor self, int64_t low, int64_t high);
void atg_randint_like_out(tensor *, tensor out, tensor self, int64_t high);
void atg_randint_like_tensor(tensor *, tensor self, tensor high);
void atg_randint_like_tensor_out(tensor *, tensor out, tensor self, tensor high);
void atg_randint_low(tensor *, int64_t low, int64_t high, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_randint_low_out(tensor *, tensor out, int64_t low, int64_t high, int64_t *size_data, int size_len);
void atg_randint_out(tensor *, tensor out, int64_t high, int64_t *size_data, int size_len);
void atg_randn(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_randn_like(tensor *, tensor self);
void atg_randn_like_out(tensor *, tensor out, tensor self);
void atg_randn_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_random(tensor *, tensor self);
void atg_random_(tensor *, tensor self);
void atg_random_from(tensor *, tensor self, int64_t from, int64_t to_v, uint8_t to_null);
void atg_random_from_(tensor *, tensor self, int64_t from, int64_t to_v, uint8_t to_null);
void atg_random_from_out(tensor *, tensor out, tensor self, int64_t from, int64_t to_v, uint8_t to_null);
void atg_random_out(tensor *, tensor out, tensor self);
void atg_random_to(tensor *, tensor self, int64_t to);
void atg_random_to_(tensor *, tensor self, int64_t to);
void atg_random_to_out(tensor *, tensor out, tensor self, int64_t to);
void atg_randperm(tensor *, int64_t n, int options_kind, int options_device);
void atg_randperm_out(tensor *, tensor out, int64_t n);
void atg_range(tensor *, scalar start, scalar end, int options_kind, int options_device);
void atg_range_out(tensor *, tensor out, scalar start, scalar end);
void atg_range_out_(tensor *, tensor out, scalar start, scalar end);
void atg_range_step(tensor *, scalar start, scalar end, int options_kind, int options_device);
void atg_ravel(tensor *, tensor self);
void atg_real(tensor *, tensor self);
void atg_reciprocal(tensor *, tensor self);
void atg_reciprocal_(tensor *, tensor self);
void atg_reciprocal_out(tensor *, tensor out, tensor self);
void atg_reflection_pad1d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad1d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad1d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad2d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad2d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad2d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad3d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad3d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_reflection_pad3d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_relu(tensor *, tensor self);
void atg_relu6(tensor *, tensor self);
void atg_relu6_(tensor *, tensor self);
void atg_relu_(tensor *, tensor self);
void atg_relu_out(tensor *, tensor out, tensor self);
void atg_remainder(tensor *, tensor self, scalar other);
void atg_remainder_(tensor *, tensor self, scalar other);
void atg_remainder_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_remainder_scalar_tensor(tensor *, scalar self_scalar, tensor other);
void atg_remainder_scalar_tensor_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_remainder_tensor(tensor *, tensor self, tensor other);
void atg_remainder_tensor_(tensor *, tensor self, tensor other);
void atg_remainder_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_renorm(tensor *, tensor self, scalar p, int64_t dim, scalar maxnorm);
void atg_renorm_(tensor *, tensor self, scalar p, int64_t dim, scalar maxnorm);
void atg_renorm_out(tensor *, tensor out, tensor self, scalar p, int64_t dim, scalar maxnorm);
void atg_repeat(tensor *, tensor self, int64_t *repeats_data, int repeats_len);
void atg_repeat_interleave(tensor *, tensor repeats, int64_t output_size_v, uint8_t output_size_null);
void atg_repeat_interleave_self_int(tensor *, tensor self, int64_t repeats, int64_t dim_v, uint8_t dim_null, int64_t output_size_v, uint8_t output_size_null);
void atg_repeat_interleave_self_tensor(tensor *, tensor self, tensor repeats, int64_t dim_v, uint8_t dim_null, int64_t output_size_v, uint8_t output_size_null);
void atg_repeat_interleave_tensor_out(tensor *, tensor out, tensor repeats, int64_t output_size_v, uint8_t output_size_null);
void atg_repeat_out(tensor *, tensor out, tensor self, int64_t *repeats_data, int repeats_len);
void atg_replication_pad1d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad1d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad1d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad2d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad2d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad2d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad3d(tensor *, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad3d_backward(tensor *, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, int64_t *padding_data, int padding_len);
void atg_replication_pad3d_out(tensor *, tensor out, tensor self, int64_t *padding_data, int padding_len);
void atg_requires_grad_(tensor *, tensor self, int requires_grad);
void atg_reshape(tensor *, tensor self, int64_t *shape_data, int shape_len);
void atg_reshape_as(tensor *, tensor self, tensor other);
void atg_resize(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_resize_(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_resize_as(tensor *, tensor self, tensor the_template);
void atg_resize_as_(tensor *, tensor self, tensor the_template);
void atg_resize_as_out(tensor *, tensor out, tensor self, tensor the_template);
void atg_resize_as_sparse(tensor *, tensor self, tensor the_template);
void atg_resize_as_sparse_(tensor *, tensor self, tensor the_template);
void atg_resize_as_sparse_out(tensor *, tensor out, tensor self, tensor the_template);
void atg_resize_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg_resolve_conj(tensor *, tensor self);
void atg_resolve_neg(tensor *, tensor self);
int atg_retains_grad(tensor self);
void atg_rms_norm(tensor *, tensor input, int64_t *normalized_shape_data, int normalized_shape_len, tensor weight, double eps_v, uint8_t eps_null);
void atg_rnn_relu(tensor *, tensor input, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg_rnn_relu_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh);
void atg_rnn_relu_data(tensor *, tensor data, tensor batch_sizes, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional);
void atg_rnn_tanh(tensor *, tensor input, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional, int batch_first);
void atg_rnn_tanh_cell(tensor *, tensor input, tensor hx, tensor w_ih, tensor w_hh, tensor b_ih, tensor b_hh);
void atg_rnn_tanh_data(tensor *, tensor data, tensor batch_sizes, tensor hx, tensor *params_data, int params_len, int has_biases, int64_t num_layers, double dropout, int train, int bidirectional);
void atg_roll(tensor *, tensor self, int64_t *shifts_data, int shifts_len, int64_t *dims_data, int dims_len);
void atg_roll_out(tensor *, tensor out, tensor self, int64_t *shifts_data, int shifts_len, int64_t *dims_data, int dims_len);
void atg_rot90(tensor *, tensor self, int64_t k, int64_t *dims_data, int dims_len);
void atg_rot90_out(tensor *, tensor out, tensor self, int64_t k, int64_t *dims_data, int dims_len);
void atg_round(tensor *, tensor self);
void atg_round_(tensor *, tensor self);
void atg_round_decimals(tensor *, tensor self, int64_t decimals);
void atg_round_decimals_(tensor *, tensor self, int64_t decimals);
void atg_round_decimals_out(tensor *, tensor out, tensor self, int64_t decimals);
void atg_round_out(tensor *, tensor out, tensor self);
void atg_row_indices(tensor *, tensor self);
void atg_row_indices_copy(tensor *, tensor self);
void atg_row_indices_copy_out(tensor *, tensor out, tensor self);
void atg_row_stack(tensor *, tensor *tensors_data, int tensors_len);
void atg_row_stack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
void atg_rrelu(tensor *, tensor self, int training);
void atg_rrelu_(tensor *, tensor self, int training);
void atg_rrelu_with_noise(tensor *, tensor self, tensor noise, int training);
void atg_rrelu_with_noise_(tensor *, tensor self, tensor noise, int training);
void atg_rrelu_with_noise_backward(tensor *, tensor grad_output, tensor self, tensor noise, scalar lower, scalar upper, int training, int self_is_result);
void atg_rrelu_with_noise_backward_out(tensor *, tensor out, tensor grad_output, tensor self, tensor noise, scalar lower, scalar upper, int training, int self_is_result);
void atg_rrelu_with_noise_functional(tensor *, tensor self, tensor noise, int training);
void atg_rrelu_with_noise_out(tensor *, tensor out, tensor self, tensor noise, int training);
void atg_rsqrt(tensor *, tensor self);
void atg_rsqrt_(tensor *, tensor self);
void atg_rsqrt_out(tensor *, tensor out, tensor self);
void atg_rsub(tensor *, tensor self, tensor other);
void atg_rsub_scalar(tensor *, tensor self, scalar other);
void atg_rsub_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_rsub_tensor_out(tensor *, tensor out, tensor self, tensor other);
void atg_scalar_tensor(tensor *, scalar s, int options_kind, int options_device);
void atg_scalar_tensor_out(tensor *, tensor out, scalar s);
void atg_scaled_dot_product_attention(tensor *, tensor query, tensor key, tensor value, tensor attn_mask, double dropout_p, int is_causal, double scale_v, uint8_t scale_null, int enable_gqa);
void atg_scatter(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_add(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_add_(tensor *, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_add_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_reduce(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len);
void atg_scatter_reduce_(tensor *, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len);
void atg_scatter_reduce_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor src, char* reduce_ptr, int reduce_len);
void atg_scatter_src_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, tensor src);
void atg_scatter_value(tensor *, tensor self, int64_t dim, tensor index, scalar value);
void atg_scatter_value_(tensor *, tensor self, int64_t dim, tensor index, scalar value);
void atg_scatter_value_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, scalar value);
void atg_scatter_value_reduce(tensor *, tensor self, int64_t dim, tensor index, scalar value, char* reduce_ptr, int reduce_len);
void atg_scatter_value_reduce_(tensor *, tensor self, int64_t dim, tensor index, scalar value, char* reduce_ptr, int reduce_len);
void atg_scatter_value_reduce_out(tensor *, tensor out, tensor self, int64_t dim, tensor index, scalar value, char* reduce_ptr, int reduce_len);
void atg_searchsorted(tensor *, tensor sorted_sequence, tensor self, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_searchsorted_scalar(tensor *, tensor sorted_sequence, scalar self_scalar, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_searchsorted_scalar_out(tensor *, tensor out, tensor sorted_sequence, scalar self_scalar, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_searchsorted_tensor_out(tensor *, tensor out, tensor sorted_sequence, tensor self, int out_int32, int right, char* side_ptr, int side_len, tensor sorter);
void atg_segment_reduce(tensor *, tensor data, char* reduce_ptr, int reduce_len, tensor lengths, tensor indices, tensor offsets, int64_t axis, int unsafe, scalar initial);
void atg_segment_reduce_out(tensor *, tensor out, tensor data, char* reduce_ptr, int reduce_len, tensor lengths, tensor indices, tensor offsets, int64_t axis, int unsafe, scalar initial);
void atg_select(tensor *, tensor self, int64_t dim, int64_t index);
void atg_select_backward(tensor *, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t index);
void atg_select_backward_out(tensor *, tensor out, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t index);
void atg_select_copy(tensor *, tensor self, int64_t dim, int64_t index);
void atg_select_copy_int_out(tensor *, tensor out, tensor self, int64_t dim, int64_t index);
void atg_select_scatter(tensor *, tensor self, tensor src, int64_t dim, int64_t index);
void atg_select_scatter_out(tensor *, tensor out, tensor self, tensor src, int64_t dim, int64_t index);
void atg_selu(tensor *, tensor self);
void atg_selu_(tensor *, tensor self);
void atg_set(tensor *, tensor self);
void atg_set_(tensor *, tensor self);
void atg_set_data(tensor self, tensor new_data);
void atg_set_out(tensor *, tensor out, tensor self);
void atg_set_requires_grad(tensor *, tensor self, int r);
void atg_set_source_tensor(tensor *, tensor self, tensor source);
void atg_set_source_tensor_(tensor *, tensor self, tensor source);
void atg_set_source_tensor_out(tensor *, tensor out, tensor self, tensor source);
void atg_set_source_tensor_storage_offset_(tensor *, tensor self, tensor source, int64_t storage_offset, int64_t *size_data, int size_len, int64_t *stride_data, int stride_len);
void atg_sgn(tensor *, tensor self);
void atg_sgn_(tensor *, tensor self);
void atg_sgn_out(tensor *, tensor out, tensor self);
void atg_sigmoid(tensor *, tensor self);
void atg_sigmoid_(tensor *, tensor self);
void atg_sigmoid_backward(tensor *, tensor grad_output, tensor output);
void atg_sigmoid_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor output);
void atg_sigmoid_out(tensor *, tensor out, tensor self);
void atg_sign(tensor *, tensor self);
void atg_sign_(tensor *, tensor self);
void atg_sign_out(tensor *, tensor out, tensor self);
void atg_signbit(tensor *, tensor self);
void atg_signbit_out(tensor *, tensor out, tensor self);
void atg_silu(tensor *, tensor self);
void atg_silu_(tensor *, tensor self);
void atg_silu_backward(tensor *, tensor grad_output, tensor self);
void atg_silu_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self);
void atg_silu_out(tensor *, tensor out, tensor self);
void atg_sin(tensor *, tensor self);
void atg_sin_(tensor *, tensor self);
void atg_sin_out(tensor *, tensor out, tensor self);
void atg_sinc(tensor *, tensor self);
void atg_sinc_(tensor *, tensor self);
void atg_sinc_out(tensor *, tensor out, tensor self);
void atg_sinh(tensor *, tensor self);
void atg_sinh_(tensor *, tensor self);
void atg_sinh_out(tensor *, tensor out, tensor self);
void atg_slice(tensor *, tensor self, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_backward(tensor *, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t start, int64_t end, int64_t step);
void atg_slice_backward_out(tensor *, tensor out, tensor grad_output, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t start, int64_t end, int64_t step);
void atg_slice_copy(tensor *, tensor self, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_copy_tensor_out(tensor *, tensor out, tensor self, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_inverse(tensor *, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_scatter(tensor *, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slice_scatter_out(tensor *, tensor out, tensor self, tensor src, int64_t dim, int64_t start_v, uint8_t start_null, int64_t end_v, uint8_t end_null, int64_t step);
void atg_slogdet(tensor *, tensor self);
void atg_slogdet_out(tensor *, tensor sign, tensor logabsdet, tensor self);
void atg_slow_conv3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_slow_conv3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len);
void atg_slow_conv_dilated2d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_dilated2d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_dilated3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_dilated3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_transpose2d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_transpose2d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_transpose3d(tensor *, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *dilation_data, int dilation_len);
void atg_slow_conv_transpose3d_out(tensor *, tensor out, tensor self, tensor weight, int64_t *kernel_size_data, int kernel_size_len, tensor bias, int64_t *stride_data, int stride_len, int64_t *padding_data, int padding_len, int64_t *output_padding_data, int output_padding_len, int64_t *dilation_data, int dilation_len);
void atg_smm(tensor *, tensor self, tensor mat2);
void atg_smooth_l1_loss(tensor *, tensor self, tensor target, int64_t reduction, double beta);
void atg_smooth_l1_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction, double beta);
void atg_smooth_l1_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction, double beta);
void atg_smooth_l1_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction, double beta);
void atg_soft_margin_loss(tensor *, tensor self, tensor target, int64_t reduction);
void atg_soft_margin_loss_backward(tensor *, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_soft_margin_loss_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, tensor target, int64_t reduction);
void atg_soft_margin_loss_out(tensor *, tensor out, tensor self, tensor target, int64_t reduction);
void atg_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_softmax_int_out(tensor *, tensor out, tensor self, int64_t dim, int dtype);
void atg_softplus(tensor *, tensor self);
void atg_softplus_backward(tensor *, tensor grad_output, tensor self, scalar beta, scalar threshold);
void atg_softplus_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar beta, scalar threshold);
void atg_softplus_out(tensor *, tensor out, tensor self);
void atg_softshrink(tensor *, tensor self);
void atg_softshrink_backward(tensor *, tensor grad_output, tensor self, scalar lambd);
void atg_softshrink_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar lambd);
void atg_softshrink_out(tensor *, tensor out, tensor self);
void atg_sort(tensor *, tensor self, int64_t dim, int descending);
void atg_sort_stable(tensor *, tensor self, int stable, int64_t dim, int descending);
void atg_sort_values(tensor *, tensor values, tensor indices, tensor self, int64_t dim, int descending);
void atg_sort_values_stable(tensor *, tensor values, tensor indices, tensor self, int stable, int64_t dim, int descending);
void atg_sparse_bsc_tensor(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int options_kind, int options_device);
void atg_sparse_bsc_tensor_ccol_row_value_size(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_sparse_bsr_tensor(tensor *, tensor crow_indices, tensor col_indices, tensor values, int options_kind, int options_device);
void atg_sparse_bsr_tensor_crow_col_value_size(tensor *, tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_sparse_compressed_tensor(tensor *, tensor compressed_indices, tensor plain_indices, tensor values, int options_kind, int options_device);
void atg_sparse_compressed_tensor_comp_plain_value_size(tensor *, tensor compressed_indices, tensor plain_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_sparse_coo_tensor(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_sparse_coo_tensor_indices(tensor *, tensor indices, tensor values, int options_kind, int options_device, int is_coalesced);
void atg_sparse_coo_tensor_indices_size(tensor *, tensor indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device, int is_coalesced);
void atg_sparse_coo_tensor_size_out(tensor *, tensor out, int64_t *size_data, int size_len);
void atg_sparse_csc_tensor(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int options_kind, int options_device);
void atg_sparse_csc_tensor_ccol_row_value_size(tensor *, tensor ccol_indices, tensor row_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_sparse_csr_tensor(tensor *, tensor crow_indices, tensor col_indices, tensor values, int options_kind, int options_device);
void atg_sparse_csr_tensor_crow_col_value_size(tensor *, tensor crow_indices, tensor col_indices, tensor values, int64_t *size_data, int size_len, int options_kind, int options_device);
int64_t atg_sparse_dim(tensor self);
void atg_sparse_mask(tensor *, tensor self, tensor mask);
void atg_sparse_mask_out(tensor *, tensor out, tensor self, tensor mask);
void atg_sparse_resize(tensor *, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_and_clear(tensor *, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_and_clear_(tensor *, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_and_clear_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_resize_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len, int64_t sparse_dim, int64_t dense_dim);
void atg_sparse_sampled_addmm(tensor *, tensor self, tensor mat1, tensor mat2);
void atg_sparse_sampled_addmm_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2);
void atg_special_airy_ai(tensor *, tensor x);
void atg_special_airy_ai_out(tensor *, tensor out, tensor x);
void atg_special_bessel_j0(tensor *, tensor self);
void atg_special_bessel_j0_out(tensor *, tensor out, tensor self);
void atg_special_bessel_j1(tensor *, tensor self);
void atg_special_bessel_j1_out(tensor *, tensor out, tensor self);
void atg_special_bessel_y0(tensor *, tensor self);
void atg_special_bessel_y0_out(tensor *, tensor out, tensor self);
void atg_special_bessel_y1(tensor *, tensor self);
void atg_special_bessel_y1_out(tensor *, tensor out, tensor self);
void atg_special_chebyshev_polynomial_t(tensor *, tensor x, tensor n);
void atg_special_chebyshev_polynomial_t_n_scalar(tensor *, tensor x, scalar n);
void atg_special_chebyshev_polynomial_t_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_chebyshev_polynomial_t_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_chebyshev_polynomial_t_x_scalar(tensor *, scalar x, tensor n);
void atg_special_chebyshev_polynomial_t_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_chebyshev_polynomial_u(tensor *, tensor x, tensor n);
void atg_special_chebyshev_polynomial_u_n_scalar(tensor *, tensor x, scalar n);
void atg_special_chebyshev_polynomial_u_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_chebyshev_polynomial_u_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_chebyshev_polynomial_u_x_scalar(tensor *, scalar x, tensor n);
void atg_special_chebyshev_polynomial_u_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_chebyshev_polynomial_v(tensor *, tensor x, tensor n);
void atg_special_chebyshev_polynomial_v_n_scalar(tensor *, tensor x, scalar n);
void atg_special_chebyshev_polynomial_v_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_chebyshev_polynomial_v_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_chebyshev_polynomial_v_x_scalar(tensor *, scalar x, tensor n);
void atg_special_chebyshev_polynomial_v_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_chebyshev_polynomial_w(tensor *, tensor x, tensor n);
void atg_special_chebyshev_polynomial_w_n_scalar(tensor *, tensor x, scalar n);
void atg_special_chebyshev_polynomial_w_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_chebyshev_polynomial_w_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_chebyshev_polynomial_w_x_scalar(tensor *, scalar x, tensor n);
void atg_special_chebyshev_polynomial_w_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_digamma(tensor *, tensor self);
void atg_special_digamma_out(tensor *, tensor out, tensor self);
void atg_special_entr(tensor *, tensor self);
void atg_special_entr_out(tensor *, tensor out, tensor self);
void atg_special_erf(tensor *, tensor self);
void atg_special_erf_out(tensor *, tensor out, tensor self);
void atg_special_erfc(tensor *, tensor self);
void atg_special_erfc_out(tensor *, tensor out, tensor self);
void atg_special_erfcx(tensor *, tensor self);
void atg_special_erfcx_out(tensor *, tensor out, tensor self);
void atg_special_erfinv(tensor *, tensor self);
void atg_special_erfinv_out(tensor *, tensor out, tensor self);
void atg_special_exp2(tensor *, tensor self);
void atg_special_exp2_out(tensor *, tensor out, tensor self);
void atg_special_expit(tensor *, tensor self);
void atg_special_expit_out(tensor *, tensor out, tensor self);
void atg_special_expm1(tensor *, tensor self);
void atg_special_expm1_out(tensor *, tensor out, tensor self);
void atg_special_gammainc(tensor *, tensor self, tensor other);
void atg_special_gammainc_out(tensor *, tensor out, tensor self, tensor other);
void atg_special_gammaincc(tensor *, tensor self, tensor other);
void atg_special_gammaincc_out(tensor *, tensor out, tensor self, tensor other);
void atg_special_gammaln(tensor *, tensor self);
void atg_special_gammaln_out(tensor *, tensor out, tensor self);
void atg_special_hermite_polynomial_h(tensor *, tensor x, tensor n);
void atg_special_hermite_polynomial_h_n_scalar(tensor *, tensor x, scalar n);
void atg_special_hermite_polynomial_h_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_hermite_polynomial_h_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_hermite_polynomial_h_x_scalar(tensor *, scalar x, tensor n);
void atg_special_hermite_polynomial_h_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_hermite_polynomial_he(tensor *, tensor x, tensor n);
void atg_special_hermite_polynomial_he_n_scalar(tensor *, tensor x, scalar n);
void atg_special_hermite_polynomial_he_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_hermite_polynomial_he_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_hermite_polynomial_he_x_scalar(tensor *, scalar x, tensor n);
void atg_special_hermite_polynomial_he_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_i0(tensor *, tensor self);
void atg_special_i0_out(tensor *, tensor out, tensor self);
void atg_special_i0e(tensor *, tensor self);
void atg_special_i0e_out(tensor *, tensor out, tensor self);
void atg_special_i1(tensor *, tensor self);
void atg_special_i1_out(tensor *, tensor out, tensor self);
void atg_special_i1e(tensor *, tensor self);
void atg_special_i1e_out(tensor *, tensor out, tensor self);
void atg_special_laguerre_polynomial_l(tensor *, tensor x, tensor n);
void atg_special_laguerre_polynomial_l_n_scalar(tensor *, tensor x, scalar n);
void atg_special_laguerre_polynomial_l_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_laguerre_polynomial_l_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_laguerre_polynomial_l_x_scalar(tensor *, scalar x, tensor n);
void atg_special_laguerre_polynomial_l_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_legendre_polynomial_p(tensor *, tensor x, tensor n);
void atg_special_legendre_polynomial_p_n_scalar(tensor *, tensor x, scalar n);
void atg_special_legendre_polynomial_p_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_legendre_polynomial_p_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_legendre_polynomial_p_x_scalar(tensor *, scalar x, tensor n);
void atg_special_legendre_polynomial_p_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_log1p(tensor *, tensor self);
void atg_special_log1p_out(tensor *, tensor out, tensor self);
void atg_special_log_ndtr(tensor *, tensor self);
void atg_special_log_ndtr_out(tensor *, tensor out, tensor self);
void atg_special_log_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_special_logit(tensor *, tensor self, double eps_v, uint8_t eps_null);
void atg_special_logit_out(tensor *, tensor out, tensor self, double eps_v, uint8_t eps_null);
void atg_special_logsumexp(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_special_logsumexp_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim);
void atg_special_modified_bessel_i0(tensor *, tensor self);
void atg_special_modified_bessel_i0_out(tensor *, tensor out, tensor self);
void atg_special_modified_bessel_i1(tensor *, tensor self);
void atg_special_modified_bessel_i1_out(tensor *, tensor out, tensor self);
void atg_special_modified_bessel_k0(tensor *, tensor self);
void atg_special_modified_bessel_k0_out(tensor *, tensor out, tensor self);
void atg_special_modified_bessel_k1(tensor *, tensor self);
void atg_special_modified_bessel_k1_out(tensor *, tensor out, tensor self);
void atg_special_multigammaln(tensor *, tensor self, int64_t p);
void atg_special_multigammaln_out(tensor *, tensor out, tensor self, int64_t p);
void atg_special_ndtr(tensor *, tensor self);
void atg_special_ndtr_out(tensor *, tensor out, tensor self);
void atg_special_ndtri(tensor *, tensor self);
void atg_special_ndtri_out(tensor *, tensor out, tensor self);
void atg_special_polygamma(tensor *, int64_t n, tensor self);
void atg_special_polygamma_out(tensor *, tensor out, int64_t n, tensor self);
void atg_special_psi(tensor *, tensor self);
void atg_special_psi_out(tensor *, tensor out, tensor self);
void atg_special_round(tensor *, tensor self, int64_t decimals);
void atg_special_round_out(tensor *, tensor out, tensor self, int64_t decimals);
void atg_special_scaled_modified_bessel_k0(tensor *, tensor x);
void atg_special_scaled_modified_bessel_k0_out(tensor *, tensor out, tensor x);
void atg_special_scaled_modified_bessel_k1(tensor *, tensor x);
void atg_special_scaled_modified_bessel_k1_out(tensor *, tensor out, tensor x);
void atg_special_shifted_chebyshev_polynomial_t(tensor *, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_t_n_scalar(tensor *, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_t_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_t_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_t_x_scalar(tensor *, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_t_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_u(tensor *, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_u_n_scalar(tensor *, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_u_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_u_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_u_x_scalar(tensor *, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_u_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_v(tensor *, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_v_n_scalar(tensor *, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_v_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_v_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_v_x_scalar(tensor *, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_v_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_w(tensor *, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_w_n_scalar(tensor *, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_w_n_scalar_out(tensor *, tensor out, tensor x, scalar n);
void atg_special_shifted_chebyshev_polynomial_w_out(tensor *, tensor out, tensor x, tensor n);
void atg_special_shifted_chebyshev_polynomial_w_x_scalar(tensor *, scalar x, tensor n);
void atg_special_shifted_chebyshev_polynomial_w_x_scalar_out(tensor *, tensor out, scalar x, tensor n);
void atg_special_sinc(tensor *, tensor self);
void atg_special_sinc_out(tensor *, tensor out, tensor self);
void atg_special_softmax(tensor *, tensor self, int64_t dim, int dtype);
void atg_special_spherical_bessel_j0(tensor *, tensor x);
void atg_special_spherical_bessel_j0_out(tensor *, tensor out, tensor x);
void atg_special_xlog1py(tensor *, tensor self, tensor other);
void atg_special_xlog1py_other_scalar(tensor *, tensor self, scalar other);
void atg_special_xlog1py_other_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_special_xlog1py_out(tensor *, tensor out, tensor self, tensor other);
void atg_special_xlog1py_self_scalar(tensor *, scalar self_scalar, tensor other);
void atg_special_xlog1py_self_scalar_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_special_xlogy(tensor *, tensor self, tensor other);
void atg_special_xlogy_other_scalar(tensor *, tensor self, scalar other);
void atg_special_xlogy_other_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_special_xlogy_out(tensor *, tensor out, tensor self, tensor other);
void atg_special_xlogy_self_scalar(tensor *, scalar self_scalar, tensor other);
void atg_special_xlogy_self_scalar_out(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_special_zeta(tensor *, tensor self, tensor other);
void atg_special_zeta_other_scalar(tensor *, tensor self, scalar other);
void atg_special_zeta_other_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_special_zeta_out(tensor *, tensor out, tensor self, tensor other);
void atg_special_zeta_self_scalar(tensor *, scalar self_scalar, tensor other);
void atg_special_zeta_self_scalar_out(tensor *, tensor out, scalar self_scalar, tensor other);
tensor *atg_split(tensor self, int64_t split_size, int64_t dim);
tensor *atg_split_copy(tensor self, int64_t split_size, int64_t dim);
void atg_split_copy_tensor_out(tensor *out_data, int out_len, tensor self, int64_t split_size, int64_t dim);
tensor *atg_split_sizes(tensor self, int64_t *split_size_data, int split_size_len, int64_t dim);
tensor *atg_split_with_sizes(tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
tensor *atg_split_with_sizes_copy(tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
void atg_split_with_sizes_copy_out(tensor *out_data, int out_len, tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
void atg_sqrt(tensor *, tensor self);
void atg_sqrt_(tensor *, tensor self);
void atg_sqrt_out(tensor *, tensor out, tensor self);
void atg_square(tensor *, tensor self);
void atg_square_(tensor *, tensor self);
void atg_square_out(tensor *, tensor out, tensor self);
void atg_squeeze(tensor *, tensor self);
void atg_squeeze_(tensor *, tensor self);
void atg_squeeze_copy(tensor *, tensor self);
void atg_squeeze_copy_dim(tensor *, tensor self, int64_t dim);
void atg_squeeze_copy_dim_out(tensor *, tensor out, tensor self, int64_t dim);
void atg_squeeze_copy_dims(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_squeeze_copy_dims_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len);
void atg_squeeze_copy_out(tensor *, tensor out, tensor self);
void atg_squeeze_dim(tensor *, tensor self, int64_t dim);
void atg_squeeze_dim_(tensor *, tensor self, int64_t dim);
void atg_squeeze_dims(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_squeeze_dims_(tensor *, tensor self, int64_t *dim_data, int dim_len);
void atg_sspaddmm(tensor *, tensor self, tensor mat1, tensor mat2);
void atg_sspaddmm_out(tensor *, tensor out, tensor self, tensor mat1, tensor mat2);
void atg_stack(tensor *, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_stack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len, int64_t dim);
void atg_std(tensor *, tensor self, int unbiased);
void atg_std_correction(tensor *, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_std_correction_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_std_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_std_mean(tensor *, tensor self, int unbiased);
void atg_std_mean_correction(tensor *, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_std_mean_correction_out(tensor *, tensor out0, tensor out1, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_std_mean_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_std_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_stft(tensor *, tensor self, int64_t n_fft, int64_t hop_length_v, uint8_t hop_length_null, int64_t win_length_v, uint8_t win_length_null, tensor window, int normalized, int onesided, int return_complex, int align_to_window);
void atg_stft_center(tensor *, tensor self, int64_t n_fft, int64_t hop_length_v, uint8_t hop_length_null, int64_t win_length_v, uint8_t win_length_null, tensor window, int center, char* pad_mode_ptr, int pad_mode_len, int normalized, int onesided, int return_complex, int align_to_window);
void atg_sub(tensor *, tensor self, tensor other);
void atg_sub_(tensor *, tensor self, tensor other);
void atg_sub_out(tensor *, tensor out, tensor self, tensor other);
void atg_sub_scalar(tensor *, tensor self, scalar other);
void atg_sub_scalar_(tensor *, tensor self, scalar other);
void atg_sub_scalar_out(tensor *, tensor out, tensor self, scalar other);
void atg_subtract(tensor *, tensor self, tensor other);
void atg_subtract_(tensor *, tensor self, tensor other);
void atg_subtract_out(tensor *, tensor out, tensor self, tensor other);
void atg_subtract_scalar(tensor *, tensor self, scalar other);
void atg_subtract_scalar_(tensor *, tensor self, scalar other);
void atg_sum(tensor *, tensor self, int dtype);
void atg_sum_dim_intlist(tensor *, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_sum_intlist_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int keepdim, int dtype);
void atg_sum_out(tensor *, tensor out, tensor self, int dtype);
void atg_sum_to_size(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_svd(tensor *, tensor self, int some, int compute_uv);
void atg_svd_u(tensor *, tensor U, tensor S, tensor V, tensor self, int some, int compute_uv);
void atg_swapaxes(tensor *, tensor self, int64_t axis0, int64_t axis1);
void atg_swapaxes_(tensor *, tensor self, int64_t axis0, int64_t axis1);
void atg_swapdims(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_swapdims_(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_t(tensor *, tensor self);
void atg_t_(tensor *, tensor self);
void atg_t_copy(tensor *, tensor self);
void atg_t_copy_out(tensor *, tensor out, tensor self);
void atg_take(tensor *, tensor self, tensor index);
void atg_take_along_dim(tensor *, tensor self, tensor indices, int64_t dim_v, uint8_t dim_null);
void atg_take_along_dim_out(tensor *, tensor out, tensor self, tensor indices, int64_t dim_v, uint8_t dim_null);
void atg_take_out(tensor *, tensor out, tensor self, tensor index);
void atg_tan(tensor *, tensor self);
void atg_tan_(tensor *, tensor self);
void atg_tan_out(tensor *, tensor out, tensor self);
void atg_tanh(tensor *, tensor self);
void atg_tanh_(tensor *, tensor self);
void atg_tanh_backward(tensor *, tensor grad_output, tensor output);
void atg_tanh_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor output);
void atg_tanh_out(tensor *, tensor out, tensor self);
tensor *atg_tensor_split(tensor self, int64_t sections, int64_t dim);
tensor *atg_tensor_split_indices(tensor self, int64_t *indices_data, int indices_len, int64_t dim);
tensor *atg_tensor_split_tensor_indices_or_sections(tensor self, tensor tensor_indices_or_sections, int64_t dim);
void atg_tensordot(tensor *, tensor self, tensor other, int64_t *dims_self_data, int dims_self_len, int64_t *dims_other_data, int dims_other_len);
void atg_tensordot_out(tensor *, tensor out, tensor self, tensor other, int64_t *dims_self_data, int dims_self_len, int64_t *dims_other_data, int dims_other_len);
void atg_threshold(tensor *, tensor self, scalar threshold, scalar value);
void atg_threshold_(tensor *, tensor self, scalar threshold, scalar value);
void atg_threshold_backward(tensor *, tensor grad_output, tensor self, scalar threshold);
void atg_threshold_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, tensor self, scalar threshold);
void atg_threshold_out(tensor *, tensor out, tensor self, scalar threshold, scalar value);
void atg_tile(tensor *, tensor self, int64_t *dims_data, int dims_len);
void atg_to(tensor *, tensor self, int device);
void atg_to_dense(tensor *, tensor self, int dtype, int masked_grad);
void atg_to_dense_backward(tensor *, tensor grad, tensor input, int masked_grad);
void atg_to_device(tensor *, tensor self, int device, int dtype, int non_blocking, int copy);
void atg_to_dtype(tensor *, tensor self, int dtype, int non_blocking, int copy);
void atg_to_dtype_layout(tensor *, tensor self, int options_kind, int options_device, int non_blocking, int copy);
void atg_to_mkldnn(tensor *, tensor self, int dtype);
void atg_to_mkldnn_backward(tensor *, tensor grad, tensor input);
void atg_to_mkldnn_out(tensor *, tensor out, tensor self, int dtype);
void atg_to_other(tensor *, tensor self, tensor other, int non_blocking, int copy);
void atg_to_padded_tensor(tensor *, tensor self, double padding, int64_t *output_size_data, int output_size_len);
void atg_to_padded_tensor_out(tensor *, tensor out, tensor self, double padding, int64_t *output_size_data, int output_size_len);
void atg_to_sparse(tensor *, tensor self, int8_t layout, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg_to_sparse_bsc(tensor *, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg_to_sparse_bsr(tensor *, tensor self, int64_t *blocksize_data, int blocksize_len, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg_to_sparse_csc(tensor *, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg_to_sparse_csr(tensor *, tensor self, int64_t dense_dim_v, uint8_t dense_dim_null);
void atg_to_sparse_sparse_dim(tensor *, tensor self, int64_t sparse_dim);
void atg_topk(tensor *, tensor self, int64_t k, int64_t dim, int largest, int sorted);
void atg_topk_values(tensor *, tensor values, tensor indices, tensor self, int64_t k, int64_t dim, int largest, int sorted);
void atg_totype(tensor *, tensor self, int scalar_type);
void atg_trace(tensor *, tensor self);
void atg_trace_backward(tensor *, tensor grad, int64_t *sizes_data, int sizes_len);
void atg_trace_out(tensor *, tensor out, tensor self);
void atg_transpose(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_transpose_(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_transpose_copy(tensor *, tensor self, int64_t dim0, int64_t dim1);
void atg_transpose_copy_int_out(tensor *, tensor out, tensor self, int64_t dim0, int64_t dim1);
void atg_trapezoid(tensor *, tensor y, int64_t dim);
void atg_trapezoid_x(tensor *, tensor y, tensor x, int64_t dim);
void atg_trapz(tensor *, tensor y, tensor x, int64_t dim);
void atg_trapz_dx(tensor *, tensor y, double dx, int64_t dim);
void atg_triangular_solve(tensor *, tensor self, tensor A, int upper, int transpose, int unitriangular);
void atg_triangular_solve_x(tensor *, tensor X, tensor M, tensor self, tensor A, int upper, int transpose, int unitriangular);
void atg_tril(tensor *, tensor self, int64_t diagonal);
void atg_tril_(tensor *, tensor self, int64_t diagonal);
void atg_tril_indices(tensor *, int64_t row, int64_t col, int64_t offset, int options_kind, int options_device);
void atg_tril_indices_out(tensor *, tensor out, int64_t row, int64_t col, int64_t offset);
void atg_tril_out(tensor *, tensor out, tensor self, int64_t diagonal);
void atg_triplet_margin_loss(tensor *, tensor anchor, tensor positive, tensor negative, double margin, double p, double eps, int swap, int64_t reduction);
void atg_triu(tensor *, tensor self, int64_t diagonal);
void atg_triu_(tensor *, tensor self, int64_t diagonal);
void atg_triu_indices(tensor *, int64_t row, int64_t col, int64_t offset, int options_kind, int options_device);
void atg_triu_indices_out(tensor *, tensor out, int64_t row, int64_t col, int64_t offset);
void atg_triu_out(tensor *, tensor out, tensor self, int64_t diagonal);
void atg_true_divide(tensor *, tensor self, tensor other);
void atg_true_divide_(tensor *, tensor self, tensor other);
void atg_true_divide_out(tensor *, tensor out, tensor self, tensor other);
void atg_true_divide_scalar(tensor *, tensor self, scalar other);
void atg_true_divide_scalar_(tensor *, tensor self, scalar other);
void atg_trunc(tensor *, tensor self);
void atg_trunc_(tensor *, tensor self);
void atg_trunc_out(tensor *, tensor out, tensor self);
void atg_type_as(tensor *, tensor self, tensor other);
tensor *atg_unbind(tensor self, int64_t dim);
tensor *atg_unbind_copy(tensor self, int64_t dim);
void atg_unbind_copy_int_out(tensor *out_data, int out_len, tensor self, int64_t dim);
void atg_unflatten(tensor *, tensor self, int64_t dim, int64_t *sizes_data, int sizes_len);
tensor *atg_unflatten_dense_tensors(tensor flat, tensor *tensors_data, int tensors_len);
void atg_unfold(tensor *, tensor self, int64_t dimension, int64_t size, int64_t step);
void atg_unfold_backward(tensor *, tensor grad_in, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t size, int64_t step);
void atg_unfold_backward_out(tensor *, tensor out, tensor grad_in, int64_t *input_sizes_data, int input_sizes_len, int64_t dim, int64_t size, int64_t step);
void atg_unfold_copy(tensor *, tensor self, int64_t dimension, int64_t size, int64_t step);
void atg_unfold_copy_out(tensor *, tensor out, tensor self, int64_t dimension, int64_t size, int64_t step);
void atg_uniform(tensor *, tensor self, double from, double to);
void atg_uniform_(tensor *, tensor self, double from, double to);
void atg_uniform_out(tensor *, tensor out, tensor self, double from, double to);
void atg_unique_consecutive(tensor *, tensor self, int return_inverse, int return_counts, int64_t dim_v, uint8_t dim_null);
void atg_unique_consecutive_out(tensor *, tensor out0, tensor out1, tensor out2, tensor self, int return_inverse, int return_counts, int64_t dim_v, uint8_t dim_null);
void atg_unique_dim(tensor *, tensor self, int64_t dim, int sorted, int return_inverse, int return_counts);
void atg_unique_dim_consecutive(tensor *, tensor self, int64_t dim, int return_inverse, int return_counts);
void atg_unique_dim_consecutive_out(tensor *, tensor out0, tensor out1, tensor out2, tensor self, int64_t dim, int return_inverse, int return_counts);
void atg_unique_dim_out(tensor *, tensor out0, tensor out1, tensor out2, tensor self, int64_t dim, int sorted, int return_inverse, int return_counts);
tensor *atg_unsafe_chunk(tensor self, int64_t chunks, int64_t dim);
tensor *atg_unsafe_split(tensor self, int64_t split_size, int64_t dim);
void atg_unsafe_split_tensor_out(tensor *out_data, int out_len, tensor self, int64_t split_size, int64_t dim);
tensor *atg_unsafe_split_with_sizes(tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
void atg_unsafe_split_with_sizes_out(tensor *out_data, int out_len, tensor self, int64_t *split_sizes_data, int split_sizes_len, int64_t dim);
void atg_unsqueeze(tensor *, tensor self, int64_t dim);
void atg_unsqueeze_(tensor *, tensor self, int64_t dim);
void atg_unsqueeze_copy(tensor *, tensor self, int64_t dim);
void atg_unsqueeze_copy_out(tensor *, tensor out, tensor self, int64_t dim);
void atg_upsample_bicubic2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bicubic2d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
void atg_upsample_bilinear2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_bilinear2d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
void atg_upsample_bilinear2d_vec_out(tensor *, tensor out, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
void atg_upsample_linear1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_v, uint8_t scales_null);
void atg_upsample_linear1d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
void atg_upsample_nearest1d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_v, uint8_t scales_null);
void atg_upsample_nearest1d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
void atg_upsample_nearest2d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest2d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
void atg_upsample_nearest2d_vec_out(tensor *, tensor out, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
void atg_upsample_nearest3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_nearest3d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, double *scale_factors_data, int scale_factors_len);
void atg_upsample_trilinear3d(tensor *, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_backward(tensor *, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_backward_grad_input(tensor *, tensor grad_input, tensor grad_output, int64_t *output_size_data, int output_size_len, int64_t *input_size_data, int input_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_out(tensor *, tensor out, tensor self, int64_t *output_size_data, int output_size_len, int align_corners, double scales_d_v, uint8_t scales_d_null, double scales_h_v, uint8_t scales_h_null, double scales_w_v, uint8_t scales_w_null);
void atg_upsample_trilinear3d_vec(tensor *, tensor input, int64_t *output_size_data, int output_size_len, int align_corners, double *scale_factors_data, int scale_factors_len);
void atg_value_selecting_reduction_backward(tensor *, tensor grad, int64_t dim, tensor indices, int64_t *sizes_data, int sizes_len, int keepdim);
void atg_values(tensor *, tensor self);
void atg_values_copy(tensor *, tensor self);
void atg_values_copy_out(tensor *, tensor out, tensor self);
void atg_vander(tensor *, tensor x, int64_t n_v, uint8_t n_null, int increasing);
void atg_var(tensor *, tensor self, int unbiased);
void atg_var_correction(tensor *, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_var_correction_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_var_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_var_mean(tensor *, tensor self, int unbiased);
void atg_var_mean_correction(tensor *, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_var_mean_correction_out(tensor *, tensor out0, tensor out1, tensor self, int64_t *dim_data, int dim_len, scalar correction, int keepdim);
void atg_var_mean_dim(tensor *, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_var_out(tensor *, tensor out, tensor self, int64_t *dim_data, int dim_len, int unbiased, int keepdim);
void atg_vdot(tensor *, tensor self, tensor other);
void atg_vdot_out(tensor *, tensor out, tensor self, tensor other);
void atg_view(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_view_as(tensor *, tensor self, tensor other);
void atg_view_as_complex(tensor *, tensor self);
void atg_view_as_complex_copy(tensor *, tensor self);
void atg_view_as_complex_copy_out(tensor *, tensor out, tensor self);
void atg_view_as_real(tensor *, tensor self);
void atg_view_as_real_copy(tensor *, tensor self);
void atg_view_as_real_copy_out(tensor *, tensor out, tensor self);
void atg_view_copy(tensor *, tensor self, int64_t *size_data, int size_len);
void atg_view_copy_dtype(tensor *, tensor self, int dtype);
void atg_view_copy_dtype_out(tensor *, tensor out, tensor self, int dtype);
void atg_view_copy_out(tensor *, tensor out, tensor self, int64_t *size_data, int size_len);
void atg_view_dtype(tensor *, tensor self, int dtype);
tensor *atg_vsplit(tensor self, int64_t sections);
tensor *atg_vsplit_array(tensor self, int64_t *indices_data, int indices_len);
void atg_vstack(tensor *, tensor *tensors_data, int tensors_len);
void atg_vstack_out(tensor *, tensor out, tensor *tensors_data, int tensors_len);
tensor *atg_where(tensor condition);
void atg_where_scalar(tensor *, tensor condition, scalar self_scalar, scalar other);
void atg_where_scalarother(tensor *, tensor condition, tensor self, scalar other);
void atg_where_scalarself(tensor *, tensor condition, scalar self_scalar, tensor other);
void atg_where_self(tensor *, tensor condition, tensor self, tensor other);
void atg_where_self_out(tensor *, tensor out, tensor condition, tensor self, tensor other);
void atg_xlogy(tensor *, tensor self, tensor other);
void atg_xlogy_(tensor *, tensor self, tensor other);
void atg_xlogy_outscalar_other(tensor *, tensor out, tensor self, scalar other);
void atg_xlogy_outscalar_self(tensor *, tensor out, scalar self_scalar, tensor other);
void atg_xlogy_outtensor(tensor *, tensor out, tensor self, tensor other);
void atg_xlogy_scalar_other(tensor *, tensor self, scalar other);
void atg_xlogy_scalar_other_(tensor *, tensor self, scalar other);
void atg_xlogy_scalar_self(tensor *, scalar self_scalar, tensor other);
void atg_zero(tensor *, tensor self);
void atg_zero_(tensor *, tensor self);
void atg_zero_out(tensor *, tensor out, tensor self);
void atg_zeros(tensor *, int64_t *size_data, int size_len, int options_kind, int options_device);
void atg_zeros_like(tensor *, tensor self);
void atg_zeros_like_out(tensor *, tensor out, tensor self);
void atg_zeros_out(tensor *, tensor out, int64_t *size_data, int size_len);
}