yandex-cloud 2025.4.14

/// Defines the options for text generation.
///
/// Embedded as the optional `generation_options` field of both `InstructRequest`
/// and `ChatRequest`. `temperature` and `max_tokens` are `Option`s, so an unset
/// value (`None`) is distinguishable from an explicitly provided one.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GenerationOptions {
    /// Enables streaming of partially generated text.
    #[prost(bool, tag = "1")]
    pub partial_results: bool,
    /// Affects creativity and randomness of responses. Should be a double number between 0 (inclusive) and 1 (inclusive).
    /// Lower values produce more straightforward responses, while higher values lead to increased creativity and randomness.
    ///
    /// Declared as an optional message-typed field (tag 2), not as a plain `double`.
    // NOTE(review): the documented range is not checked anywhere in this type —
    // presumably it is validated by the service; confirm.
    #[prost(message, optional, tag = "2")]
    pub temperature: ::core::option::Option<f64>,
    /// Sets the maximum limit on the total number of tokens used for both the input prompt and the generated response.
    /// Must be greater than zero and not exceed 7400 tokens.
    ///
    /// Declared as an optional message-typed field (tag 3), not as a plain `int64`.
    // NOTE(review): the documented bounds are not checked anywhere in this type —
    // presumably they are validated by the service; confirm.
    #[prost(message, optional, tag = "3")]
    pub max_tokens: ::core::option::Option<i64>,
}
/// Represents an alternative generated response, including its score and token count.
///
/// Instances of this message are carried in `InstructResponse::alternatives`.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Alternative {
    /// The generated text response.
    #[prost(string, tag = "1")]
    pub text: ::prost::alloc::string::String,
    /// The score or confidence of the generated text.
    // NOTE(review): the scale and range of the score are not specified in this file.
    #[prost(double, tag = "2")]
    pub score: f64,
    /// The number of tokens in the generated response.
    #[prost(int64, tag = "3")]
    pub num_tokens: i64,
}
/// Represents a message within a chat.
///
/// Used both for the conversation history in `ChatRequest::messages` and for the
/// reply in `ChatResponse::message`.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Message {
    /// Identifies the sender of the message.
    // NOTE(review): this is a free-form string here; the set of accepted role
    // values is not visible in this file.
    #[prost(string, tag = "1")]
    pub role: ::prost::alloc::string::String,
    /// The text content of the message.
    #[prost(string, tag = "2")]
    pub text: ::prost::alloc::string::String,
}
/// Represents a token, the basic unit of text, used by the LLM.
///
/// Instances of this message are carried in `TokenizeResponse::tokens`.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Token {
    /// An internal token identifier.
    #[prost(int64, tag = "1")]
    pub id: i64,
    /// The textual representation of the token.
    #[prost(string, tag = "2")]
    pub text: ::prost::alloc::string::String,
    /// Indicates whether the token is special or not. Special tokens define the model's behavior and are not visible to users.
    #[prost(bool, tag = "3")]
    pub special: bool,
}
/// Request for instructing the model to generate text.
///
/// Accepted by the `instruct` method of both `TextGenerationServiceClient` and
/// `TextGenerationAsyncServiceClient`.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct InstructRequest {
    /// The name or identifier of the model to be used for text generation.
    /// Possible value for now: `general`.
    #[prost(string, tag = "1")]
    pub model: ::prost::alloc::string::String,
    /// Configuration options for text generation.
    ///
    /// `None` when the caller supplies no options.
    #[prost(message, optional, tag = "2")]
    pub generation_options: ::core::option::Option<GenerationOptions>,
    /// Text precondition or context of the request.
    /// For example, if the instruction is "You are the youngest Nobel laureate",
    /// the request text might be "Tell us about your daily routine".
    ///
    /// A oneof: holds either inline text (tag 3) or a URI (tag 5), or `None` when
    /// no instruction is set.
    #[prost(oneof = "instruct_request::Instruction", tags = "3, 5")]
    pub instruction: ::core::option::Option<instruct_request::Instruction>,
    /// Request for text generation.
    ///
    /// A oneof whose only variant is inline text (tag 4); `None` when not set.
    #[prost(oneof = "instruct_request::Request", tags = "4")]
    pub request: ::core::option::Option<instruct_request::Request>,
}
/// Nested message and enum types in `InstructRequest`.
///
/// Holds the two oneof enums referenced by `InstructRequest::instruction` and
/// `InstructRequest::request`.
pub mod instruct_request {
    /// Text precondition or context of the request.
    /// For example, if the instruction is "You are the youngest Nobel laureate",
    /// the request text might be "Tell us about your daily routine".
    ///
    /// Exactly one variant is held at a time; both variants carry a `String`.
    #[allow(clippy::derive_partial_eq_without_eq)]
    #[derive(Clone, PartialEq, ::prost::Oneof)]
    pub enum Instruction {
        /// The text-based instruction for text generation.
        #[prost(string, tag = "3")]
        InstructionText(::prost::alloc::string::String),
        /// A URI containing instructions for text generation.
        #[prost(string, tag = "5")]
        InstructionUri(::prost::alloc::string::String),
    }
    /// Request for text generation.
    ///
    /// Currently a single-variant oneof.
    #[allow(clippy::derive_partial_eq_without_eq)]
    #[derive(Clone, PartialEq, ::prost::Oneof)]
    pub enum Request {
        /// The text-based request for text generation.
        #[prost(string, tag = "4")]
        RequestText(::prost::alloc::string::String),
    }
}
/// Response containing generated text alternatives and token count.
///
/// `TextGenerationServiceClient::instruct` yields a stream of these messages.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct InstructResponse {
    /// A list of alternative text responses.
    #[prost(message, repeated, tag = "1")]
    pub alternatives: ::prost::alloc::vec::Vec<Alternative>,
    /// The number of tokens used in the prompt, including both the \[instruction_text\] and \[request_text\].
    #[prost(int64, tag = "2")]
    pub num_prompt_tokens: i64,
}
/// Request to engage in a chat conversation with a text generation model.
///
/// Accepted by `TextGenerationServiceClient::chat`.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ChatRequest {
    /// The name or identifier of the model to be used for the chat.
    /// Possible value for now: `general`.
    #[prost(string, tag = "1")]
    pub model: ::prost::alloc::string::String,
    /// Configuration options for text generation.
    ///
    /// `None` when the caller supplies no options.
    #[prost(message, optional, tag = "2")]
    pub generation_options: ::core::option::Option<GenerationOptions>,
    /// A list of messages in the conversation.
    // Note: this field uses tag 4; tag 3 belongs to the `instruction` oneof below.
    #[prost(message, repeated, tag = "4")]
    pub messages: ::prost::alloc::vec::Vec<Message>,
    /// Text precondition or context of the request.
    /// For example, the instruction may be "You are a helpful assistant".
    ///
    /// A oneof whose only variant is inline text (tag 3); `None` when not set.
    #[prost(oneof = "chat_request::Instruction", tags = "3")]
    pub instruction: ::core::option::Option<chat_request::Instruction>,
}
/// Nested message and enum types in `ChatRequest`.
///
/// Holds the oneof enum referenced by `ChatRequest::instruction`.
pub mod chat_request {
    /// Text precondition or context of the request.
    /// For example, the instruction may be "You are a helpful assistant".
    ///
    /// Currently a single-variant oneof.
    #[allow(clippy::derive_partial_eq_without_eq)]
    #[derive(Clone, PartialEq, ::prost::Oneof)]
    pub enum Instruction {
        /// The text-based instruction for the conversation.
        #[prost(string, tag = "3")]
        InstructionText(::prost::alloc::string::String),
    }
}
/// Contains a model-generated response for a chat query.
///
/// `TextGenerationServiceClient::chat` yields a stream of these messages.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ChatResponse {
    /// The assistant's message in the chat conversation.
    ///
    /// `None` when the field is absent from the response.
    #[prost(message, optional, tag = "1")]
    pub message: ::core::option::Option<Message>,
    /// Total number of tokens used in both the chat request and chat response.
    #[prost(int64, tag = "2")]
    pub num_tokens: i64,
}
/// Request to tokenize input text.
///
/// Accepted by `TokenizerServiceClient::tokenize`.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct TokenizeRequest {
    /// The name or identifier of the model to be used for tokenization.
    /// Possible values for now: `general`, `general:embedding`.
    #[prost(string, tag = "1")]
    pub model: ::prost::alloc::string::String,
    /// The input text to tokenize.
    #[prost(string, tag = "2")]
    pub text: ::prost::alloc::string::String,
}
/// Tokenization response.
///
/// Returned by `TokenizerServiceClient::tokenize`.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct TokenizeResponse {
    /// A list of tokens obtained from tokenization.
    #[prost(message, repeated, tag = "1")]
    pub tokens: ::prost::alloc::vec::Vec<Token>,
}
/// Represents a request to obtain embeddings for text data.
///
/// Accepted by `EmbeddingsServiceClient::embedding`.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct EmbeddingRequest {
    /// The type of embedding to be generated.
    ///
    /// Stored as the raw `i32` value of an `embedding_request::EmbeddingType`
    /// variant (`Unspecified = 0`, `Query = 1`, `Document = 2`).
    #[prost(enumeration = "embedding_request::EmbeddingType", tag = "1")]
    pub embedding_type: i32,
    /// The name or identifier of the model to be used for embedding. Possible value for now: `general:embedding`.
    #[prost(string, tag = "2")]
    pub model: ::prost::alloc::string::String,
    /// The input text for which the embedding is requested.
    #[prost(string, tag = "3")]
    pub text: ::prost::alloc::string::String,
}
/// Nested message and enum types in `EmbeddingRequest`.
pub mod embedding_request {
    /// Selects which kind of embedding the service should produce.
    #[derive(
        Clone,
        Copy,
        Debug,
        PartialEq,
        Eq,
        Hash,
        PartialOrd,
        Ord,
        ::prost::Enumeration
    )]
    #[repr(i32)]
    pub enum EmbeddingType {
        /// No embedding type was specified.
        Unspecified = 0,
        /// Embedding for a query: a short query or search term. Query embeddings
        /// are typically used in information retrieval and search applications.
        Query = 1,
        /// Embedding for a document: a longer document or piece of text. Document
        /// embeddings are often used in natural language understanding and
        /// document similarity tasks.
        Document = 2,
    }
    impl EmbeddingType {
        /// Returns the variant's name exactly as it is spelled in the ProtoBuf definition.
        ///
        /// The names are passed through untransformed, so they stay stable for as long
        /// as the ProtoBuf definition does and are safe for programmatic use.
        pub fn as_str_name(&self) -> &'static str {
            match *self {
                Self::Unspecified => "EMBEDDING_TYPE_UNSPECIFIED",
                Self::Query => "EMBEDDING_TYPE_QUERY",
                Self::Document => "EMBEDDING_TYPE_DOCUMENT",
            }
        }
        /// Looks up the variant whose ProtoBuf name equals `value`.
        ///
        /// The comparison is exact; returns `None` when no variant has that name.
        pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
            // Search the full variant list using the same names `as_str_name` reports.
            [Self::Unspecified, Self::Query, Self::Document]
                .iter()
                .copied()
                .find(|candidate| candidate.as_str_name() == value)
        }
    }
}
/// Represents a response containing embeddings for input text data.
///
/// Returned by `EmbeddingsServiceClient::embedding`.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct EmbeddingResponse {
    /// A repeated list of double values representing the embedding.
    // NOTE(review): the vector length (embedding dimension) is not specified in this file.
    #[prost(double, repeated, tag = "1")]
    pub embedding: ::prost::alloc::vec::Vec<f64>,
    /// The number of tokens in the input text.
    #[prost(int64, tag = "2")]
    pub num_tokens: i64,
}
/// Generated client implementations.
pub mod text_generation_service_client {
    #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)]
    use tonic::codegen::*;
    use tonic::codegen::http::Uri;
    /// gRPC client for the text generation and conversation service.
    #[derive(Debug, Clone)]
    pub struct TextGenerationServiceClient<T> {
        inner: tonic::client::Grpc<T>,
    }
    impl TextGenerationServiceClient<tonic::transport::Channel> {
        /// Builds an endpoint from `dst`, connects to it, and wraps the resulting
        /// channel in a new client.
        pub async fn connect<D>(dst: D) -> Result<Self, tonic::transport::Error>
        where
            D: TryInto<tonic::transport::Endpoint>,
            D::Error: Into<StdError>,
        {
            let endpoint = tonic::transport::Endpoint::new(dst)?;
            let channel = endpoint.connect().await?;
            Ok(Self::new(channel))
        }
    }
    impl<T> TextGenerationServiceClient<T>
    where
        T: tonic::client::GrpcService<tonic::body::BoxBody>,
        T::Error: Into<StdError>,
        T::ResponseBody: Body<Data = Bytes> + Send + 'static,
        <T::ResponseBody as Body>::Error: Into<StdError> + Send,
    {
        /// Wraps an existing transport/service in a client.
        pub fn new(inner: T) -> Self {
            Self {
                inner: tonic::client::Grpc::new(inner),
            }
        }
        /// Wraps an existing transport/service in a client that uses `origin`.
        pub fn with_origin(inner: T, origin: Uri) -> Self {
            Self {
                inner: tonic::client::Grpc::with_origin(inner, origin),
            }
        }
        /// Wraps `inner` in an `InterceptedService` driven by `interceptor` and
        /// builds a client on top of it.
        pub fn with_interceptor<F>(
            inner: T,
            interceptor: F,
        ) -> TextGenerationServiceClient<InterceptedService<T, F>>
        where
            F: tonic::service::Interceptor,
            T::ResponseBody: Default,
            T: tonic::codegen::Service<
                http::Request<tonic::body::BoxBody>,
                Response = http::Response<
                    <T as tonic::client::GrpcService<tonic::body::BoxBody>>::ResponseBody,
                >,
            >,
            <T as tonic::codegen::Service<
                http::Request<tonic::body::BoxBody>,
            >>::Error: Into<StdError> + Send + Sync,
        {
            let intercepted = InterceptedService::new(inner, interceptor);
            TextGenerationServiceClient::new(intercepted)
        }
        /// Compress requests with the given encoding.
        ///
        /// The server has to support the encoding; otherwise it might respond with
        /// an error.
        #[must_use]
        pub fn send_compressed(self, encoding: CompressionEncoding) -> Self {
            Self {
                inner: self.inner.send_compressed(encoding),
            }
        }
        /// Enable decompressing responses.
        #[must_use]
        pub fn accept_compressed(self, encoding: CompressionEncoding) -> Self {
            Self {
                inner: self.inner.accept_compressed(encoding),
            }
        }
        /// Limits the maximum size of a decoded message.
        ///
        /// Default: `4MB`
        #[must_use]
        pub fn max_decoding_message_size(self, limit: usize) -> Self {
            Self {
                inner: self.inner.max_decoding_message_size(limit),
            }
        }
        /// Limits the maximum size of an encoded message.
        ///
        /// Default: `usize::MAX`
        #[must_use]
        pub fn max_encoding_message_size(self, limit: usize) -> Self {
            Self {
                inner: self.inner.max_encoding_message_size(limit),
            }
        }
        /// RPC method for instructing the model to generate text.
        ///
        /// Server-streaming call: on success the response wraps a stream of
        /// `InstructResponse` messages. If the underlying service is not ready, a
        /// `tonic::Code::Unknown` status is returned before anything is sent.
        pub async fn instruct(
            &mut self,
            request: impl tonic::IntoRequest<super::InstructRequest>,
        ) -> std::result::Result<
            tonic::Response<tonic::codec::Streaming<super::InstructResponse>>,
            tonic::Status,
        > {
            // Bail out early when the transport cannot accept a request.
            if let Err(not_ready) = self.inner.ready().await {
                let cause: StdError = not_ready.into();
                return Err(tonic::Status::new(
                    tonic::Code::Unknown,
                    format!("Service was not ready: {}", cause),
                ));
            }
            let mut grpc_request = request.into_request();
            grpc_request.extensions_mut().insert(GrpcMethod::new(
                "yandex.cloud.ai.llm.v1alpha.TextGenerationService",
                "Instruct",
            ));
            let rpc_path = http::uri::PathAndQuery::from_static(
                "/yandex.cloud.ai.llm.v1alpha.TextGenerationService/Instruct",
            );
            self.inner
                .server_streaming(
                    grpc_request,
                    rpc_path,
                    tonic::codec::ProstCodec::default(),
                )
                .await
        }
        /// RPC method for engaging in a chat conversation with the model.
        ///
        /// Server-streaming call: on success the response wraps a stream of
        /// `ChatResponse` messages. If the underlying service is not ready, a
        /// `tonic::Code::Unknown` status is returned before anything is sent.
        pub async fn chat(
            &mut self,
            request: impl tonic::IntoRequest<super::ChatRequest>,
        ) -> std::result::Result<
            tonic::Response<tonic::codec::Streaming<super::ChatResponse>>,
            tonic::Status,
        > {
            // Bail out early when the transport cannot accept a request.
            if let Err(not_ready) = self.inner.ready().await {
                let cause: StdError = not_ready.into();
                return Err(tonic::Status::new(
                    tonic::Code::Unknown,
                    format!("Service was not ready: {}", cause),
                ));
            }
            let mut grpc_request = request.into_request();
            grpc_request.extensions_mut().insert(GrpcMethod::new(
                "yandex.cloud.ai.llm.v1alpha.TextGenerationService",
                "Chat",
            ));
            let rpc_path = http::uri::PathAndQuery::from_static(
                "/yandex.cloud.ai.llm.v1alpha.TextGenerationService/Chat",
            );
            self.inner
                .server_streaming(
                    grpc_request,
                    rpc_path,
                    tonic::codec::ProstCodec::default(),
                )
                .await
        }
    }
}
/// Generated client implementations.
pub mod tokenizer_service_client {
    #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)]
    use tonic::codegen::*;
    use tonic::codegen::http::Uri;
    /// gRPC client for the service that tokenizes input text.
    #[derive(Debug, Clone)]
    pub struct TokenizerServiceClient<T> {
        inner: tonic::client::Grpc<T>,
    }
    impl TokenizerServiceClient<tonic::transport::Channel> {
        /// Builds an endpoint from `dst`, connects to it, and wraps the resulting
        /// channel in a new client.
        pub async fn connect<D>(dst: D) -> Result<Self, tonic::transport::Error>
        where
            D: TryInto<tonic::transport::Endpoint>,
            D::Error: Into<StdError>,
        {
            let endpoint = tonic::transport::Endpoint::new(dst)?;
            let channel = endpoint.connect().await?;
            Ok(Self::new(channel))
        }
    }
    impl<T> TokenizerServiceClient<T>
    where
        T: tonic::client::GrpcService<tonic::body::BoxBody>,
        T::Error: Into<StdError>,
        T::ResponseBody: Body<Data = Bytes> + Send + 'static,
        <T::ResponseBody as Body>::Error: Into<StdError> + Send,
    {
        /// Wraps an existing transport/service in a client.
        pub fn new(inner: T) -> Self {
            Self {
                inner: tonic::client::Grpc::new(inner),
            }
        }
        /// Wraps an existing transport/service in a client that uses `origin`.
        pub fn with_origin(inner: T, origin: Uri) -> Self {
            Self {
                inner: tonic::client::Grpc::with_origin(inner, origin),
            }
        }
        /// Wraps `inner` in an `InterceptedService` driven by `interceptor` and
        /// builds a client on top of it.
        pub fn with_interceptor<F>(
            inner: T,
            interceptor: F,
        ) -> TokenizerServiceClient<InterceptedService<T, F>>
        where
            F: tonic::service::Interceptor,
            T::ResponseBody: Default,
            T: tonic::codegen::Service<
                http::Request<tonic::body::BoxBody>,
                Response = http::Response<
                    <T as tonic::client::GrpcService<tonic::body::BoxBody>>::ResponseBody,
                >,
            >,
            <T as tonic::codegen::Service<
                http::Request<tonic::body::BoxBody>,
            >>::Error: Into<StdError> + Send + Sync,
        {
            let intercepted = InterceptedService::new(inner, interceptor);
            TokenizerServiceClient::new(intercepted)
        }
        /// Compress requests with the given encoding.
        ///
        /// The server has to support the encoding; otherwise it might respond with
        /// an error.
        #[must_use]
        pub fn send_compressed(self, encoding: CompressionEncoding) -> Self {
            Self {
                inner: self.inner.send_compressed(encoding),
            }
        }
        /// Enable decompressing responses.
        #[must_use]
        pub fn accept_compressed(self, encoding: CompressionEncoding) -> Self {
            Self {
                inner: self.inner.accept_compressed(encoding),
            }
        }
        /// Limits the maximum size of a decoded message.
        ///
        /// Default: `4MB`
        #[must_use]
        pub fn max_decoding_message_size(self, limit: usize) -> Self {
            Self {
                inner: self.inner.max_decoding_message_size(limit),
            }
        }
        /// Limits the maximum size of an encoded message.
        ///
        /// Default: `usize::MAX`
        #[must_use]
        pub fn max_encoding_message_size(self, limit: usize) -> Self {
            Self {
                inner: self.inner.max_encoding_message_size(limit),
            }
        }
        /// RPC method for tokenizing input text.
        ///
        /// Unary call: on success the response wraps a single `TokenizeResponse`.
        /// If the underlying service is not ready, a `tonic::Code::Unknown` status
        /// is returned before anything is sent.
        pub async fn tokenize(
            &mut self,
            request: impl tonic::IntoRequest<super::TokenizeRequest>,
        ) -> std::result::Result<
            tonic::Response<super::TokenizeResponse>,
            tonic::Status,
        > {
            // Bail out early when the transport cannot accept a request.
            if let Err(not_ready) = self.inner.ready().await {
                let cause: StdError = not_ready.into();
                return Err(tonic::Status::new(
                    tonic::Code::Unknown,
                    format!("Service was not ready: {}", cause),
                ));
            }
            let mut grpc_request = request.into_request();
            grpc_request.extensions_mut().insert(GrpcMethod::new(
                "yandex.cloud.ai.llm.v1alpha.TokenizerService",
                "Tokenize",
            ));
            let rpc_path = http::uri::PathAndQuery::from_static(
                "/yandex.cloud.ai.llm.v1alpha.TokenizerService/Tokenize",
            );
            self.inner
                .unary(grpc_request, rpc_path, tonic::codec::ProstCodec::default())
                .await
        }
    }
}
/// Generated client implementations.
pub mod embeddings_service_client {
    #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)]
    use tonic::codegen::*;
    use tonic::codegen::http::Uri;
    /// gRPC client for the service that produces embeddings for text data.
    #[derive(Debug, Clone)]
    pub struct EmbeddingsServiceClient<T> {
        inner: tonic::client::Grpc<T>,
    }
    impl EmbeddingsServiceClient<tonic::transport::Channel> {
        /// Builds an endpoint from `dst`, connects to it, and wraps the resulting
        /// channel in a new client.
        pub async fn connect<D>(dst: D) -> Result<Self, tonic::transport::Error>
        where
            D: TryInto<tonic::transport::Endpoint>,
            D::Error: Into<StdError>,
        {
            let endpoint = tonic::transport::Endpoint::new(dst)?;
            let channel = endpoint.connect().await?;
            Ok(Self::new(channel))
        }
    }
    impl<T> EmbeddingsServiceClient<T>
    where
        T: tonic::client::GrpcService<tonic::body::BoxBody>,
        T::Error: Into<StdError>,
        T::ResponseBody: Body<Data = Bytes> + Send + 'static,
        <T::ResponseBody as Body>::Error: Into<StdError> + Send,
    {
        /// Wraps an existing transport/service in a client.
        pub fn new(inner: T) -> Self {
            Self {
                inner: tonic::client::Grpc::new(inner),
            }
        }
        /// Wraps an existing transport/service in a client that uses `origin`.
        pub fn with_origin(inner: T, origin: Uri) -> Self {
            Self {
                inner: tonic::client::Grpc::with_origin(inner, origin),
            }
        }
        /// Wraps `inner` in an `InterceptedService` driven by `interceptor` and
        /// builds a client on top of it.
        pub fn with_interceptor<F>(
            inner: T,
            interceptor: F,
        ) -> EmbeddingsServiceClient<InterceptedService<T, F>>
        where
            F: tonic::service::Interceptor,
            T::ResponseBody: Default,
            T: tonic::codegen::Service<
                http::Request<tonic::body::BoxBody>,
                Response = http::Response<
                    <T as tonic::client::GrpcService<tonic::body::BoxBody>>::ResponseBody,
                >,
            >,
            <T as tonic::codegen::Service<
                http::Request<tonic::body::BoxBody>,
            >>::Error: Into<StdError> + Send + Sync,
        {
            let intercepted = InterceptedService::new(inner, interceptor);
            EmbeddingsServiceClient::new(intercepted)
        }
        /// Compress requests with the given encoding.
        ///
        /// The server has to support the encoding; otherwise it might respond with
        /// an error.
        #[must_use]
        pub fn send_compressed(self, encoding: CompressionEncoding) -> Self {
            Self {
                inner: self.inner.send_compressed(encoding),
            }
        }
        /// Enable decompressing responses.
        #[must_use]
        pub fn accept_compressed(self, encoding: CompressionEncoding) -> Self {
            Self {
                inner: self.inner.accept_compressed(encoding),
            }
        }
        /// Limits the maximum size of a decoded message.
        ///
        /// Default: `4MB`
        #[must_use]
        pub fn max_decoding_message_size(self, limit: usize) -> Self {
            Self {
                inner: self.inner.max_decoding_message_size(limit),
            }
        }
        /// Limits the maximum size of an encoded message.
        ///
        /// Default: `usize::MAX`
        #[must_use]
        pub fn max_encoding_message_size(self, limit: usize) -> Self {
            Self {
                inner: self.inner.max_encoding_message_size(limit),
            }
        }
        /// RPC method to obtain embeddings for input text data.
        ///
        /// Unary call: on success the response wraps a single `EmbeddingResponse`.
        /// If the underlying service is not ready, a `tonic::Code::Unknown` status
        /// is returned before anything is sent.
        pub async fn embedding(
            &mut self,
            request: impl tonic::IntoRequest<super::EmbeddingRequest>,
        ) -> std::result::Result<
            tonic::Response<super::EmbeddingResponse>,
            tonic::Status,
        > {
            // Bail out early when the transport cannot accept a request.
            if let Err(not_ready) = self.inner.ready().await {
                let cause: StdError = not_ready.into();
                return Err(tonic::Status::new(
                    tonic::Code::Unknown,
                    format!("Service was not ready: {}", cause),
                ));
            }
            let mut grpc_request = request.into_request();
            grpc_request.extensions_mut().insert(GrpcMethod::new(
                "yandex.cloud.ai.llm.v1alpha.EmbeddingsService",
                "Embedding",
            ));
            let rpc_path = http::uri::PathAndQuery::from_static(
                "/yandex.cloud.ai.llm.v1alpha.EmbeddingsService/Embedding",
            );
            self.inner
                .unary(grpc_request, rpc_path, tonic::codec::ProstCodec::default())
                .await
        }
    }
}
/// Generated client implementations.
pub mod text_generation_async_service_client {
    #![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)]
    use tonic::codegen::*;
    use tonic::codegen::http::Uri;
    /// gRPC client for the asynchronous text generation service.
    #[derive(Debug, Clone)]
    pub struct TextGenerationAsyncServiceClient<T> {
        inner: tonic::client::Grpc<T>,
    }
    impl TextGenerationAsyncServiceClient<tonic::transport::Channel> {
        /// Builds an endpoint from `dst`, connects to it, and wraps the resulting
        /// channel in a new client.
        pub async fn connect<D>(dst: D) -> Result<Self, tonic::transport::Error>
        where
            D: TryInto<tonic::transport::Endpoint>,
            D::Error: Into<StdError>,
        {
            let endpoint = tonic::transport::Endpoint::new(dst)?;
            let channel = endpoint.connect().await?;
            Ok(Self::new(channel))
        }
    }
    impl<T> TextGenerationAsyncServiceClient<T>
    where
        T: tonic::client::GrpcService<tonic::body::BoxBody>,
        T::Error: Into<StdError>,
        T::ResponseBody: Body<Data = Bytes> + Send + 'static,
        <T::ResponseBody as Body>::Error: Into<StdError> + Send,
    {
        /// Wraps an existing transport/service in a client.
        pub fn new(inner: T) -> Self {
            Self {
                inner: tonic::client::Grpc::new(inner),
            }
        }
        /// Wraps an existing transport/service in a client that uses `origin`.
        pub fn with_origin(inner: T, origin: Uri) -> Self {
            Self {
                inner: tonic::client::Grpc::with_origin(inner, origin),
            }
        }
        /// Wraps `inner` in an `InterceptedService` driven by `interceptor` and
        /// builds a client on top of it.
        pub fn with_interceptor<F>(
            inner: T,
            interceptor: F,
        ) -> TextGenerationAsyncServiceClient<InterceptedService<T, F>>
        where
            F: tonic::service::Interceptor,
            T::ResponseBody: Default,
            T: tonic::codegen::Service<
                http::Request<tonic::body::BoxBody>,
                Response = http::Response<
                    <T as tonic::client::GrpcService<tonic::body::BoxBody>>::ResponseBody,
                >,
            >,
            <T as tonic::codegen::Service<
                http::Request<tonic::body::BoxBody>,
            >>::Error: Into<StdError> + Send + Sync,
        {
            let intercepted = InterceptedService::new(inner, interceptor);
            TextGenerationAsyncServiceClient::new(intercepted)
        }
        /// Compress requests with the given encoding.
        ///
        /// The server has to support the encoding; otherwise it might respond with
        /// an error.
        #[must_use]
        pub fn send_compressed(self, encoding: CompressionEncoding) -> Self {
            Self {
                inner: self.inner.send_compressed(encoding),
            }
        }
        /// Enable decompressing responses.
        #[must_use]
        pub fn accept_compressed(self, encoding: CompressionEncoding) -> Self {
            Self {
                inner: self.inner.accept_compressed(encoding),
            }
        }
        /// Limits the maximum size of a decoded message.
        ///
        /// Default: `4MB`
        #[must_use]
        pub fn max_decoding_message_size(self, limit: usize) -> Self {
            Self {
                inner: self.inner.max_decoding_message_size(limit),
            }
        }
        /// Limits the maximum size of an encoded message.
        ///
        /// Default: `usize::MAX`
        #[must_use]
        pub fn max_encoding_message_size(self, limit: usize) -> Self {
            Self {
                inner: self.inner.max_encoding_message_size(limit),
            }
        }
        /// RPC method for instructing the model to generate text.
        ///
        /// Unary call: on success the response wraps an `Operation` rather than
        /// the generated text itself. If the underlying service is not ready, a
        /// `tonic::Code::Unknown` status is returned before anything is sent.
        pub async fn instruct(
            &mut self,
            request: impl tonic::IntoRequest<super::InstructRequest>,
        ) -> std::result::Result<
            tonic::Response<super::super::super::super::operation::Operation>,
            tonic::Status,
        > {
            // Bail out early when the transport cannot accept a request.
            if let Err(not_ready) = self.inner.ready().await {
                let cause: StdError = not_ready.into();
                return Err(tonic::Status::new(
                    tonic::Code::Unknown,
                    format!("Service was not ready: {}", cause),
                ));
            }
            let mut grpc_request = request.into_request();
            grpc_request.extensions_mut().insert(GrpcMethod::new(
                "yandex.cloud.ai.llm.v1alpha.TextGenerationAsyncService",
                "Instruct",
            ));
            let rpc_path = http::uri::PathAndQuery::from_static(
                "/yandex.cloud.ai.llm.v1alpha.TextGenerationAsyncService/Instruct",
            );
            self.inner
                .unary(grpc_request, rpc_path, tonic::codec::ProstCodec::default())
                .await
        }
    }
}