// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

// Every definition in this file is declared in the
// `google.cloud.documentai.v1` package.
package google.cloud.documentai.v1;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1/document.proto";
import "google/cloud/documentai/v1/document_io.proto";
import "google/cloud/documentai/v1/document_schema.proto";
import "google/cloud/documentai/v1/operation_metadata.proto";
import "google/cloud/documentai/v1/processor.proto";
import "google/cloud/documentai/v1/processor_type.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
// Language-specific code generation settings.
option csharp_namespace = "Google.Cloud.DocumentAI.V1";
option go_package =
    "google.golang.org/genproto/googleapis/cloud/documentai/v1;documentai";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiProcessorService";
option java_package = "com.google.cloud.documentai.v1";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1";
option ruby_package = "Google::Cloud::DocumentAI::V1";

// Resource name pattern for the human review config that belongs to a
// processor.
option (google.api.resource_definition) = {
  type: "documentai.googleapis.com/HumanReviewConfig"
  pattern: "projects/{project}/locations/{location}/processors/{processor}/humanReviewConfig"
};

// Resource name pattern for a location inside a project.
option (google.api.resource_definition) = {
  type: "documentai.googleapis.com/Location"
  pattern: "projects/{project}/locations/{location}"
};
// Service for calling Cloud DocumentAI to process documents according to a
// processor's definition. Processors are built using state-of-the-art Google
// AI such as natural language, computer vision, and translation, and they
// extract structured information from unstructured or semi-structured
// documents.
service DocumentProcessorService {
  option (google.api.default_host) = "documentai.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Processes a single document.
  rpc ProcessDocument(ProcessRequest) returns (ProcessResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/processors/*}:process"
      body: "*"
      additional_bindings {
        post: "/v1/{name=projects/*/locations/*/processors/*/processorVersions/*}:process"
        body: "*"
      }
    };
    option (google.api.method_signature) = "name";
  }

  // Long-running operation (LRO) endpoint that batch processes many
  // documents. The output is written to Cloud Storage as JSON in the
  // [Document] format.
  rpc BatchProcessDocuments(BatchProcessRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/processors/*}:batchProcess"
      body: "*"
      additional_bindings {
        post: "/v1/{name=projects/*/locations/*/processors/*/processorVersions/*}:batchProcess"
        body: "*"
      }
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "BatchProcessResponse"
      metadata_type: "BatchProcessMetadata"
    };
  }

  // Fetches processor types. This method is not paginated, which is why it
  // is not named ListProcessorTypes.
  rpc FetchProcessorTypes(FetchProcessorTypesRequest)
      returns (FetchProcessorTypesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}:fetchProcessorTypes"
    };
    option (google.api.method_signature) = "parent";
  }

  // Lists the processor types that exist.
  rpc ListProcessorTypes(ListProcessorTypesRequest)
      returns (ListProcessorTypesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/processorTypes"
    };
    option (google.api.method_signature) = "parent";
  }

  // Lists all processors that belong to this project.
  rpc ListProcessors(ListProcessorsRequest) returns (ListProcessorsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/processors"
    };
    option (google.api.method_signature) = "parent";
  }

  // Gets the details of a processor.
  rpc GetProcessor(GetProcessorRequest) returns (Processor) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/processors/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Gets the details of a processor version.
  rpc GetProcessorVersion(GetProcessorVersionRequest)
      returns (ProcessorVersion) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/processors/*/processorVersions/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists all versions of a processor.
  rpc ListProcessorVersions(ListProcessorVersionsRequest)
      returns (ListProcessorVersionsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/processors/*}/processorVersions"
    };
    option (google.api.method_signature) = "parent";
  }

  // Deletes the processor version. All artifacts under the processor version
  // will be deleted.
  rpc DeleteProcessorVersion(DeleteProcessorVersionRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/processors/*/processorVersions/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "DeleteProcessorVersionMetadata"
    };
  }

  // Deploys the processor version.
  rpc DeployProcessorVersion(DeployProcessorVersionRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/processors/*/processorVersions/*}:deploy"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "DeployProcessorVersionResponse"
      metadata_type: "DeployProcessorVersionMetadata"
    };
  }

  // Undeploys the processor version.
  rpc UndeployProcessorVersion(UndeployProcessorVersionRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/processors/*/processorVersions/*}:undeploy"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "UndeployProcessorVersionResponse"
      metadata_type: "UndeployProcessorVersionMetadata"
    };
  }

  // Creates a processor of the processor type that the user chose.
  // After creation the processor will be in the "ENABLED" state by default.
  rpc CreateProcessor(CreateProcessorRequest) returns (Processor) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/processors"
      body: "processor"
    };
    option (google.api.method_signature) = "parent,processor";
  }

  // Deletes the processor. If the processor was enabled, all deployed model
  // artifacts are unloaded first; then all artifacts associated with this
  // processor are deleted.
  rpc DeleteProcessor(DeleteProcessorRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/processors/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "DeleteProcessorMetadata"
    };
  }

  // Enables a processor.
  rpc EnableProcessor(EnableProcessorRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/processors/*}:enable"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "EnableProcessorResponse"
      metadata_type: "EnableProcessorMetadata"
    };
  }

  // Disables a processor.
  rpc DisableProcessor(DisableProcessorRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/processors/*}:disable"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "DisableProcessorResponse"
      metadata_type: "DisableProcessorMetadata"
    };
  }

  // Sets the default (active) version of a
  // [Processor][google.cloud.documentai.v1.Processor] that will be used in
  // [ProcessDocument][google.cloud.documentai.v1.DocumentProcessorService.ProcessDocument]
  // and
  // [BatchProcessDocuments][google.cloud.documentai.v1.DocumentProcessorService.BatchProcessDocuments].
  rpc SetDefaultProcessorVersion(SetDefaultProcessorVersionRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{processor=projects/*/locations/*/processors/*}:setDefaultProcessorVersion"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "SetDefaultProcessorVersionResponse"
      metadata_type: "SetDefaultProcessorVersionMetadata"
    };
  }

  // Sends a document for Human Review. The input document should be processed
  // by the specified processor.
  rpc ReviewDocument(ReviewDocumentRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{human_review_config=projects/*/locations/*/processors/*/humanReviewConfig}:reviewDocument"
      body: "*"
    };
    option (google.api.method_signature) = "human_review_config";
    option (google.longrunning.operation_info) = {
      response_type: "ReviewDocumentResponse"
      metadata_type: "ReviewDocumentOperationMetadata"
    };
  }
}
// Request message for the process document method.
message ProcessRequest {
  // The document payload. At most one of the sources below is set.
  oneof source {
    // A document proto supplied inline.
    Document inline_document = 4;

    // Raw document content (bytes).
    RawDocument raw_document = 5;
  }

  // Required. The resource name of the
  // [Processor][google.cloud.documentai.v1.Processor] or
  // [ProcessorVersion][google.cloud.documentai.v1.ProcessorVersion] to use for
  // processing. If a [Processor][google.cloud.documentai.v1.Processor] is
  // specified, the server will use its
  // [default version][google.cloud.documentai.v1.Processor.default_processor_version].
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}`, or
  // `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "*"
    }
  ];

  // Whether the Human Review feature should be skipped for this request.
  // Defaults to false.
  bool skip_human_review = 3;

  // Specifies which fields to include in the document of the ProcessResponse.
  google.protobuf.FieldMask field_mask = 6;
}
// The status of human review on a processed document.
message HumanReviewStatus {
  // The final state of human review on a processed document.
  enum State {
    // Human review state is unspecified. Most likely due to an internal
    // error.
    STATE_UNSPECIFIED = 0;

    // Human review is skipped for the document. This can happen because
    // human review is not enabled on the processor, or because the
    // processing request has been set to skip this document.
    SKIPPED = 1;

    // Human review validation is triggered and passed, so no review is
    // needed.
    VALIDATION_PASSED = 2;

    // Human review validation is triggered and the document is under review.
    IN_PROGRESS = 3;

    // An error happened while triggering human review; see `state_message`
    // for details.
    ERROR = 4;
  }

  // The state of human review on the processing request.
  State state = 1;

  // A message providing more details about the human review state.
  string state_message = 2;

  // The name of the operation triggered by the processed document. This field
  // is populated only when `state` is `IN_PROGRESS`. The operation has the
  // same response type and metadata as the long running operation returned by
  // the [ReviewDocument] method.
  string human_review_operation = 3;
}
// Response message for the process document method.
message ProcessResponse {
  // The document payload. Which fields are populated depends on the
  // processor's behavior.
  Document document = 1;

  // The status of human review on the processed document.
  HumanReviewStatus human_review_status = 3;
}
// Request message for the batch process document method.
message BatchProcessRequest {
  // Required. The resource name of the
  // [Processor][google.cloud.documentai.v1.Processor] or
  // [ProcessorVersion][google.cloud.documentai.v1.ProcessorVersion].
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}`, or
  // `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "*"
    }
  ];

  // The input documents for the batch process.
  BatchDocumentsInputConfig input_documents = 5;

  // The overall output config for the batch process.
  DocumentOutputConfig document_output_config = 6;

  // Whether the Human Review feature should be skipped for this request.
  // Defaults to false.
  bool skip_human_review = 4;
}
// Response message for the batch process document method. It currently
// declares no fields.
message BatchProcessResponse {}
// The long running operation metadata for the batch process method.
message BatchProcessMetadata {
  // The status of each individual document in the batch process.
  message IndividualProcessStatus {
    // The source of the document, same as the [input_gcs_source] field in the
    // request when the batch process started. The batch process starts by
    // taking a snapshot of that document, since a user can move or change the
    // document during the process.
    string input_gcs_source = 1;

    // The status of processing the document.
    google.rpc.Status status = 2;

    // The output_gcs_destination (in the request as
    // `output_gcs_destination`) of the processed document if processing was
    // successful, otherwise empty.
    string output_gcs_destination = 3;

    // The status of human review on the processed document.
    HumanReviewStatus human_review_status = 5;
  }

  // Possible states of the batch processing operation.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // The request operation is waiting for scheduling.
    WAITING = 1;

    // The request is being processed.
    RUNNING = 2;

    // The batch processing completed successfully.
    SUCCEEDED = 3;

    // The batch processing is being cancelled.
    CANCELLING = 4;

    // The batch processing was cancelled.
    CANCELLED = 5;

    // The batch processing has failed.
    FAILED = 6;
  }

  // The state of the current batch processing.
  State state = 1;

  // A message providing more details about the current state of processing.
  // For example, the error message if the operation has failed.
  string state_message = 2;

  // The creation time of the operation.
  google.protobuf.Timestamp create_time = 3;

  // The last update time of the operation.
  google.protobuf.Timestamp update_time = 4;

  // The list of response details, one per document.
  repeated IndividualProcessStatus individual_process_statuses = 5;
}
// Request message for the fetch processor types method.
message FetchProcessorTypesRequest {
  // Required. The project of processor type to list.
  // Which processor types are available may depend on the allow-listing on
  // projects.
  // Format: `projects/{project}/locations/{location}`
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "documentai.googleapis.com/ProcessorType"
    }
  ];
}
// Response message for the fetch processor types method.
message FetchProcessorTypesResponse {
  // The list of processor types.
  repeated ProcessorType processor_types = 1;
}
// Request message for the list processor types method.
message ListProcessorTypesRequest {
  // Required. The location of processor type to list.
  // Which processor types are available may depend on the allow-listing on
  // projects.
  // Format: `projects/{project}/locations/{location}`
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "documentai.googleapis.com/ProcessorType"
    }
  ];

  // The maximum number of processor types to return.
  // If unspecified, at most 100 processor types will be returned.
  // The maximum value is 500; values above 500 will be coerced to 500.
  int32 page_size = 2;

  // Used to retrieve the next page of results; empty if at the end of the
  // list.
  string page_token = 3;
}
// Response message for the list processor types method.
message ListProcessorTypesResponse {
  // The processor types.
  repeated ProcessorType processor_types = 1;

  // Points to the next page, otherwise empty.
  string next_page_token = 2;
}
// Request message for listing all processors that belong to a project.
message ListProcessorsRequest {
  // Required. The parent (project and location) which owns this collection of
  // Processors.
  // Format: `projects/{project}/locations/{location}`
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "documentai.googleapis.com/Processor"
    }
  ];

  // The maximum number of processors to return.
  // If unspecified, at most 50 processors will be returned.
  // The maximum value is 100; values above 100 will be coerced to 100.
  int32 page_size = 2;

  // Processors are returned sorted by creation time. The page token points to
  // the next processor.
  string page_token = 3;
}
// Response message for the list processors method.
message ListProcessorsResponse {
  // The list of processors.
  repeated Processor processors = 1;

  // Points to the next processor, otherwise empty.
  string next_page_token = 2;
}
// Request message for the get processor method.
message GetProcessorRequest {
  // Required. The processor resource name.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Processor"
    }
  ];
}
// Request message for the get processor version method.
message GetProcessorVersionRequest {
  // Required. The processor version resource name.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/ProcessorVersion"
    }
  ];
}
// Request message for listing all processor versions that belong to a
// processor.
message ListProcessorVersionsRequest {
  // Required. The parent (project, location and processor) whose versions are
  // listed.
  // Format: `projects/{project}/locations/{location}/processors/{processor}`
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "documentai.googleapis.com/ProcessorVersion"
    }
  ];

  // The maximum number of processor versions to return.
  // If unspecified, at most 10 processor versions will be returned.
  // The maximum value is 20; values above 20 will be coerced to 20.
  int32 page_size = 2;

  // Processor versions are returned sorted by creation time. The page token
  // points to the next processor version.
  string page_token = 3;
}
// Response message for the list processor versions method.
message ListProcessorVersionsResponse {
  // The list of processor versions.
  repeated ProcessorVersion processor_versions = 1;

  // Points to the next processor version, otherwise empty.
  string next_page_token = 2;
}
// Request message for the delete processor version method.
message DeleteProcessorVersionRequest {
  // Required. The resource name of the processor version to be deleted.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/ProcessorVersion"
    }
  ];
}
// The long running operation metadata for the delete processor version
// method.
message DeleteProcessorVersionMetadata {
  // The basic metadata of the long running operation.
  CommonOperationMetadata common_metadata = 1;
}
// Request message for the deploy processor version method.
message DeployProcessorVersionRequest {
  // Required. The resource name of the processor version to be deployed.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/ProcessorVersion"
    }
  ];
}
// Response message for the deploy processor version method. It currently
// declares no fields.
message DeployProcessorVersionResponse {}
// The long running operation metadata for the deploy processor version
// method.
message DeployProcessorVersionMetadata {
  // The basic metadata of the long running operation.
  CommonOperationMetadata common_metadata = 1;
}
// Request message for the undeploy processor version method.
message UndeployProcessorVersionRequest {
  // Required. The resource name of the processor version to be undeployed.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/ProcessorVersion"
    }
  ];
}
// Response message for the undeploy processor version method. It currently
// declares no fields.
message UndeployProcessorVersionResponse {}
// The long running operation metadata for the undeploy processor version
// method.
message UndeployProcessorVersionMetadata {
  // The basic metadata of the long running operation.
  CommonOperationMetadata common_metadata = 1;
}
// Request message for creating a processor. Note that this request is sent to
// a regionalized backend service; if the processor type is not available in
// that region, the creation will fail.
message CreateProcessorRequest {
  // Required. The parent (project and location) under which to create the
  // processor.
  // Format: `projects/{project}/locations/{location}`
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "documentai.googleapis.com/Processor"
    }
  ];

  // Required. The processor to be created. [processor_type] and
  // [display_name] must be set. The processor is also under CMEK if the CMEK
  // fields are set.
  Processor processor = 2 [(google.api.field_behavior) = REQUIRED];
}
// Request message for the delete processor method.
message DeleteProcessorRequest {
  // Required. The resource name of the processor to be deleted.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Processor"
    }
  ];
}
// The long running operation metadata for the delete processor method.
message DeleteProcessorMetadata {
  // The basic metadata of the long running operation.
  CommonOperationMetadata common_metadata = 5;
}
// Request message for the enable processor method.
message EnableProcessorRequest {
  // Required. The resource name of the processor to be enabled.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Processor"
    }
  ];
}
// Response message for the enable processor method.
// Intentionally left empty so that fields can be added in the future.
message EnableProcessorResponse {}
// The long running operation metadata for the enable processor method.
message EnableProcessorMetadata {
  // The basic metadata of the long running operation.
  CommonOperationMetadata common_metadata = 5;
}
// Request message for the disable processor method.
message DisableProcessorRequest {
  // Required. The resource name of the processor to be disabled.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Processor"
    }
  ];
}
// Response message for the disable processor method.
// Intentionally left empty so that fields can be added in the future.
message DisableProcessorResponse {}
// The long running operation metadata for the disable processor method.
message DisableProcessorMetadata {
  // The basic metadata of the long running operation.
  CommonOperationMetadata common_metadata = 5;
}
// Request message for the set default processor version method.
message SetDefaultProcessorVersionRequest {
  // Required. The resource name of the
  // [Processor][google.cloud.documentai.v1.Processor] whose default version
  // is to be changed.
  string processor = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Processor"
    }
  ];

  // Required. The resource name of the child
  // [ProcessorVersion][google.cloud.documentai.v1.ProcessorVersion] to use as
  // default.
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{version}`
  string default_processor_version = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/ProcessorVersion"
    }
  ];
}
// Response message for the set default processor version method. It
// currently declares no fields.
message SetDefaultProcessorVersionResponse {}
// The long running operation metadata for the set default processor version
// method.
message SetDefaultProcessorVersionMetadata {
  // The basic metadata of the long running operation.
  CommonOperationMetadata common_metadata = 1;
}
// Request message for the review document method.
message ReviewDocumentRequest {
  // The priority level of the human review task.
  enum Priority {
    // The default priority level.
    DEFAULT = 0;

    // The urgent priority level. The labeling manager should allocate
    // labeler resource to the urgent task queue to respect this priority
    // level.
    URGENT = 1;
  }

  // The document payload.
  oneof source {
    // A document proto supplied inline.
    Document inline_document = 4;
  }

  // Required. The resource name of the HumanReviewConfig that the document
  // will be reviewed with.
  string human_review_config = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/HumanReviewConfig"
    }
  ];

  // Whether validation should be performed on the ad-hoc review request.
  bool enable_schema_validation = 3;

  // The priority of the human review task.
  Priority priority = 5;

  // The document schema of the human review task.
  DocumentSchema document_schema = 6;
}
// Response message for the review document method.
message ReviewDocumentResponse {
  // Possible states of the review operation.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // The review operation was rejected by the reviewer.
    REJECTED = 1;

    // The review operation succeeded.
    SUCCEEDED = 2;
  }

  // The Cloud Storage URI of the human reviewed document, if the review
  // succeeded.
  string gcs_destination = 1;

  // The state of the review operation.
  State state = 2;

  // The reason why the review was rejected by the reviewer.
  string rejection_reason = 3;
}
// The long running operation metadata for the review document method.
message ReviewDocumentOperationMetadata {
  // The basic metadata of the long running operation.
  CommonOperationMetadata common_metadata = 5;

  // The Crowd Compute question ID.
  string question_id = 6;
}