// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta2;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta2/document.proto";
import "google/cloud/documentai/v1beta2/geometry.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta2";
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiProto";
option java_package = "com.google.cloud.documentai.v1beta2";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta2";
option ruby_package = "Google::Cloud::DocumentAI::V1beta2";
// Service that parses structured information from unstructured or
// semi-structured documents using state-of-the-art Google AI such as natural
// language, computer vision, and translation.
service DocumentUnderstandingService {
  option (google.api.default_host) = "documentai.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Long-running operation (LRO) endpoint for batch processing many
  // documents. Results are written to Cloud Storage as JSON in the [Document]
  // format.
  rpc BatchProcessDocuments(BatchProcessDocumentsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/locations/*}/documents:batchProcess"
      body: "*"
      additional_bindings {
        post: "/v1beta2/{parent=projects/*}/documents:batchProcess"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "BatchProcessDocumentsResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Processes one document.
  rpc ProcessDocument(ProcessDocumentRequest) returns (Document) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/locations/*}/documents:process"
      body: "*"
      additional_bindings {
        post: "/v1beta2/{parent=projects/*}/documents:process"
        body: "*"
      }
    };
  }
}
// Request message for batch processing documents as an asynchronous
// operation. Output is written to Cloud Storage as JSON in the [Document]
// format.
message BatchProcessDocumentsRequest {
  // Required. One request per document to process.
  repeated ProcessDocumentRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // The project and location that the call targets.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // When no location is given, a region is chosen automatically.
  string parent = 2;
}
// Request to process one document.
message ProcessDocumentRequest {
  // Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no location is specified, a region will be chosen automatically.
  // This field is only populated when used in ProcessDocument method.
  string parent = 9;

  // Required. Information about the input file.
  InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The desired output location. This field is only needed in
  // BatchProcessDocumentsRequest.
  OutputConfig output_config = 2 [(google.api.field_behavior) = OPTIONAL];

  // Specifies a known document type for deeper structure detection. Valid
  // values are currently "general" and "invoice". If not provided, "general"
  // is used as default. If any other value is given, the request is rejected.
  string document_type = 3;

  // Controls table extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  TableExtractionParams table_extraction_params = 4;

  // Controls form extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  FormExtractionParams form_extraction_params = 5;

  // Controls entity extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  EntityExtractionParams entity_extraction_params = 6;

  // Controls OCR behavior. If not specified, the system will decide reasonable
  // defaults.
  OcrParams ocr_params = 7;

  // Controls AutoML model prediction behavior. AutoMlParams cannot be used
  // together with other Params.
  AutoMlParams automl_params = 8;
}
// Response to a batch document processing request. It is returned in the
// LRO Operation once the operation has completed.
message BatchProcessDocumentsResponse {
  // One response per individual document.
  repeated ProcessDocumentResponse responses = 1;
}
// Response to a request to process a single document.
message ProcessDocumentResponse {
  // Information about the input file; identical to the corresponding input
  // config in the request.
  InputConfig input_config = 1;

  // Output location of the parsed responses. Responses are written to this
  // location as JSON-serialized `Document` objects.
  OutputConfig output_config = 2;
}
// Parameters that control Optical Character Recognition (OCR) behavior.
message OcrParams {
  // Languages to use for OCR. Leaving this empty yields the best results in
  // most cases, because it enables automatic language detection. Setting
  // `language_hints` is not needed for languages based on the Latin alphabet.
  // In rare cases where the language of the text in the image is known, a
  // hint helps produce better results (although a wrong hint is a significant
  // hindrance). Document processing returns an error if one or more of the
  // specified languages is not one of the supported languages.
  repeated string language_hints = 1;
}
// Parameters that control table extraction behavior.
message TableExtractionParams {
  // Whether table extraction is enabled.
  bool enabled = 1;

  // Optional. Bounding box hints for tables, which can be provided for
  // complex cases where the algorithm cannot locate the table(s).
  repeated TableBoundHint table_bound_hints = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Hints for table headers. Extraction is biased towards producing
  // these terms as table headers, which may improve accuracy.
  repeated string header_hints = 3 [(google.api.field_behavior) = OPTIONAL];

  // Model version of the table extraction system. The default is
  // "builtin/stable"; specify "builtin/latest" for the latest model.
  string model_version = 4;
}
// Hint giving a table's bounding box on the page, used for table parsing.
message TableBoundHint {
  // Optional. The page number, for multi-paged inputs, that this hint applies
  // to. This value is 1-based. When not provided, the hint applies to all
  // pages by default.
  int32 page_number = 1 [(google.api.field_behavior) = OPTIONAL];

  // Bounding box hint for a table on this page. Coordinates must be
  // normalized to [0,1], and the bounding box must be an axis-aligned
  // rectangle.
  BoundingPoly bounding_box = 2;
}
// Parameters to control form extraction behavior.
message FormExtractionParams {
  // Whether to enable form extraction.
  bool enabled = 1;

  // User can provide pairs of (key text, value type) to improve the parsing
  // result.
  //
  // For example, if a document has a field called "Date" that holds a date
  // value and a field called "Amount" that may hold either a currency value
  // (e.g., "$500.00") or a simple number value (e.g., "20"), you could use the
  // following hints: [ {"key": "Date", "value_types": [ "DATE" ]}, {"key":
  // "Amount", "value_types": [ "PRICE", "NUMBER" ]} ]
  //
  // If the value type is unknown, but you want to provide hints for the keys,
  // you can leave the value_types field blank. e.g. {"key": "Date",
  // "value_types": []}
  repeated KeyValuePairHint key_value_pair_hints = 2;

  // Model version of the form extraction system. Default is
  // "builtin/stable". Specify "builtin/latest" for the latest model.
  // For custom form models, specify: "custom/{model_name}". Model name
  // format is "bucket_name/path/to/modeldir" corresponding to
  // "gs://bucket_name/path/to/modeldir" where annotated examples are stored.
  string model_version = 3;
}
// A hint supplied by the user for a key value pair.
message KeyValuePairHint {
  // Key text that the hint applies to.
  string key = 1;

  // Types of the value. Matching is case-insensitive, and each entry could be
  // one of: ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER, ID, NUMBER,
  // EMAIL, PRICE, TERMS, DATE, NAME. Types not in this list will be ignored.
  repeated string value_types = 2;
}
// Parameters that control entity extraction behavior.
message EntityExtractionParams {
  // Whether entity extraction is enabled.
  bool enabled = 1;

  // Model version of the entity extraction. The default is "builtin/stable";
  // specify "builtin/latest" for the latest model.
  string model_version = 2;
}
// Parameters that control AutoML model prediction behavior.
message AutoMlParams {
  // The AutoML model's resource name.
  //
  // Format: `projects/{project-id}/locations/{location-id}/models/{model-id}`.
  string model = 1;
}
// Describes the desired input location and its metadata.
message InputConfig {
  // Required.
  oneof source {
    // Google Cloud Storage location that the input is read from. It must be
    // a single file.
    GcsSource gcs_source = 1;

    // Content in bytes, represented as a stream of bytes.
    // Note: As with all `bytes` fields, proto buffer messages use a pure
    // binary representation, whereas JSON representations use base64.
    //
    // This field only works for the synchronous ProcessDocument method.
    bytes contents = 3;
  }

  // Required. Mimetype of the input. Currently supported mimetypes are
  // application/pdf, image/tiff, and image/gif.
  // In addition, the application/json type is supported for requests with the
  // [ProcessDocumentRequest.automl_params][google.cloud.documentai.v1beta2.ProcessDocumentRequest.automl_params]
  // field set. The JSON file needs to be in
  // [Document][google.cloud.documentai.v1beta2.Document] format.
  string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
}
// Describes the desired output location and its metadata.
message OutputConfig {
  // Required.
  oneof destination {
    // Google Cloud Storage location that the output is written to.
    GcsDestination gcs_destination = 1;
  }

  // Maximum number of pages to include in each output Document shard JSON on
  // Google Cloud Storage.
  //
  // The valid range is [1, 100]. The default value is 20 when not specified.
  //
  // For example, one pdf file with 100 pages produces 100 parsed pages. If
  // `pages_per_shard` = 20, then 5 Document shard JSON files, each containing
  // 20 parsed pages, will be written under the prefix
  // [OutputConfig.gcs_destination.uri][] and suffix pages-x-to-y.json, where
  // x and y are 1-indexed page numbers.
  //
  // Example GCS outputs with 157 pages and pages_per_shard = 50:
  //
  // <prefix>pages-001-to-050.json
  // <prefix>pages-051-to-100.json
  // <prefix>pages-101-to-150.json
  // <prefix>pages-151-to-157.json
  int32 pages_per_shard = 2;
}
// The Google Cloud Storage location where the input file will be read from.
message GcsSource {
  // Required. URI of the Cloud Storage input file to read.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}
// The Google Cloud Storage location where the output file will be written to.
message GcsDestination {
  // Required. URI of the Cloud Storage location to write the output to.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}
// Contains metadata for the BatchProcessDocuments operation.
message OperationMetadata {
  // Possible states of the batch processing operation.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    ACCEPTED = 1;

    // Request operation is waiting for scheduling.
    WAITING = 2;

    // Request is being processed.
    RUNNING = 3;

    // The batch processing completed successfully.
    SUCCEEDED = 4;

    // The batch processing was cancelled.
    CANCELLED = 5;

    // The batch processing has failed.
    FAILED = 6;
  }

  // The state of the current batch processing.
  State state = 1;

  // A message providing more details about the current state of processing.
  string state_message = 2;

  // The creation time of the operation.
  google.protobuf.Timestamp create_time = 3;

  // The last update time of the operation.
  google.protobuf.Timestamp update_time = 4;
}