// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta2;
import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta2/geometry.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
// Per-language code generation options (namespaces, packages, and Java
// output layout) for the types declared in this file.
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta2";
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.documentai.v1beta2";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta2";
option ruby_package = "Google::Cloud::DocumentAI::V1beta2";
// Document represents the canonical document resource in Document Understanding
// AI.
// It is an interchange format that provides insights into documents and allows
// for collaboration between users and Document Understanding AI to iterate and
// optimize for quality.
message Document {
// Identifies one shard of a large document. Sharding may be performed on a
// large document to produce several document shards, and each shard carries
// this message to detail which shard it is.
message ShardInfo {
  // Index of this shard, counted from 0.
  int64 shard_index = 1;

  // How many shards there are in total.
  int64 shard_count = 2;

  // Index, within the global text of the overall document, of the first
  // character in
  // [Document.text][google.cloud.documentai.v1beta2.Document.text].
  int64 text_offset = 3;
}
// A Label attaches schema information and/or other metadata to segments
// within a [Document][google.cloud.documentai.v1beta2.Document]. When a single
// field carries multiple
// [Label][google.cloud.documentai.v1beta2.Document.Label]s, they can denote
// different labels, different instances of the same label created at
// different times, or some combination of both.
message Label {
  // Where the label came from.
  oneof source {
    // The label was generated by an AutoML model. This field holds the full
    // resource name of that AutoML model.
    //
    // Format:
    // `projects/{project-id}/locations/{location-id}/models/{model-id}`
    string automl_model = 2;
  }

  // The label's name.
  //
  // For a label generated from an AutoML Text Classification model, this
  // field is the name of the category.
  string name = 1;

  // Confidence score of the label assignment, between 0 and 1.
  float confidence = 3;
}
// Annotation describing common text style attributes, adhering to CSS
// conventions as much as possible.
message Style {
  // A font size together with its unit.
  message FontSize {
    // Size of the font used for the text.
    float size = 1;

    // Unit of the font size, following CSS naming (in, px, pt, etc.).
    string unit = 2;
  }

  // Text anchor indexing into the
  // [Document.text][google.cloud.documentai.v1beta2.Document.text].
  TextAnchor text_anchor = 1;

  // Color of the text.
  google.type.Color color = 2;

  // Background color of the text.
  google.type.Color background_color = 3;

  // Weight of the font. Possible values are normal, bold, bolder, and
  // lighter.
  // https://www.w3schools.com/cssref/pr_font_weight.asp
  string font_weight = 4;

  // Style of the text. Possible values are normal, italic, and oblique.
  // https://www.w3schools.com/cssref/pr_font_font-style.asp
  string text_style = 5;

  // Decoration of the text, following the CSS standard.
  // <text-decoration-line> <text-decoration-color> <text-decoration-style>
  // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
  string text_decoration = 6;

  // Size of the font.
  FontSize font_size = 7;
}
// One page of a [Document][google.cloud.documentai.v1beta2.Document].
message Page {
  // The dimension of a page.
  message Dimension {
    // Width of the page.
    float width = 1;

    // Height of the page.
    float height = 2;

    // Unit of the dimension.
    string unit = 3;
  }

  // A visual element that describes a layout unit on a page.
  message Layout {
    // The detected orientation in which a human would read the content.
    enum Orientation {
      // The orientation is unspecified.
      ORIENTATION_UNSPECIFIED = 0;

      // Aligned with page up.
      PAGE_UP = 1;

      // Aligned with page right.
      // To read, turn the head 90 degrees clockwise from upright.
      PAGE_RIGHT = 2;

      // Aligned with page down.
      // To read, turn the head 180 degrees from upright.
      PAGE_DOWN = 3;

      // Aligned with page left.
      // To read, turn the head 90 degrees counterclockwise from upright.
      PAGE_LEFT = 4;
    }

    // Text anchor indexing into the
    // [Document.text][google.cloud.documentai.v1beta2.Document.text].
    TextAnchor text_anchor = 1;

    // Confidence of this
    // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] within
    // the context of the object the layout is for; e.g. the confidence can be
    // for a single token, a table, a visual element, etc. depending on
    // context. Range [0, 1].
    float confidence = 2;

    // Bounding polygon of the
    // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout].
    BoundingPoly bounding_poly = 3;

    // Orientation detected for the
    // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout].
    Orientation orientation = 4;

    // Optional. The identifier that referencing
    // [PageAnchor][google.cloud.documentai.v1beta2.Document.PageAnchor]s use.
    string id = 5 [(google.api.field_behavior) = OPTIONAL];
  }

  // A set of lines (collected into paragraphs) sharing a common line-spacing
  // and orientation.
  message Block {
    // The [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] of
    // this [Block][google.cloud.documentai.v1beta2.Document.Page.Block].
    Layout layout = 1;

    // Detected languages, each with its confidence.
    repeated DetectedLanguage detected_languages = 2;
  }

  // A group of lines that a human would perceive as a paragraph.
  message Paragraph {
    // The [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] of
    // this
    // [Paragraph][google.cloud.documentai.v1beta2.Document.Page.Paragraph].
    Layout layout = 1;

    // Detected languages, each with its confidence.
    repeated DetectedLanguage detected_languages = 2;
  }

  // A group of tokens that a human would perceive as a line.
  // It does not cross column boundaries and can be horizontal, vertical, etc.
  message Line {
    // The [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] of
    // this [Line][google.cloud.documentai.v1beta2.Document.Page.Line].
    Layout layout = 1;

    // Detected languages, each with its confidence.
    repeated DetectedLanguage detected_languages = 2;
  }

  // A token that was detected on the page.
  message Token {
    // The break detected at the end of a
    // [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
    message DetectedBreak {
      // Denotes which type of break was found.
      enum Type {
        // The break type is unspecified.
        TYPE_UNSPECIFIED = 0;

        // One whitespace.
        SPACE = 1;

        // A whitespace that is wider.
        WIDE_SPACE = 2;

        // A hyphen indicating that a token has been split across lines.
        HYPHEN = 3;
      }

      // Type of the detected break.
      Type type = 1;
    }

    // The [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] of
    // this [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
    Layout layout = 1;

    // The break detected at the end of this
    // [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
    DetectedBreak detected_break = 2;

    // Detected languages, each with its confidence.
    repeated DetectedLanguage detected_languages = 3;
  }

  // A non-text visual element detected on the page, e.g. checkbox, signature
  // etc.
  message VisualElement {
    // The [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] of
    // this
    // [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement].
    Layout layout = 1;

    // The type of this
    // [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement].
    string type = 2;

    // Detected languages, each with its confidence.
    repeated DetectedLanguage detected_languages = 3;
  }

  // A representation of a table, similar to the HTML table structure.
  message Table {
    // One row of table cells.
    message TableRow {
      // The cells this row is made of.
      repeated TableCell cells = 1;
    }

    // The representation of one cell inside the table.
    message TableCell {
      // The [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] of
      // this
      // [TableCell][google.cloud.documentai.v1beta2.Document.Page.Table.TableCell].
      Layout layout = 1;

      // Number of rows spanned by this cell.
      int32 row_span = 2;

      // Number of columns spanned by this cell.
      int32 col_span = 3;

      // Detected languages, each with its confidence.
      repeated DetectedLanguage detected_languages = 4;
    }

    // The [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] of
    // this [Table][google.cloud.documentai.v1beta2.Document.Page.Table].
    Layout layout = 1;

    // The table's header rows.
    repeated TableRow header_rows = 2;

    // The table's body rows.
    repeated TableRow body_rows = 3;

    // Detected languages, each with its confidence.
    repeated DetectedLanguage detected_languages = 4;
  }

  // A form field that was detected on the page.
  message FormField {
    // The [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] of
    // the [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField]
    // name, e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
    Layout field_name = 1;

    // The [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] of
    // the [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField]
    // value.
    Layout field_value = 2;

    // Languages detected for the name, each with its confidence.
    repeated DetectedLanguage name_detected_languages = 3;

    // Languages detected for the value, each with its confidence.
    repeated DetectedLanguage value_detected_languages = 4;

    // The type of the value when the value is non-textual. Current valid
    // values are:
    // - blank (this indicates the field_value is normal text)
    // - "unfilled_checkbox"
    // - "filled_checkbox"
    string value_type = 5;

    // Internal field created for the Labeling UI to export key text.
    string corrected_key_text = 6;

    // Internal field created for the Labeling UI to export value text.
    string corrected_value_text = 7;
  }

  // A language detected for a structural component.
  message DetectedLanguage {
    // BCP-47 language code, such as "en-US" or "sr-Latn". For more
    // information, see
    // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
    string language_code = 1;

    // Confidence of the detected language. Range [0, 1].
    float confidence = 2;
  }

  // 1-based index of the current
  // [Page][google.cloud.documentai.v1beta2.Document.Page] within its parent
  // [Document][google.cloud.documentai.v1beta2.Document]. Useful when a page
  // is taken out of a [Document][google.cloud.documentai.v1beta2.Document]
  // for individual processing.
  int32 page_number = 1;

  // The page's physical dimension.
  Dimension dimension = 2;

  // The [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] of the
  // page.
  Layout layout = 3;

  // Detected languages, each with its confidence.
  repeated DetectedLanguage detected_languages = 4;

  // Text blocks visually detected on the page.
  // A block has a set of lines (collected into paragraphs) that have a common
  // line-spacing and orientation.
  repeated Block blocks = 5;

  // Text paragraphs visually detected on the page.
  // A paragraph is a collection of lines that a human would perceive as a
  // paragraph.
  repeated Paragraph paragraphs = 6;

  // Text lines visually detected on the page.
  // A line is a collection of tokens that a human would perceive as a line.
  repeated Line lines = 7;

  // Tokens visually detected on the page.
  repeated Token tokens = 8;

  // Non-text visual elements detected on the page, e.g. checkbox, signature
  // etc.
  repeated VisualElement visual_elements = 9;

  // Tables visually detected on the page.
  repeated Table tables = 10;

  // Form fields visually detected on the page.
  repeated FormField form_fields = 11;
}
// A phrase in the text that is a known entity type, such as a person, an
// organization, or location.
message Entity {
  // Text anchor indexing into the
  // [Document.text][google.cloud.documentai.v1beta2.Document.text].
  TextAnchor text_anchor = 1;

  // Required. Entity type from a schema e.g. `Address`.
  string type = 2 [(google.api.field_behavior) = REQUIRED];

  // Text value in the document e.g. `1600 Amphitheatre Pkwy`.
  string mention_text = 3;

  // Deprecated. Use the `id` field instead.
  //
  // The field stays on the wire for existing readers and writers; the
  // `deprecated` option only marks it as deprecated in generated code.
  string mention_id = 4 [deprecated = true];

  // Optional. Confidence of detected Schema entity. Range [0, 1].
  float confidence = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Represents the provenance of this entity wrt. the location on
  // the page where it was found.
  PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Canonical id. This will be a unique value in the entity list
  // for this document.
  string id = 7 [(google.api.field_behavior) = OPTIONAL];
}
// A relationship between two
// [Entities][google.cloud.documentai.v1beta2.Document.Entity].
message EntityRelation {
  // Id of the subject entity.
  string subject_id = 1;

  // Id of the object entity.
  string object_id = 2;

  // Description of the relationship.
  string relation = 3;
}
// A text reference that indexes into the
// [Document.text][google.cloud.documentai.v1beta2.Document.text].
message TextAnchor {
  // One text segment in the
  // [Document.text][google.cloud.documentai.v1beta2.Document.text]. The
  // indices may be out of bounds, which indicates that the text extends into
  // another document shard for large sharded documents. See
  // [ShardInfo.text_offset][google.cloud.documentai.v1beta2.Document.ShardInfo.text_offset].
  message TextSegment {
    // Start UTF-8 char index of the
    // [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment]
    // in the [Document.text][google.cloud.documentai.v1beta2.Document.text].
    int64 start_index = 1;

    // Half open end UTF-8 char index of the
    // [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment]
    // in the [Document.text][google.cloud.documentai.v1beta2.Document.text].
    int64 end_index = 2;
  }

  // Text segments taken from the
  // [Document.text][google.cloud.documentai.v1beta2.Document.text].
  repeated TextSegment text_segments = 1;

  // The content of the text span, included so that users do not have to look
  // it up in the text_segments.
  string content = 2;
}
// Referencing elements in
// [Document.pages][google.cloud.documentai.v1beta2.Document.pages].
message PageAnchor {
  // Represents a weak reference to a page element within a document.
  message PageRef {
    // The type of layout that is being referenced.
    enum LayoutType {
      // Layout Unspecified.
      LAYOUT_TYPE_UNSPECIFIED = 0;

      // References a
      // [Page.blocks][google.cloud.documentai.v1beta2.Document.Page.blocks]
      // element.
      BLOCK = 1;

      // References a
      // [Page.paragraphs][google.cloud.documentai.v1beta2.Document.Page.paragraphs]
      // element.
      PARAGRAPH = 2;

      // References a
      // [Page.lines][google.cloud.documentai.v1beta2.Document.Page.lines]
      // element.
      LINE = 3;

      // References a
      // [Page.tokens][google.cloud.documentai.v1beta2.Document.Page.tokens]
      // element.
      TOKEN = 4;

      // References a
      // [Page.visual_elements][google.cloud.documentai.v1beta2.Document.Page.visual_elements]
      // element.
      VISUAL_ELEMENT = 5;

      // References a
      // [Page.tables][google.cloud.documentai.v1beta2.Document.Page.tables]
      // element.
      TABLE = 6;

      // References a
      // [Page.form_fields][google.cloud.documentai.v1beta2.Document.Page.form_fields]
      // element.
      FORM_FIELD = 7;
    }

    // Required. Index into the
    // [Document.pages][google.cloud.documentai.v1beta2.Document.pages]
    // element.
    int64 page = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. The type of the layout element that is being referenced. If
    // not specified the whole page is assumed to be referenced.
    LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The
    // [Page.Layout.id][google.cloud.documentai.v1beta2.Document.Page.Layout.id]
    // on the page that this element references. If
    // [PageRef.layout_type][google.cloud.documentai.v1beta2.Document.PageAnchor.PageRef.layout_type]
    // is specified this id must also be specified.
    string layout_id = 3 [(google.api.field_behavior) = OPTIONAL];
  }

  // One or more references to visual page elements.
  repeated PageRef page_refs = 1;
}
// The original source document provided by the user.
oneof source {
  // Currently supports a Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported.
  // See [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  // info.
  string uri = 1;

  // Document content supplied inline, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protobuffers use a pure binary
  // representation, whereas JSON representations use base64.
  bytes content = 2;
}

// A MIME type (also referred to as media type) published by IANA. For more
// information, see
// https://www.iana.org/assignments/media-types/media-types.xhtml.
string mime_type = 3;

// Text from the document in reading order, UTF-8 encoded.
string text = 4;

// The styles that apply to the
// [Document.text][google.cloud.documentai.v1beta2.Document.text].
repeated Style text_styles = 5;

// The visual page layout of the
// [Document][google.cloud.documentai.v1beta2.Document].
repeated Page pages = 6;

// Entities detected on the
// [Document.text][google.cloud.documentai.v1beta2.Document.text]. For
// document shards, entities in this list may cross shard boundaries.
repeated Entity entities = 7;

// Relationships among the
// [Document.entities][google.cloud.documentai.v1beta2.Document.entities].
repeated EntityRelation entity_relations = 8;

// Sharding information, present if this document is a sharded part of a
// larger document. If the document is not sharded, this message is not
// specified.
ShardInfo shard_info = 9;

// The [Label][google.cloud.documentai.v1beta2.Document.Label]s of this
// document.
repeated Label labels = 11;

// Any error that occurred while this document was being processed.
google.rpc.Status error = 10;
}