// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
// Package that scopes every message and enum declared in this file.
package google.cloud.contentwarehouse.v1;
// Provide the (google.api.field_behavior), (google.api.resource) and
// (google.api.resource_reference) annotations used below.
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
// Provide the message types referenced by fields in this file:
// google.cloud.documentai.v1.Document, google.protobuf.Timestamp and
// google.type.DateTime.
import "google/cloud/documentai/v1/document.proto";
import "google/protobuf/timestamp.proto";
import "google/type/datetime.proto";
// Code generation settings for the Go and Java targets.
option go_package = "google.golang.org/genproto/googleapis/cloud/contentwarehouse/v1;contentwarehouse";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.contentwarehouse.v1";
// Structure of a content warehouse document.
message Document {
  option (google.api.resource) = {
    type: "contentwarehouse.googleapis.com/Document"
    pattern: "projects/{project}/locations/{location}/documents/{document}"
    pattern: "projects/{project}/locations/{location}/documents/referenceId/{reference_id}"
  };

  // Resource name of the document, in the format
  // projects/{project_number}/locations/{location}/documents/{document_id}.
  //
  // This value is ignored when a document is created.
  string name = 1;

  // Reference ID set by the customer. It must be unique per project and
  // location.
  string reference_id = 11;

  // Required. User-provided display name of the document; this is the name
  // shown in the UI. Customers can populate it with the name of the document.
  // It is distinct from `title`, which is optional and stores the top heading
  // found in the document.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Title describing the document. It is usually present in the top section
  // of the document, and it is mandatory for the question-answering feature.
  string title = 18;

  // URI used to display the document, for example, in the UI.
  string display_uri = 17;

  // Name of the document schema, in the format
  // projects/{project_number}/locations/{location}/documentSchemas/{document_schema_id}.
  string document_schema_name = 3 [
    (google.api.resource_reference) = {
      type: "contentwarehouse.googleapis.com/DocumentSchema"
    }
  ];

  // Structured content of the document. At most one member can be set.
  oneof structured_content {
    // Other document formats, such as PPTX or XLSX.
    string plain_text = 15;

    // Structured content saved in Document AI format, including OCR.
    google.cloud.documentai.v1.Document cloud_ai_document = 4;
  }

  // Path linked to the structured content file.
  string structured_content_uri = 16;

  // The raw document file. At most one member can be set.
  oneof raw_document {
    // Cloud Storage path of the raw document file.
    string raw_document_path = 5;

    // Content of the raw document.
    bytes inline_raw_document = 6;
  }

  // User-supplied metadata values.
  repeated Property properties = 7;

  // Output only. Time at which the document was last updated.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the document was created.
  google.protobuf.Timestamp create_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Used when DocAI was not used to load the document and
  // inline_raw_document needs parsing/extracting. For example, when
  // inline_raw_document holds the byte representation of a PDF file, set
  // this to RAW_DOCUMENT_FILE_TYPE_PDF.
  RawDocumentFileType raw_document_file_type = 10;

  // When true, the document is visible to asynchronous policies and rules.
  bool async_enabled = 12;

  // When true, text extraction is not performed.
  bool text_extraction_disabled = 19;

  // User who created the document.
  string creator = 13;

  // User who last updated the document.
  string updater = 14;
}
// A reference to a document.
message DocumentReference {
  // Required. Name of the document being referenced.
  string document_name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "contentwarehouse.googleapis.com/Document"
    }
  ];

  // Display name of the referenced document. Depending on the ACL
  // constraint, it does not have to be consistent with the display_name in
  // the Document proto.
  string display_name = 2;

  // Subset of the referenced document's content, which lets a user peek at
  // the information in the referenced document.
  string snippet = 3;

  // The document type of the document being referenced.
  bool document_is_folder = 4;

  // Output only. Time at which the document was last updated.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the document was created.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the document was deleted.
  google.protobuf.Timestamp delete_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
// A property of a document.
message Property {
  // Required. Must match the name of a PropertyDefinition in the
  // DocumentSchema.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // Type of the property; it must match the property_options type of the
  // matching PropertyDefinition.
  // Holds the value of the Property parsed into a specific data type, with
  // the specific type value(s) obtained from Document AI's
  // Property.mention_text field.
  oneof values {
    // Integer property values.
    IntegerArray integer_values = 2;

    // Float property values.
    FloatArray float_values = 3;

    // String/text property values.
    TextArray text_values = 4;

    // Enum property values.
    EnumArray enum_values = 5;

    // Nested structured data property values.
    PropertyArray property_values = 6;

    // Date time property values.
    // Not supported by CMEK compliant deployment.
    DateTimeArray date_time_values = 7;

    // Map property values.
    MapProperty map_property = 8;

    // Timestamp property values.
    // Not supported by CMEK compliant deployment.
    TimestampArray timestamp_values = 9;
  }
}
// A list of integer values.
message IntegerArray {
  // The integer values.
  repeated int32 values = 1;
}
// A list of float values.
message FloatArray {
  // The float values.
  repeated float values = 1;
}
// A list of string/text values.
message TextArray {
  // The text values.
  repeated string values = 1;
}
// A list of enum values.
message EnumArray {
  // The enum values.
  repeated string values = 1;
}
// A list of DateTime values.
message DateTimeArray {
  // The datetime values. OffsetDateTime and ZonedDateTime are both
  // supported.
  repeated google.type.DateTime values = 1;
}
// A list of timestamp values.
message TimestampArray {
  // The timestamp values.
  repeated TimestampValue values = 1;
}
// The timestamp value type.
message TimestampValue {
  // The timestamp, given in one of the following representations.
  oneof value {
    // Timestamp value.
    google.protobuf.Timestamp timestamp_value = 1;

    // A string that must represent a valid instant in UTC. It is parsed using
    // java.time.format.DateTimeFormatter.ISO_INSTANT,
    // e.g. "2013-09-29T18:46:19Z".
    string text_value = 2;
  }
}
// A list of property values.
message PropertyArray {
  // The property values.
  repeated Property properties = 1;
}
// A map property value.
// Represents structured entries of key-value pairs, in which field names map
// to dynamically typed values.
message MapProperty {
  // Unordered map of dynamically typed values.
  map<string, Value> fields = 1;
}
// `Value` represents a dynamically typed value, which can be a float, an
// integer, a string, an enum, a datetime, a timestamp, or a boolean value.
// A producer of a value is expected to set one of these variants. Absence of
// any variant indicates an error.
message Value {
  // The kind of value.
  oneof kind {
    // A float value.
    float float_value = 1;

    // An integer value.
    int32 int_value = 2;

    // A string value.
    string string_value = 3;

    // An enum value.
    EnumValue enum_value = 4;

    // A datetime value.
    google.type.DateTime datetime_value = 5;

    // A timestamp value.
    TimestampValue timestamp_value = 6;

    // A boolean value.
    bool boolean_value = 7;
  }
}
// The string value of an enum field.
message EnumValue {
  // String value of the enum field. It must match the set of enums defined
  // in the document schema using EnumTypeOptions.
  string value = 1;
}
// Indicates the file format when a raw document is supplied.
enum RawDocumentFileType {
  // No raw document is specified, or it is non-parsable.
  RAW_DOCUMENT_FILE_TYPE_UNSPECIFIED = 0;

  // Adobe PDF format.
  RAW_DOCUMENT_FILE_TYPE_PDF = 1;

  // Microsoft Word format.
  RAW_DOCUMENT_FILE_TYPE_DOCX = 2;

  // Microsoft Excel format.
  RAW_DOCUMENT_FILE_TYPE_XLSX = 3;

  // Microsoft Powerpoint format.
  RAW_DOCUMENT_FILE_TYPE_PPTX = 4;

  // UTF-8 encoded text format.
  RAW_DOCUMENT_FILE_TYPE_TEXT = 5;
}