Struct JsonOptions

Source
pub struct JsonOptions {
Show 26 fields pub schema: Option<String>, pub compression: Option<String>, pub primitives_as_string: Option<bool>, pub prefers_decimal: Option<bool>, pub allow_comments: Option<bool>, pub allow_unquoted_field_names: Option<bool>, pub allow_single_quotes: Option<bool>, pub allow_numeric_leading_zeros: Option<bool>, pub allow_backslash_escaping_any_character: Option<bool>, pub mode: Option<String>, pub column_name_of_corrupt_record: Option<String>, pub date_format: Option<String>, pub timestamp_format: Option<String>, pub multi_line: Option<bool>, pub allow_unquoted_control_chars: Option<bool>, pub line_sep: Option<String>, pub sampling_ratio: Option<f64>, pub drop_field_if_all_null: Option<bool>, pub encoding: Option<String>, pub locale: Option<String>, pub allow_non_numeric_numbers: Option<bool>, pub time_zone: Option<String>, pub timestamp_ntz_format: Option<String>, pub enable_datetime_parsing_fallback: Option<bool>, pub ignore_null_fields: Option<bool>, pub common: CommonFileOptions,
}
Expand description

A struct that represents options for configuring JSON file parsing.

By default, this supports JSON Lines (newline-delimited JSON). For single-record-per-file JSON, set the multi_line option to true.

If the schema option is not specified, the input schema is inferred from the data.

§Options

  • schema: An optional schema for the JSON data, given as a DDL-formatted string (e.g., name STRING, salary INT).
  • compression: Compression codec to use when reading JSON files (e.g., gzip, bzip2).
  • primitives_as_string: Treat primitive types (e.g., integers, booleans) as strings.
  • prefers_decimal: Prefer parsing numbers as decimals rather than floating points.
  • allow_comments: Allow Java/C++-style comments in JSON files (e.g., // line comments or /* ... */ block comments).
  • allow_unquoted_field_names: Allow field names without quotes.
  • allow_single_quotes: Allow the use of single quotes instead of double quotes for strings.
  • allow_numeric_leading_zeros: Allow numbers to have leading zeros (e.g., 007).
  • allow_backslash_escaping_any_character: Allow backslashes to escape any character.
  • mode: The parsing mode (e.g., PERMISSIVE, DROPMALFORMED, FAILFAST).
  • column_name_of_corrupt_record: Name of the column where corrupted records are placed.
  • date_format: Custom date format (e.g., yyyy-MM-dd).
  • timestamp_format: Custom timestamp format (e.g., yyyy-MM-dd HH:mm:ss).
  • multi_line: Read multiline JSON files (e.g., when a single JSON object spans multiple lines).
  • allow_unquoted_control_chars: Allow unquoted control characters in JSON (e.g., ASCII control characters).
  • line_sep: Custom line separator (by default \r, \r\n, and \n are all recognized when reading; \n is used when writing).
  • sampling_ratio: Fraction of the data used for schema inference (e.g., 0.1 for 10%).
  • drop_field_if_all_null: Drop fields that are NULL in all rows.
  • encoding: Character encoding (default is UTF-8).
  • locale: Locale for parsing dates and numbers (e.g., en-US).
  • allow_non_numeric_numbers: Allow special non-numeric numbers (e.g., NaN, Infinity).
  • time_zone: Time zone used for parsing dates and timestamps (e.g., UTC, America/Los_Angeles).
  • timestamp_ntz_format: Format for parsing timestamp without time zone (NTZ) values (e.g., yyyy-MM-dd'T'HH:mm:ss).
  • enable_datetime_parsing_fallback: Enable fallback mechanism for datetime parsing if the initial parsing fails.
  • ignore_null_fields: Ignore NULL fields in the JSON structure, treating them as absent.
  • common: Common file options that are shared across multiple file formats.

§Example

let options = JsonOptions::default()
    .schema("name STRING, salary INT")
    .multi_line(true)
    .allow_comments(true)
    .encoding("UTF-8")
    .time_zone("UTC")
    .compression("gzip");

let df = spark.read().json(["/path/to/json"], options)?;

Fields§

§schema: Option<String>§compression: Option<String>§primitives_as_string: Option<bool>§prefers_decimal: Option<bool>§allow_comments: Option<bool>§allow_unquoted_field_names: Option<bool>§allow_single_quotes: Option<bool>§allow_numeric_leading_zeros: Option<bool>§allow_backslash_escaping_any_character: Option<bool>§mode: Option<String>§column_name_of_corrupt_record: Option<String>§date_format: Option<String>§timestamp_format: Option<String>§multi_line: Option<bool>§allow_unquoted_control_chars: Option<bool>§line_sep: Option<String>§sampling_ratio: Option<f64>§drop_field_if_all_null: Option<bool>§encoding: Option<String>§locale: Option<String>§allow_non_numeric_numbers: Option<bool>§time_zone: Option<String>§timestamp_ntz_format: Option<String>§enable_datetime_parsing_fallback: Option<bool>§ignore_null_fields: Option<bool>§common: CommonFileOptions

Implementations§

Source§

impl JsonOptions

Source

pub fn schema(self, value: &str) -> Self

Source

pub fn primitives_as_string(self, value: bool) -> Self

Source

pub fn prefers_decimal(self, value: bool) -> Self

Source

pub fn allow_comments(self, value: bool) -> Self

Source

pub fn allow_unquoted_field_names(self, value: bool) -> Self

Source

pub fn allow_single_quotes(self, value: bool) -> Self

Source

pub fn allow_numeric_leading_zeros(self, value: bool) -> Self

Source

pub fn allow_backslash_escaping_any_character(self, value: bool) -> Self

Source

pub fn mode(self, value: &str) -> Self

Source

pub fn column_name_of_corrupt_record(self, value: &str) -> Self

Source

pub fn date_format(self, value: &str) -> Self

Source

pub fn timestamp_format(self, value: &str) -> Self

Source

pub fn multi_line(self, value: bool) -> Self

Source

pub fn allow_unquoted_control_chars(self, value: bool) -> Self

Source

pub fn line_sep(self, value: &str) -> Self

Source

pub fn sampling_ratio(self, value: f64) -> Self

Source

pub fn drop_field_if_all_null(self, value: bool) -> Self

Source

pub fn encoding(self, value: &str) -> Self

Source

pub fn locale(self, value: &str) -> Self

Source

pub fn allow_non_numeric_numbers(self, value: bool) -> Self

Source

pub fn time_zone(self, value: &str) -> Self

Source

pub fn timestamp_ntz_format(self, value: &str) -> Self

Source

pub fn enable_datetime_parsing_fallback(self, value: bool) -> Self

Source

pub fn compression(self, value: &str) -> Self

Source

pub fn ignore_null_fields(self, value: bool) -> Self

Trait Implementations§

Source§

impl Clone for JsonOptions

Source§

fn clone(&self) -> JsonOptions

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl ConfigOpts for JsonOptions

Source§

impl Debug for JsonOptions

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Default for JsonOptions

Source§

fn default() -> Self

Returns the “default value” for a type. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dst: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> FromRef<T> for T
where T: Clone,

Source§

fn from_ref(input: &T) -> T

Converts to this type from a reference to the input type.
Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoRequest<T> for T

Source§

fn into_request(self) -> Request<T>

Wrap the input message T in a tonic::Request
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,

Source§

impl<T> ErasedDestructor for T
where T: 'static,

Source§

impl<T> MaybeSendSync for T