1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
//! A data source used by BigML.

use serde::{Deserialize, Serialize};
use std::collections::HashMap;

use super::id::*;
use super::status::*;
use super::{Resource, ResourceCommon, Updatable};

/// A data source used by BigML.
///
/// This struct is (de)serialized with `serde` and is registered under the
/// API name `"source"`. The `Updatable` derive presumably generates the
/// companion `SourceUpdate` type from the fields tagged `#[updatable]`
/// (confirm against the derive macro).
///
/// TODO: Still lots of missing fields.
#[derive(Clone, Debug, Deserialize, Resource, Serialize, Updatable)]
#[api_name = "source"]
pub struct Source {
    /// Common resource information. These fields will be serialized at the
    /// top-level of this structure by `serde`.
    #[serde(flatten)]
    #[updatable(flatten)]
    pub common: ResourceCommon,

    /// The ID of this resource.
    pub resource: Id<Source>,

    /// The status of this source.
    pub status: GenericStatus,

    /// The name of the file uploaded, or `None` if no file name is present.
    pub file_name: Option<String>,

    /// An MD5 hash of the uploaded file.
    pub md5: String,

    /// The number of bytes of the source.
    pub size: u64,

    /// If `true`, BigML will *not* automatically expand dates into year, day
    /// of week, etc. Note that this flag disables the expansion rather than
    /// enabling it.
    #[updatable]
    pub disable_datetime: Option<bool>,

    /// The fields in this source, keyed by BigML internal ID.
    #[updatable]
    pub fields: Option<HashMap<String, Field>>,

    /// Placeholder to allow extensibility without breaking the API.
    ///
    /// Because this field is private, code outside this module cannot build
    /// a `Source` with a struct literal, so new fields can be added later.
    /// It is never serialized or deserialized.
    #[serde(skip)]
    _placeholder: (),
}

/// Arguments used to create a data source.
///
/// Build a value with [`Args::remote`] or [`Args::data`], then set any of the
/// public optional fields. Fields that are `None` (or, for `tags`, empty) are
/// omitted from the serialized request.
///
/// TODO: Add more fields so people need to use `update` less.
#[derive(Debug, Serialize)]
pub struct Args {
    /// The URL of the data source.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub remote: Option<String>,

    /// The raw data to use.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data: Option<String>,

    /// Set to true if you want to avoid date expansion into year, day of week, etc.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub disable_datetime: Option<bool>,

    /// The name of this source.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,

    /// User-defined tags. Omitted from the serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub tags: Vec<String>,

    /// Placeholder to allow extensibility without breaking the API.
    ///
    /// Because this field is private, code outside this module cannot build
    /// an `Args` with a struct literal, so new fields can be added later.
    /// It is never serialized.
    #[serde(skip)]
    _placeholder: (),
}

impl Args {
    /// Create a new `Args` from a remote data source.
    pub fn remote<S: Into<String>>(remote: S) -> Args {
        Args {
            remote: Some(remote.into()),
            data: None,
            disable_datetime: None,
            name: None,
            tags: vec![],
            _placeholder: (),
        }
    }

    /// Create a new `Args` from a small amount of inline data.
    pub fn data<S: Into<String>>(data: S) -> Args {
        Args {
            remote: None,
            data: Some(data.into()),
            disable_datetime: None,
            name: None,
            tags: vec![],
            _placeholder: (),
        }
    }
}

/// Associates these creation arguments with the resource type they belong
/// to, `Source`.
impl super::Args for Args {
    /// The resource type associated with these arguments.
    type Resource = Source;
}

/// Information about a field in a data source.
///
/// Only `optype` and `time_formats` are tagged `#[updatable]`; `name` is not.
#[derive(Clone, Debug, Deserialize, Serialize, Updatable)]
pub struct Field {
    /// The name of this field.
    pub name: String,

    /// The type of data stored in this field.
    #[updatable]
    pub optype: Optype,

    /// Date formats to use when parsing this field. See [the BigML docs][docs] for
    /// details.
    ///
    /// Defaults to an empty list when absent from the input, and is omitted
    /// from the serialized output when empty.
    ///
    /// [docs]: https://bigml.com/api/sources#sr_datetime_detection
    #[updatable]
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub time_formats: Vec<String>,

    // Not yet exposed: the locale of this field.
    //pub locale: Option<String>,

    // Not yet exposed. (This is not well-documented in the BigML API.)
    //pub missing_tokens: Option<Vec<String>>,
    /// Placeholder to allow extensibility without breaking the API.
    ///
    /// Because this field is private, code outside this module cannot build
    /// a `Field` with a struct literal, so new fields can be added later.
    /// It is never serialized or deserialized.
    #[serde(skip)]
    _placeholder: (),
}

/// The type of a data field.
///
/// Each variant is (de)serialized as the string given in its `rename`
/// attribute. The enum is a plain, fieldless `Copy` type, so it also derives
/// `Eq` and `Hash` and can be used as a map or set key.
#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub enum Optype {
    /// Treat this as a date value.
    #[serde(rename = "datetime")]
    DateTime,

    /// Treat this as a numeric value.
    #[serde(rename = "numeric")]
    Numeric,

    /// Treat this as a category with multiple possible values, but not
    /// arbitrary strings.
    #[serde(rename = "categorical")]
    Categorical,

    /// Treat this as text.  This uses different machine learning
    /// algorithms than `Categorical`.
    #[serde(rename = "text")]
    Text,

    /// Treat this as a list of multiple items separated by an auto-detected
    /// separator.
    #[serde(rename = "items")]
    Items,
}

/// `Optype` serves as its own update type, so an update to an `Optype`
/// carries a complete replacement value.
impl Updatable for Optype {
    /// The type used to express an update: `Optype` itself.
    type Update = Self;
}

/// Checks that an update which only sets the common `name` field serializes
/// to a JSON object containing just `"name"`: the `common` fields appear at
/// the top level and nothing else is emitted.
#[test]
fn update_source_name() {
    use super::ResourceCommonUpdate;
    use serde_json::json;

    // Only the name is set; everything else stays at its default.
    let common_update = ResourceCommonUpdate {
        name: Some("example".to_owned()),
        ..ResourceCommonUpdate::default()
    };
    let source_update = SourceUpdate {
        common: Some(common_update),
        ..SourceUpdate::default()
    };

    let expected = json!({ "name": "example" });
    let actual = json!(source_update);
    assert_eq!(actual, expected);
}