Ralph Knag | 1fca6ac | 2017-12-05 12:05:57 -0500 | [diff] [blame] | 1 | .. This work is licensed under a Creative Commons Attribution 4.0 International License.
|
| 2 | .. http://creativecommons.org/licenses/by/4.0
|
| 3 |
|
| 4 | .. _data-formats:
|
| 5 |
|
| 6 | Data Formats
|
| 7 | ============
|
| 8 |
|
| 9 | | Because the DCAE designer composes your component with others at
|
| 10 | service design time, in most cases you do not know what specific
|
| 11 | component(s) your component will send data to during runtime. Thus, it
|
| 12 | is vital that DCAE has a language for describing the data passed
|
| 13 | between components, so that it is known which components are
|
| 14 | composable with others. Data formats are descriptions of data—they are
|
| 15 | the data contract between your component and other components. You
|
| 16 | need to describe the available outputs and assumed inputs of your
|
| 17 | components as data formats. These data descriptions are onboarded into
|
| 18 | ASDC, and each receives a UUID. If component X outputs data format
|
| 19 | DF-Y, and another component Z specifies DF-Y as its input data
|
| 20 | format, then X is said to be *composable* with that component. The
|
| 21 | data formats are referenced in the component specifications by the
|
| 22 | data format’s id and version.
|
| 23 | | The vision is to have a repository of shared data formats that
|
| 24 | developers and teams can re-use and also provide them with the means to
|
| 25 | extend and create new custom data formats.
|
| 26 |
|
| 27 | .. _dataformat_metadata:
|
| 28 |
|
| 29 | Meta Schema Definition
|
| 30 | ----------------------
|
| 31 |
|
| 32 | The “Meta Schema” implementation defines how data format JSON schemas
|
| 33 | can be written to define user input. It is itself a JSON schema (thus it
|
| 34 | is a “meta schema”). It requires the name of the data format entry, the
|
| 35 | data format entry version and allows a description under the “self” object.
|
| 36 | The meta schema version must be specified as the value of the
|
| 37 | “dataformatversion” key. Then the input schema itself is described.
|
| 38 | There are four types of schema description objects - jsonschema for
|
| 39 | inline standard JSON Schema definitions of JSON inputs, delimitedschema
|
| 40 | for delimited data input using a defined JSON description, unstructured
|
| 41 | for unstructured text, and reference that allows a pointer to another
|
| 42 | artifact for a schema. The reference allows for XML schema, but can be
|
| 43 | used as a pointer to JSON, Delimited Format, and Unstructured schemas as
|
| 44 | well.
|
| 45 |
|
| 46 | The current Meta Schema implementation is defined below:
|
| 47 |
|
| 48 | ::
|
| 49 |
|
| 50 | {
|
| 51 | "$schema": "http://json-schema.org/draft-04/schema#",
|
| 52 | "title": "Data format specification schema Version 1.0",
|
| 53 | "type": "object",
|
| 54 | "oneOf": [{
|
| 55 | "properties": {
|
| 56 | "self": {
|
| 57 | "$ref": "#/definitions/self"
|
| 58 | },
|
| 59 | "dataformatversion": {
|
| 60 | "$ref": "#/definitions/dataformatversion"
|
| 61 | },
|
| 62 | "reference": {
|
| 63 |
|
| 64 | "type": "object",
|
| 65 | "description": "A reference to an external schema - name/version is used to access the artifact",
|
| 66 | "properties": {
|
| 67 | "name": {
|
| 68 | "$ref": "#/definitions/name"
|
| 69 | },
|
| 70 | "version": {
|
| 71 | "$ref": "#/definitions/version"
|
| 72 | },
|
| 73 | "format": {
|
| 74 | "$ref": "#/definitions/format"
|
| 75 | }
|
| 76 | },
|
| 77 | "required": [
|
| 78 | "name",
|
| 79 | "version",
|
| 80 | "format"
|
| 81 | ],
|
| 82 | "additionalProperties": false
|
| 83 | }
|
| 84 | },
|
| 85 | "required": ["self", "dataformatversion", "reference"],
|
| 86 | "additionalProperties": false
|
| 87 | }, {
|
| 88 | "properties": {
|
| 89 | "self": {
|
| 90 | "$ref": "#/definitions/self"
|
| 91 | },
|
| 92 | "dataformatversion": {
|
| 93 | "$ref": "#/definitions/dataformatversion"
|
| 94 | },
|
| 95 | "jsonschema": {
|
| 96 | "$schema": "http://json-schema.org/draft-04/schema#",
|
| 97 | "description": "The actual JSON schema for this data format"
|
| 98 | }
|
| 99 |
|
| 100 | },
|
| 101 | "required": ["self", "dataformatversion", "jsonschema"],
|
| 102 | "additionalProperties": false
|
| 103 | }, {
|
| 104 | "properties": {
|
| 105 | "self": {
|
| 106 | "$ref": "#/definitions/self"
|
| 107 | },
|
| 108 | "dataformatversion": {
|
| 109 | "$ref": "#/definitions/dataformatversion"
|
| 110 | },
|
| 111 | "delimitedschema": {
|
| 112 | "type": "object",
|
| 113 | "description": "A JSON schema for delimited files",
|
| 114 | "properties": {
|
| 115 | "delimiter": {
|
| 116 | "enum": [",", "|", "\t", ";"]
|
| 117 | },
|
| 118 | "fields": {
|
| 119 | "type": "array",
|
| 120 | "description": "Array of field descriptions",
|
| 121 | "items": {
|
| 122 | "$ref": "#/definitions/field"
|
| 123 | }
|
| 124 | }
|
| 125 | },
|
| 126 | "additionalProperties": false
|
| 127 | }
|
| 128 | },
|
| 129 | "required": ["self", "dataformatversion", "delimitedschema"],
|
| 130 | "additionalProperties": false
|
| 131 | }, {
|
| 132 | "properties": {
|
| 133 | "self": {
|
| 134 | "$ref": "#/definitions/self"
|
| 135 | },
|
| 136 | "dataformatversion": {
|
| 137 | "$ref": "#/definitions/dataformatversion"
|
| 138 | },
|
| 139 | "unstructured": {
|
| 140 | "type": "object",
|
| 141 | "description": "A JSON schema for unstructured text",
|
| 142 | "properties": {
|
| 143 | "encoding": {
|
| 144 | "type": "string",
|
| 145 | "enum": ["ASCII", "UTF-8", "UTF-16", "UTF-32"]
|
| 146 | }
|
| 147 | },
|
| 148 | "additionalProperties": false
|
| 149 |
|
| 150 | }
|
| 151 | },
|
| 152 | "required": ["self", "dataformatversion", "unstructured"],
|
| 153 | "additionalProperties": false
|
| 154 | }],
|
| 155 | "definitions": {
|
| 156 | "name": {
|
| 157 | "type": "string"
|
| 158 | },
|
| 159 | "version": {
|
| 160 | "type": "string",
|
| 161 | "pattern": "^(\\d+\\.)(\\d+\\.)(\\*|\\d+)$"
|
| 162 | },
|
| 163 | "self": {
|
| 164 | "description": "Identifying Information for the Data Format - name/version can be used to access the artifact",
|
| 165 | "type": "object",
|
| 166 | "properties": {
|
| 167 | "name": {
|
| 168 | "$ref": "#/definitions/name"
|
| 169 | },
|
| 170 | "version": {
|
| 171 | "$ref": "#/definitions/version"
|
| 172 | },
|
| 173 | "description": {
|
| 174 | "type": "string"
|
| 175 | }
|
| 176 | },
|
| 177 | "required": [
|
| 178 | "name",
|
| 179 | "version"
|
| 180 | ],
|
| 181 | "additionalProperties": false
|
| 182 | },
|
| 183 | "format": {
|
| 184 | "description": "Reference schema type",
|
| 185 | "type": "string",
|
| 186 | "enum": [
|
| 187 | "JSON",
|
| 188 | "Delimited Format",
|
| 189 | "XML",
|
| 190 | "Unstructured"
|
| 191 | ]
|
| 192 | },
|
| 193 | "field": {
|
| 194 | "description": "A field definition for the delimited schema",
|
| 195 | "type": "object",
|
| 196 | "properties": {
|
| 197 | "name": {
|
| 198 | "type": "string"
|
| 199 | },
|
| 200 | "description": {
|
| 201 | "type": "string"
|
| 202 | },
|
| 203 | "fieldtype": {
|
| 204 | "description": "the field type - from the XML schema types",
|
| 205 | "type": "string",
|
| 206 | "enum": ["string", "boolean",
|
| 207 | "decimal", "float", "double",
|
| 208 | "duration", "dateTime", "time",
|
| 209 | "date", "gYearMonth", "gYear",
|
| 210 | "gMonthDay", "gDay", "gMonth",
|
| 211 | "hexBinary", "base64Binary",
|
| 212 | "anyURI", "QName", "NOTATION",
|
| 213 | "normalizedString", "token",
|
| 214 | "language", "IDREFS", "ENTITIES",
|
| 215 | "NMTOKEN", "NMTOKENS", "Name",
|
| 216 | "NCName", "ID", "IDREF", "ENTITY",
|
| 217 | "integer", "nonPositiveInteger",
|
| 218 | "negativeInteger", "long", "int",
|
| 219 | "short", "byte",
|
| 220 | "nonNegativeInteger", "unsignedLong",
|
| 221 | "unsignedInt", "unsignedShort",
|
| 222 | "unsignedByte", "positiveInteger"
|
| 223 |
|
| 224 | ]
|
| 225 | },
|
| 226 | "fieldPattern": {
|
| 227 | "description": "Regular expression that defines the field format",
|
| 228 | "type": "string"
|
| 229 | },
|
| 230 | "fieldMaxLength": {
|
| 231 | "description": "The maximum length of the field",
|
| 232 | "type": "integer"
|
| 233 | },
|
| 234 | "fieldMinLength": {
|
| 235 | "description": "The minimum length of the field",
|
| 236 | "type": "integer"
|
| 237 | },
|
| 238 | "fieldMinimum": {
|
| 239 | "description": "The minimum numeric value of the field",
|
| 240 | "type": "integer"
|
| 241 | },
|
| 242 | "fieldMaximum": {
|
| 243 | "description": "The maximum numeric value of the field",
|
| 244 | "type": "integer"
|
| 245 | }
|
| 246 | },
|
| 247 | "additionalProperties": false
|
| 248 | },
|
| 249 | "dataformatversion": {
|
| 250 | "type": "string",
|
| 251 | "enum": ["1.0.0"]
|
| 252 | }
|
| 253 | }
|
| 254 | }
|
| 255 |
|
| 256 | Examples
|
| 257 | -----------
|
| 258 |
|
| 259 | By reference example - Common Event Format
|
| 260 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 261 |
|
| 262 | First the full JSON schema description of the Common Event Format would
|
| 263 | be loaded with a name of “Common Event Format” and the current version
|
| 264 | of “25.0.0”.
|
| 265 |
|
| 266 | Then the data format description is loaded by this schema:
|
| 267 |
|
| 268 | ::
|
| 269 |
|
| 270 | {
|
| 271 | "self": {
|
| 272 | "name": "Common Event Format Definition",
|
| 273 | "version": "25.0.0",
|
| 274 | "description": "Common Event Format Definition"
|
| 275 |
|
| 276 | },
|
| 277 | "dataformatversion": "1.0.0",
|
| 278 | "reference": {
|
| 279 | "name": "Common Event Format",
|
| 280 | "format": "JSON",
|
| 281 | "version": "25.0.0"
|
| 282 | }
|
| 283 | }
|
| 284 |
|
| 285 |
|
| 286 |
|
| 287 | Simple JSON Example
|
| 288 | ~~~~~~~~~~~~~~~~~~~~~~~~
|
| 289 |
|
| 290 |
|
| 291 | ::
|
| 292 |
|
| 293 | {
|
| 294 | "self": {
|
| 295 | "name": "Simple JSON Example",
|
| 296 | "version": "1.0.0",
|
| 297 | "description": "An example of unnested JSON schema for Input and output"
|
| 298 |
|
| 299 | },
|
| 300 | "dataformatversion": "1.0.0",
|
| 301 | "jsonschema": {
|
| 302 | "$schema": "http://json-schema.org/draft-04/schema#",
|
| 303 | "type": "object",
|
| 304 | "properties": {
|
| 305 | "raw-text": {
|
| 306 | "type": "string"
|
| 307 | }
|
| 308 | },
|
| 309 | "required": ["raw-text"],
|
| 310 | "additionalProperties": false
|
| 311 | }
|
| 312 | }
|
| 313 |
|
| 314 | Nested JSON Example
|
| 315 | ~~~~~~~~~~~~~~~~~~~~~~~~
|
| 316 |
|
| 317 | ::
|
| 318 |
|
| 319 | {
|
| 320 | "self": {
|
| 321 | "name": "Nested JSON Example",
|
| 322 | "version": "1.0.0",
|
| 323 | "description": "An example of nested JSON schema for Input and output"
|
| 324 |
|
| 325 | },
|
| 326 | "dataformatversion": "1.0.0",
|
| 327 | "jsonschema": {
|
| 328 | "$schema": "http://json-schema.org/draft-04/schema#",
|
| 329 | "properties": {
|
| 330 | "numFound": {
|
| 331 | "type": "integer"
|
| 332 | },
|
| 333 | "start": {
|
| 334 | "type": "integer"
|
| 335 | },
|
| 336 | "engagements": {
|
| 337 | "type": "array",
|
| 338 | "items": {
|
| 339 | "properties": {
|
| 340 | "engagementID": {
|
| 341 | "type": "string",
|
| 342 | "transcript": {
|
| 343 | "type": "array",
|
| 344 | "items": {
|
| 345 | "type": {
|
| 346 | "type": "string"
|
| 347 | },
|
| 348 | "content": {
|
| 349 | "type": "string"
|
| 350 | },
|
| 351 | "senderName": {
|
| 352 | "type": "string"
|
| 353 | },
|
| 354 | "iso": {
|
| 355 | "type": "string"
|
| 356 | },
|
| 357 | "timestamp": {
|
| 358 | "type": "integer"
|
| 359 | },
|
| 360 | "senderId": {
|
| 361 | "type": "string"
|
| 362 | }
|
| 363 | }
|
| 364 | }
|
| 365 | }
|
| 366 | }
|
| 367 | }
|
| 368 | }
|
| 369 | },
|
| 370 | "additionalProperties": false
|
| 371 | }
|
| 372 | }
|
| 373 |
|
| 374 | Unstructured Example
|
| 375 | ~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 376 |
|
| 377 | ::
|
| 378 |
|
| 379 | {
|
| 380 | "self": {
|
| 381 | "name": "Unstructured Text Example",
|
| 382 | "version": "25.0.0",
|
| 383 | "description": "An example of unstructured text used for both input and output"
|
| 384 |
|
| 385 | },
|
| 386 | "dataformatversion": "1.0.0",
|
| 387 | "unstructured": {
|
| 388 | "encoding": "UTF-8"
|
| 389 | }
|
| 390 | }
|
| 391 |
|
| 392 |
|
| 393 | An example of a delimited schema
|
| 394 | --------------------------------
|
| 395 |
|
| 396 | ::
|
| 397 |
|
| 398 | {
|
| 399 | "self": {
|
| 400 | "name": "Delimited Format Example",
|
| 401 | "version": "1.0.0",
|
| 402 | "description": "Delimited format example just for testing"
|
| 403 |
|
| 404 | },
|
| 405 | "dataformatversion": "1.0.0",
|
| 406 | "delimitedschema": {
|
| 407 | "delimiter": "|",
|
| 408 | "fields": [{
|
| 409 | "name": "field1",
|
| 410 | "description": "test field1",
|
| 411 | "fieldtype": "string"
|
| 412 | }, {
|
| 413 | "name": "field2",
|
| 414 | "description": "test field2",
|
| 415 | "fieldtype": "boolean"
|
| 416 | }]
|
| 417 | }
|
| 418 | }
|