blob: ecb019c22dfd74c315df16d5cac49ab839950932 [file] [log] [blame]
1.. This work is licensed under a Creative Commons Attribution 4.0 International License.
2.. http://creativecommons.org/licenses/by/4.0
3
4.. _data-formats:
5
6Data Formats
7============
8
9| Because the DCAE designer composes your component with others at
10 service design time, in most cases you do not know what specific
11 component(s) your component will send data to during runtime. Thus, it
12 is vital that DCAE has a language of describing the data passed
13 between components, so that it is known which components are
14 composable with others. Data formats are descriptions of data—they are
15 the data contract between your component and other components. You
16 need to describe the available outputs and assumed inputs of your
17 components as data formats. These data descriptions are onboarded into
18 ASDC, and each receives a UUID. If component X outputs data format
19 DF-Y, and another component Z specifies DF-Y as its input data
20 format, then X is said to be *composable* with that component. The
21 data formats are referenced in the component specifications by the
22 data format's id and version.
23| The vision is to have a repository of shared data formats that
24 developers and teams can re-use and also provide them the means to
25 extend and create new custom data formats.
26
27.. _dataformat_metadata:
28
29Meta Schema Definition
30----------------------
31
32The Meta Schema implementation defines how data format JSON schemas
33can be written to define user input. It is itself a JSON schema (thus it
34is a “meta schema”). It requires the name of the data format entry, the
35data format entry version and allows a description under the self object.
36The meta schema version must be specified as the value of the
37dataformatversion key. Then the input schema itself is described.
38There are four types of schema description objects - jsonschema for
39inline standard JSON Schema definitions of JSON inputs, delimitedschema
40for delimited data input using a defined JSON description, unstructured
41for unstructured text, and reference that allows a pointer to another
42artifact for a schema. The reference allows for XML schema, but can be
43used as a pointer to JSON, Delimited Format, and Unstructured schemas as
44well.
45
46The current Meta Schema implementation is defined below:
47
48::
49
50 {
51 "$schema": "http://json-schema.org/draft-04/schema#",
52 "title": "Data format specification schema Version 1.0",
53 "type": "object",
54 "oneOf": [{
55 "properties": {
56 "self": {
57 "$ref": "#/definitions/self"
58 },
59 "dataformatversion": {
60 "$ref": "#/definitions/dataformatversion"
61 },
62 "reference": {
63
64 "type": "object",
65 "description": "A reference to an external schema - name/version is used to access the artifact",
66 "properties": {
67 "name": {
68 "$ref": "#/definitions/name"
69 },
70 "version": {
71 "$ref": "#/definitions/version"
72 },
73 "format": {
74 "$ref": "#/definitions/format"
75 }
76 },
77 "required": [
78 "name",
79 "version",
80 "format"
81 ],
82 "additionalProperties": false
83 }
84 },
85 "required": ["self", "dataformatversion", "reference"],
86 "additionalProperties": false
87 }, {
88 "properties": {
89 "self": {
90 "$ref": "#/definitions/self"
91 },
92 "dataformatversion": {
93 "$ref": "#/definitions/dataformatversion"
94 },
95 "jsonschema": {
96 "$schema": "http://json-schema.org/draft-04/schema#",
97 "description": "The actual JSON schema for this data format"
98 }
99
100 },
101 "required": ["self", "dataformatversion", "jsonschema"],
102 "additionalProperties": false
103 }, {
104 "properties": {
105 "self": {
106 "$ref": "#/definitions/self"
107 },
108 "dataformatversion": {
109 "$ref": "#/definitions/dataformatversion"
110 },
111 "delimitedschema": {
112 "type": "object",
113 "description": "A JSON schema for delimited files",
114 "properties": {
115 "delimiter": {
116 "enum": [",", "|", "\t", ";"]
117 },
118 "fields": {
119 "type": "array",
120 "description": "Array of field descriptions",
121 "items": {
122 "$ref": "#/definitions/field"
123 }
124 }
125 },
126 "additionalProperties": false
127 }
128 },
129 "required": ["self", "dataformatversion", "delimitedschema"],
130 "additionalProperties": false
131 }, {
132 "properties": {
133 "self": {
134 "$ref": "#/definitions/self"
135 },
136 "dataformatversion": {
137 "$ref": "#/definitions/dataformatversion"
138 },
139 "unstructured": {
140 "type": "object",
141 "description": "A JSON schema for unstructured text",
142 "properties": {
143 "encoding": {
144 "type": "string",
145 "enum": ["ASCII", "UTF-8", "UTF-16", "UTF-32"]
146 }
147 },
148 "additionalProperties": false
149
150 }
151 },
152 "required": ["self", "dataformatversion", "unstructured"],
153 "additionalProperties": false
154 }],
155 "definitions": {
156 "name": {
157 "type": "string"
158 },
159 "version": {
160 "type": "string",
161 "pattern": "^(\\d+\\.)(\\d+\\.)(\\*|\\d+)$"
162 },
163 "self": {
164 "description": "Identifying Information for the Data Format - name/version can be used to access the artifact",
165 "type": "object",
166 "properties": {
167 "name": {
168 "$ref": "#/definitions/name"
169 },
170 "version": {
171 "$ref": "#/definitions/version"
172 },
173 "description": {
174 "type": "string"
175 }
176 },
177 "required": [
178 "name",
179 "version"
180 ],
181 "additionalProperties": false
182 },
183 "format": {
184 "description": "Reference schema type",
185 "type": "string",
186 "enum": [
187 "JSON",
188 "Delimited Format",
189 "XML",
190 "Unstructured"
191 ]
192 },
193 "field": {
194 "description": "A field definition for the delimited schema",
195 "type": "object",
196 "properties": {
197 "name": {
198 "type": "string"
199 },
200 "description": {
201 "type": "string"
202 },
203 "fieldtype": {
204 "description": "the field type - from the XML schema types",
205 "type": "string",
206 "enum": ["string", "boolean",
207 "decimal", "float", "double",
208 "duration", "dateTime", "time",
209 "date", "gYearMonth", "gYear",
210 "gMonthDay", "gDay", "gMonth",
211 "hexBinary", "base64Binary",
212 "anyURI", "QName", "NOTATION",
213 "normalizedString", "token",
214 "language", "IDREFS", "ENTITIES",
215 "NMTOKEN", "NMTOKENS", "Name",
216 "NCName", "ID", "IDREF", "ENTITY",
217 "integer", "nonPositiveInteger",
218 "negativeInteger", "long", "int",
219 "short", "byte",
220 "nonNegativeInteger", "unsignedLong",
221 "unsignedInt", "unsignedShort",
222 "unsignedByte", "positiveInteger"
223
224 ]
225 },
226 "fieldPattern": {
227 "description": "Regular expression that defines the field format",
228 "type": "integer"
229 },
230 "fieldMaxLength": {
231 "description": "The maximum length of the field",
232 "type": "integer"
233 },
234 "fieldMinLength": {
235 "description": "The minimum length of the field",
236 "type": "integer"
237 },
238 "fieldMinimum": {
239 "description": "The minimum numeric value of the field",
240 "type": "integer"
241 },
242 "fieldMaximum": {
243 "description": "The maximum numeric value of the field",
244 "type": "integer"
245 }
246 },
247 "additionalProperties": false
248 },
249 "dataformatversion": {
250 "type": "string",
251 "enum": ["1.0.0"]
252 }
253 }
254 }
255
256Examples
257-----------
258
259By reference example - Common Event Format
260~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
261
262First the full JSON schema description of the Common Event Format would
263be loaded with a name of “Common Event Format” and the current version
264of “25.0.0”.
265
266Then the data format description is loaded by this schema:
267
268::
269
270 {
271 "self": {
272 "name": "Common Event Format Definition",
273 "version": "25.0.0",
274 "description": "Common Event Format Definition"
275
276 },
277 "dataformatversion": "1.0.0",
278 "reference": {
279 "name": "Common Event Format",
280 "format": "JSON",
281 "version": "25.0.0"
282 }
283 }
284
285
286
287Simple JSON Example
288~~~~~~~~~~~~~~~~~~~~~~~~
289
290
291::
292
293 {
294 "self": {
295 "name": "Simple JSON Example",
296 "version": "1.0.0",
297 "description": "An example of unnested JSON schema for Input and output"
298
299 },
300 "dataformatversion": "1.0.0",
301 "jsonschema": {
302 "$schema": "http://json-schema.org/draft-04/schema#",
303 "type": "object",
304 "properties": {
305 "raw-text": {
306 "type": "string"
307 }
308 },
309 "required": ["raw-text"],
310 "additionalProperties": false
311 }
312 }
313
314Nested JSON Example
315~~~~~~~~~~~~~~~~~~~~~~~~
316
317::
318
319 {
320 "self": {
321 "name": "Nested JSON Example",
322 "version": "1.0.0",
323 "description": "An example of nested JSON schema for Input and output"
324
325 },
326 "dataformatversion": "1.0.0",
327 "jsonschema": {
328 "$schema": "http://json-schema.org/draft-04/schema#",
329 "properties": {
330 "numFound": {
331 "type": "integer"
332 },
333 "start": {
334 "type": "integer"
335 },
336 "engagements": {
337 "type": "array",
338 "items": {
339 "properties": {
340 "engagementID": {
341 "type": "string",
342 "transcript": {
343 "type": "array",
344 "items": {
345 "type": {
346 "type": "string"
347 },
348 "content": {
349 "type": "string"
350 },
351 "senderName": {
352 "type": "string"
353 },
354 "iso": {
355 "type": "string"
356 },
357 "timestamp": {
358 "type": "integer"
359 },
360 "senderId": {
361 "type": "string"
362 }
363 }
364 }
365 }
366 }
367 }
368 }
369 },
370 "additionalProperties": false
371 }
372 }
373
374Unstructured Example
375~~~~~~~~~~~~~~~~~~~~~~~~~
376
377::
378
379 {
380 "self": {
381 "name": "Unstructured Text Example",
382 "version": "25.0.0",
383 "description": "An example of unstructured text used for both input and output"
384
385 },
386 "dataformatversion": "1.0.0",
387 "unstructured": {
388 "encoding": "UTF-8"
389 }
390 }
391
392
393An example of a delimited schema
394~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
395
396::
397
398 {
399 "self": {
400 "name": "Delimited Format Example",
401 "version": "1.0.0",
402 "description": "Delimited format example just for testing"
403
404 },
405 "dataformatversion": "1.0.0",
406 "delimitedschema": {
407 "delimiter": "|",
408 "fields": [{
409 "name": "field1",
410 "description": "test field1",
411 "fieldtype": "string"
412 }, {
413 "name": "field2",
414 "description": "test field2",
415 "fieldtype": "boolean"
416 }]
417 }
418 }