Index entry for a WebDataset-backed dataset with references to storage location and sample schema
tid
Timestamp-based ID
Properties
contentMetadata
unknown
Optional
Dataset-level content metadata (e.g., instrument settings, acquisition parameters). Structure is validated against the schema referenced by metadataSchemaRef when present. Stored as an open JSON object.
createdAt
string
datetime
Required
Timestamp when this dataset entry was created
description
string
Optional
Human-readable description of the dataset
maxLength: 5000 bytes
license
string
Optional
License identifier or URL. SPDX identifiers recommended (e.g., MIT, Apache-2.0, CC-BY-4.0) or full SPDX URLs (e.g., http://spdx.org/licenses/MIT). Aligns with Schema.org license property.
maxLength: 200 bytes
metadata
bytes
Optional
Msgpack-encoded metadata dict for arbitrary extended key-value pairs. Use this for additional metadata beyond the core top-level fields (license, tags, size). Top-level fields are preferred for discoverable/searchable metadata.
maxLength: 100000 bytes
metadataSchemaRef
string
at-uri
Optional
Optional AT-URI reference to a schema record defining the structure of this dataset's content metadata. When present, contentMetadata is validated against this schema at write time.
maxLength: 500 bytes
name
string
Required
Human-readable dataset name
maxLength: 200 bytes
schemaRef
string
at-uri
Required
AT-URI reference to the schema record for this dataset's samples
maxLength: 500 bytes
size
ref
#datasetSize
Optional
Dataset size information (optional)
storage
union
Required
Storage location for dataset files (WebDataset tar archives)
tags
array of string
Optional
Searchable tags for dataset discovery. Aligns with Schema.org keywords property.
maxLength: 30 items
View raw schema
{
"key": "tid",
"type": "record",
"record": {
"type": "object",
"required": [
"name",
"schemaRef",
"storage",
"createdAt"
],
"properties": {
"name": {
"type": "string",
"maxLength": 200,
"description": "Human-readable dataset name"
},
"size": {
"ref": "#datasetSize",
"type": "ref",
"description": "Dataset size information (optional)"
},
"tags": {
"type": "array",
"items": {
"type": "string",
"maxLength": 150
},
"maxLength": 30,
"description": "Searchable tags for dataset discovery. Aligns with Schema.org keywords property."
},
"license": {
"type": "string",
"maxLength": 200,
"description": "License identifier or URL. SPDX identifiers recommended (e.g., MIT, Apache-2.0, CC-BY-4.0) or full SPDX URLs (e.g., http://spdx.org/licenses/MIT). Aligns with Schema.org license property."
},
"storage": {
"refs": [
"science.alt.dataset.storageHttp",
"science.alt.dataset.storageS3",
"science.alt.dataset.storageBlobs"
],
"type": "union",
"description": "Storage location for dataset files (WebDataset tar archives)"
},
"metadata": {
"type": "bytes",
"maxLength": 100000,
"description": "Msgpack-encoded metadata dict for arbitrary extended key-value pairs. Use this for additional metadata beyond the core top-level fields (license, tags, size). Top-level fields are preferred for discoverable/searchable metadata."
},
"createdAt": {
"type": "string",
"format": "datetime",
"description": "Timestamp when this dataset entry was created"
},
"schemaRef": {
"type": "string",
"format": "at-uri",
"maxLength": 500,
"description": "AT-URI reference to the schema record for this dataset's samples"
},
"description": {
"type": "string",
"maxLength": 5000,
"description": "Human-readable description of the dataset"
},
"contentMetadata": {
"type": "unknown",
"description": "Dataset-level content metadata (e.g., instrument settings, acquisition parameters). Structure is validated against the schema referenced by metadataSchemaRef when present. Stored as an open JSON object."
},
"metadataSchemaRef": {
"type": "string",
"format": "at-uri",
"maxLength": 500,
"description": "Optional AT-URI reference to a schema record defining the structure of this dataset's content metadata. When present, contentMetadata is validated against this schema at write time."
}
}
},
"description": "Index entry for a WebDataset-backed dataset with references to storage location and sample schema"
}