tech.transparencia.document.item

transparencia.pds.transparencia.tech

{
  "id": "tech.transparencia.document.item",
  "defs": {
    "main": {
      "key": "tid",
      "type": "record",
      "record": {
        "type": "object",
        "required": [
          "title",
          "documentType",
          "source",
          "retrieval",
          "publishedAt",
          "createdAt"
        ],
        "properties": {
          "title": {
            "type": "string",
            "maxLength": 4096,
            "description": "Official or source-provided title of the document.",
            "maxGraphemes": 1024
          },
          "source": {
            "ref": "com.atproto.repo.strongRef",
            "type": "ref",
            "description": "Strong reference to the tech.transparencia.document.source record for the publisher or repository (e.g., DOF, UNFCCC). Identifies which source this document came from."
          },
          "topics": {
            "type": "array",
            "items": {
              "type": "string",
              "maxLength": 512,
              "maxGraphemes": 128
            },
            "maxLength": 30,
            "description": "Free-form topics, tags, or source categories attached to the document."
          },
          "country": {
            "type": "string",
            "maxLength": 2,
            "description": "Primary country connected to the document, as an ISO 3166-1 alpha-2 code (e.g., 'MX', 'BR', 'US'). Omit for international documents."
          },
          "domains": {
            "type": "array",
            "items": {
              "type": "string",
              "maxLength": 128,
              "knownValues": [
                "government",
                "politics",
                "law",
                "justice",
                "environment",
                "climate",
                "education",
                "health",
                "budget",
                "procurement",
                "economy",
                "finance",
                "labor",
                "energy",
                "infrastructure",
                "security",
                "science-technology",
                "society",
                "human-rights",
                "other"
              ],
              "maxGraphemes": 64
            },
            "maxLength": 20,
            "description": "Broad public-interest domains covered by the document. Open set; consumers should tolerate unknown values."
          },
          "issuedAt": {
            "type": "string",
            "format": "datetime",
            "description": "When the issuing authority signed, issued, adopted, or approved the document, if different from publication time. Use midnight UTC when only a calendar date is available."
          },
          "language": {
            "type": "string",
            "format": "language",
            "description": "Primary language of the document content (BCP-47, e.g., 'es-MX', 'en', 'pt-BR')."
          },
          "subtitle": {
            "type": "string",
            "maxLength": 4096,
            "description": "Optional subtitle, section heading, or secondary title.",
            "maxGraphemes": 1024
          },
          "createdAt": {
            "type": "string",
            "format": "datetime",
            "description": "When this AT Protocol record was created."
          },
          "retrieval": {
            "ref": "#retrieval",
            "type": "ref",
            "description": "Per-document retrieval metadata: canonical URLs, MIME type, checksums, file size, and access status of the specific retrieved representation."
          },
          "updatedAt": {
            "type": "string",
            "format": "datetime",
            "description": "When this record was last materially updated."
          },
          "description": {
            "type": "string",
            "maxLength": 10000,
            "description": "Short source-provided description or human-readable abstract. AI summaries should be stored in enrichment records.",
            "maxGraphemes": 2000
          },
          "effectiveAt": {
            "type": "string",
            "format": "datetime",
            "description": "When the document's legal or administrative effects begin, if applicable and explicitly known. Use midnight UTC when only a calendar date is available."
          },
          "identifiers": {
            "type": "array",
            "items": {
              "ref": "#identifier",
              "type": "ref"
            },
            "maxLength": 50,
            "description": "External identifiers such as DOF IDs, UNFCCC symbols, file numbers, docket numbers, ISBNs, or local archival IDs. For content hashes use retrieval.sha256; for URLs use retrieval.url."
          },
          "publishedAt": {
            "type": "string",
            "format": "datetime",
            "description": "When the document was published by the source. Use midnight UTC when only a calendar date is available."
          },
          "documentType": {
            "type": "string",
            "maxLength": 128,
            "description": "Machine-readable document category. Open set; known values cover common official and institutional documents.",
            "knownValues": [
              "official-publication",
              "official-gazette-issue",
              "official-gazette-entry",
              "law",
              "decree",
              "agreement",
              "notice",
              "regulation",
              "standard",
              "report",
              "audit-report",
              "budget-document",
              "contract",
              "procurement-document",
              "court-ruling",
              "legislative-bill",
              "legislative-opinion",
              "treaty",
              "submission",
              "technical-paper",
              "environmental-impact-document",
              "education-policy-document",
              "dataset-documentation",
              "meeting-minutes",
              "resolution",
              "other"
            ]
          },
          "jurisdiction": {
            "type": "string",
            "maxLength": 256,
            "description": "Legal or administrative jurisdiction covered by the document (e.g., 'federal', 'state', 'municipal', 'international').",
            "knownValues": [
              "local",
              "municipal",
              "state",
              "federal",
              "national",
              "regional",
              "international",
              "supranational",
              "unknown"
            ],
            "maxGraphemes": 64
          },
          "issuingBodies": {
            "type": "array",
            "items": {
              "ref": "tech.transparencia.defs#organization",
              "type": "ref"
            },
            "maxLength": 20,
            "description": "Organizations, public bodies, institutions, or authorities responsible for issuing, publishing, filing, or adopting the document. Uses the shared tech.transparencia.defs#organization type. Conventional role values include 'publisher', 'issuer', 'author', 'adopter', 'filer', 'regulator', 'court', 'legislature', 'repository'."
          }
        }
      },
      "description": "Core document metadata for official and institutional documents. Stores identity, provenance, and public context, but not full text, sections, chunks, AI analysis, or ingestion pipeline state."
    },
    "retrieval": {
      "type": "object",
      "required": [
        "url",
        "retrievedAt"
      ],
      "properties": {
        "url": {
          "type": "string",
          "format": "uri",
          "description": "URL where this document was found or retrieved."
        },
        "pdfUrl": {
          "type": "string",
          "format": "uri",
          "description": "PDF or downloadable document URL, if available."
        },
        "sha256": {
          "type": "string",
          "maxLength": 64,
          "description": "SHA-256 checksum of the retrieved file or canonical source payload, as a hexadecimal string (64 characters), if available."
        },
        "htmlUrl": {
          "type": "string",
          "format": "uri",
          "description": "HTML landing page or web version of the document, if available."
        },
        "license": {
          "type": "string",
          "maxLength": 512,
          "description": "Per-document license override, if the document is licensed differently from the source-level default.",
          "maxGraphemes": 128
        },
        "fileName": {
          "type": "string",
          "maxLength": 1024,
          "description": "Original or normalized file name, if applicable.",
          "maxGraphemes": 256
        },
        "mimeType": {
          "type": "string",
          "maxLength": 128,
          "description": "MIME type of the retrieved representation (e.g., 'text/html', 'application/pdf')."
        },
        "sourceId": {
          "type": "string",
          "maxLength": 512,
          "description": "Source-system identifier for deduplication, if provided by the upstream source."
        },
        "sizeBytes": {
          "type": "integer",
          "minimum": 0,
          "description": "Size of the retrieved file or canonical representation in bytes."
        },
        "accessType": {
          "type": "string",
          "maxLength": 64,
          "description": "Access status of the source at retrieval time. Use 'previously-public' for documents that were once publicly accessible but have since been withdrawn or removed by the source.",
          "knownValues": [
            "public",
            "restricted",
            "paywalled",
            "previously-public",
            "unknown"
          ]
        },
        "retrievedAt": {
          "type": "string",
          "format": "datetime",
          "description": "When this representation of the document was retrieved by the pipeline."
        },
        "canonicalUrl": {
          "type": "string",
          "format": "uri",
          "description": "Canonical, normalized, or preferred public URL for the document."
        }
      },
      "description": "Per-document retrieval metadata for a single retrieved representation. Publisher-level metadata (name, base URL, license) lives on the tech.transparencia.document.source record referenced by 'source'."
    },
    "identifier": {
      "type": "object",
      "required": [
        "type",
        "value"
      ],
      "properties": {
        "url": {
          "type": "string",
          "format": "uri",
          "description": "Optional URL where this identifier can be resolved or verified."
        },
        "type": {
          "type": "string",
          "maxLength": 128,
          "description": "Identifier type or namespace.",
          "knownValues": [
            "dof_id",
            "dof_publication_id",
            "unfccc_symbol",
            "official_file_number",
            "docket_number",
            "case_number",
            "law_number",
            "isbn",
            "issn",
            "doi",
            "other"
          ]
        },
        "value": {
          "type": "string",
          "maxLength": 1024,
          "description": "Identifier value.",
          "maxGraphemes": 256
        }
      },
      "description": "External identifier assigned to a document by a source system, authority, archive, or standard. For content hashes use retrieval.sha256; for the document's own URL use retrieval.url (the 'url' field here only resolves or verifies the identifier)."
    }
  },
  "$type": "com.atproto.lexicon.schema",
  "lexicon": 1,
  "description": "A canonical public-interest document record. Represents one official publication, report, filing, act, submission, or other source document before structural parsing or AI enrichment."
}

Validate Record

Validate a record against tech.transparencia.document.item

Validation Options
Automatically resolve and include external schemas for full validation
Treat any remaining unresolved references as valid

Metadata

DID
did:plc:top57c5tklg2fjl66hpidi45
CID
bafyreie3kpd2fu2t4bepjfyfi5lyttbxkgsk67esgllqzpsbmcbqvtj5xi
Indexed At
2026-05-18 18:55 UTC
AT-URI
at://did:plc:top57c5tklg2fjl66hpidi45/com.atproto.lexicon.schema/tech.transparencia.document.item

Version History (3 versions)

Referenced Schemas (1)

Lexicon Garden

@