tech.transparencia.document.item

transparencia.pds.transparencia.tech

Schema Diff

+2 -2

From

CID
bafyreibkl6cvow3...
Indexed At
2026-05-18 19:45 UTC
View this version

To

CID
bafyreif4gc5j4rr...
Indexed At
2026-05-19 03:28 UTC
View this version

Compatibility Analysis

Backward Compatible

No breaking changes detected.

1 1
{
2 2
  "id": "tech.transparencia.document.item",
3 3
  "defs": {
4 4
    "main": {
5 5
      "key": "tid",
6 6
      "type": "record",
7 7
      "record": {
8 8
        "type": "object",
9 9
        "required": [
10 10
          "title",
11 11
          "documentType",
12 12
          "source",
13 13
          "retrieval",
14 14
          "publishedAt",
15 15
          "createdAt"
16 16
        ],
17 17
        "properties": {
18 18
          "title": {
19 19
            "type": "string",
20 20
            "maxLength": 4096,
21 21
            "description": "Official or source-provided title of the document.",
22 22
            "maxGraphemes": 1024
23 23
          },
24 24
          "source": {
25 25
            "ref": "com.atproto.repo.strongRef",
26 26
            "type": "ref",
27 27
            "description": "Strong reference to the tech.transparencia.document.source record for the publisher or repository (e.g., DOF, UNFCCC). Identifies which source this document came from."
28 28
          },
29 29
          "topics": {
30 30
            "type": "array",
31 31
            "items": {
32 32
              "type": "string",
33 33
              "maxLength": 512,
34 34
              "maxGraphemes": 128
35 35
            },
36 36
            "maxLength": 30,
37 37
            "description": "Free-form topics, tags, or source categories attached to the document."
38 38
          },
39 39
          "country": {
40 40
            "type": "string",
41 41
            "maxLength": 2,
42 42
            "description": "Primary country connected to the document, as an ISO 3166-1 alpha-2 code (e.g., 'MX', 'BR', 'US'). Omit for international documents."
43 43
          },
44 44
          "domains": {
45 45
            "type": "array",
46 46
            "items": {
47 47
              "type": "string",
48 48
              "maxLength": 128,
49 49
              "knownValues": [
50 50
                "government",
51 51
                "politics",
52 52
                "law",
53 53
                "justice",
54 54
                "environment",
55 55
                "climate",
56 56
                "education",
57 57
                "health",
58 58
                "budget",
59 59
                "procurement",
60 60
                "economy",
61 61
                "finance",
62 62
                "labor",
63 63
                "energy",
64 64
                "infrastructure",
65 65
                "security",
66 66
                "science-technology",
67 67
                "society",
68 68
                "human-rights",
69 69
                "other"
70 70
              ],
71 71
              "maxGraphemes": 64
72 72
            },
73 73
            "maxLength": 20,
74 74
            "description": "Broad public-interest domains covered by the document. Open set; consumers should tolerate unknown values."
75 75
          },
76 76
          "issuedAt": {
77 77
            "type": "string",
78 78
            "format": "datetime",
79 79
            "description": "When the issuing authority signed, issued, adopted, or approved the document, if different from publication time."
80 80
          },
81 81
          "language": {
82 82
            "type": "string",
83 83
            "format": "language",
84 84
            "description": "Primary language of the document content (BCP-47, e.g., 'es-MX', 'en', 'pt-BR')."
85 85
          },
86 86
          "subtitle": {
87 87
            "type": "string",
88 88
            "maxLength": 4096,
89 89
            "description": "Optional subtitle, section heading, or secondary title.",
90 90
            "maxGraphemes": 1024
91 91
          },
92 92
          "createdAt": {
93 93
            "type": "string",
94 94
            "format": "datetime",
95 95
            "description": "When this AT Protocol record was created."
96 96
          },
97 97
          "retrieval": {
98 98
            "ref": "#retrieval",
99 99
            "type": "ref",
100 100
            "description": "Per-document retrieval metadata: canonical URLs, MIME type, checksums, file size, and access status of the specific retrieved representation."
101 101
          },
102 102
          "updatedAt": {
103 103
            "type": "string",
104 104
            "format": "datetime",
105 105
            "description": "When this record was last materially updated."
106 106
          },
107 107
          "description": {
108 108
            "type": "string",
109 109
            "maxLength": 10000,
110 110
            "description": "Short source-provided description or human-readable abstract. AI summaries should be stored in enrichment records.",
111 111
            "maxGraphemes": 2000
112 112
          },
113 113
          "effectiveAt": {
114 114
            "type": "string",
115 115
            "format": "datetime",
116 116
            "description": "When the document's legal or administrative effects begin, if applicable and explicitly known."
117 117
          },
118 118
          "identifiers": {
119 119
            "type": "array",
120 120
            "items": {
121 121
              "ref": "#identifier",
122 122
              "type": "ref"
123 123
            },
124 124
            "maxLength": 50,
125 125
            "description": "External identifiers such as DOF IDs, UNFCCC symbols, file numbers, docket numbers, ISBNs, or local archival IDs. For content hashes use retrieval.sha256; for URLs use retrieval.url."
126 126
          },
127 127
          "publishedAt": {
128 128
            "type": "string",
129 129
            "format": "datetime",
130 130
            "description": "When the document was published by the source. Use midnight UTC when only a calendar date is available."
131 131
          },
132 132
          "documentType": {
133 133
            "type": "string",
134 134
            "maxLength": 128,
135 135
            "description": "Machine-readable document category. Open set; known values cover common official and institutional documents.",
136 136
            "knownValues": [
137 137
              "official-publication",
138 138
              "official-gazette-issue",
139 139
              "official-gazette-entry",
140 140
              "law",
141 141
              "decree",
142 142
              "agreement",
143 143
              "notice",
144 144
              "regulation",
145 145
              "standard",
146 146
              "report",
147 147
              "audit-report",
148 148
              "budget-document",
149 149
              "contract",
150 150
              "procurement-document",
151 151
              "court-ruling",
152 152
              "legislative-bill",
153 153
              "legislative-opinion",
154 154
              "treaty",
155 155
              "submission",
156 156
              "technical-paper",
157 157
              "environmental-impact-document",
158 158
              "education-policy-document",
159 159
              "dataset-documentation",
160 160
              "meeting-minutes",
161 161
              "resolution",
162 162
              "other"
163 163
            ]
164 164
          },
165 165
          "jurisdiction": {
166 166
            "type": "string",
167 167
            "maxLength": 256,
168 168
            "description": "Legal or administrative jurisdiction covered by the document (e.g., 'federal', 'state', 'municipal', 'international').",
169 169
            "knownValues": [
170 170
              "local",
171 171
              "municipal",
172 172
              "state",
173 173
              "federal",
174 174
              "national",
175 175
              "regional",
176 176
              "international",
177 177
              "supranational",
178 178
              "unknown"
179 179
            ],
180 180
            "maxGraphemes": 64
181 181
          },
182 182
          "issuingBodies": {
183 183
            "type": "array",
184 184
            "items": {
185 185
              "ref": "tech.transparencia.defs#organization",
186 186
              "type": "ref"
187 187
            },
188 188
            "maxLength": 20,
189 189
            "description": "Organizations, public bodies, institutions, or authorities responsible for issuing, publishing, filing, or adopting the document. Uses the shared tech.transparencia.defs#organization type. Conventional role values include 'publisher', 'issuer', 'author', 'adopter', 'filer', 'regulator', 'court', 'legislature', 'repository'."
190 190
          }
191 191
        }
192 192
      },
193 193
      "description": "Core document metadata for official and institutional documents. Stores identity, provenance, and public context, but not full text, sections, chunks, AI analysis, or ingestion pipeline state."
194 194
    },
195 195
    "retrieval": {
196 196
      "type": "object",
197 197
      "required": [
198 198
        "url",
199 199
        "retrievedAt"
200 200
      ],
201 201
      "properties": {
202 202
        "url": {
203 203
          "type": "string",
204 204
          "format": "uri",
205 205
          "description": "URL where this document was found or retrieved."
206 206
        },
207 207
        "blob": {
208 208
          "type": "blob",
209 209
          "accept": [
210 210
            "application/pdf",
211 211
            "text/html",
212 212
            "text/plain",
213 213
            "application/json"
214 214
          ],
215 -
          "maxSize": 10000000,
216 -
          "description": "Optional binary attachment preserving the actual document bytes on this PDS. Typical contents: the source PDF, an HTML snapshot, the extracted plain text used by the enrichment pipeline, or the original upstream JSON payload (e.g., a SIDOF response). The other retrieval fields (url, pdfUrl, sha256) still reference the original public source — this blob is the archived copy. Max 10 MB."
215 +
          "maxSize": 50000000,
216 +
          "description": "Optional binary attachment preserving the actual document bytes on this PDS. Typical contents: the source PDF, an HTML snapshot, the extracted plain text used by the enrichment pipeline, or the original upstream JSON payload (e.g., a SIDOF response). The other retrieval fields (url, pdfUrl, sha256) still reference the original public source — this blob is the archived copy. Max 50 MB."
217 217
        },
218 218
        "pdfUrl": {
219 219
          "type": "string",
220 220
          "format": "uri",
221 221
          "description": "PDF or downloadable document URL, if available."
222 222
        },
223 223
        "sha256": {
224 224
          "type": "string",
225 225
          "maxLength": 64,
226 226
          "description": "SHA-256 checksum of the retrieved file or canonical source payload, if available."
227 227
        },
228 228
        "htmlUrl": {
229 229
          "type": "string",
230 230
          "format": "uri",
231 231
          "description": "HTML landing page or web version of the document, if available."
232 232
        },
233 233
        "license": {
234 234
          "type": "string",
235 235
          "maxLength": 512,
236 236
          "description": "Per-document license override, if the document is licensed differently from the source-level default.",
237 237
          "maxGraphemes": 128
238 238
        },
239 239
        "fileName": {
240 240
          "type": "string",
241 241
          "maxLength": 1024,
242 242
          "description": "Original or normalized file name, if applicable.",
243 243
          "maxGraphemes": 256
244 244
        },
245 245
        "mimeType": {
246 246
          "type": "string",
247 247
          "maxLength": 128,
248 248
          "description": "MIME type of the retrieved representation (e.g., 'text/html', 'application/pdf')."
249 249
        },
250 250
        "sourceId": {
251 251
          "type": "string",
252 252
          "maxLength": 512,
253 253
          "description": "Source-system identifier for deduplication, if provided by the upstream source."
254 254
        },
255 255
        "sizeBytes": {
256 256
          "type": "integer",
257 257
          "minimum": 0,
258 258
          "description": "Size of the retrieved file or canonical representation in bytes."
259 259
        },
260 260
        "accessType": {
261 261
          "type": "string",
262 262
          "maxLength": 64,
263 263
          "description": "Access status of the source at retrieval time. Use 'previously-public' for documents that were once publicly accessible but have since been withdrawn or removed by the source.",
264 264
          "knownValues": [
265 265
            "public",
266 266
            "restricted",
267 267
            "paywalled",
268 268
            "previously-public",
269 269
            "unknown"
270 270
          ]
271 271
        },
272 272
        "retrievedAt": {
273 273
          "type": "string",
274 274
          "format": "datetime",
275 275
          "description": "When the source was retrieved by the pipeline."
276 276
        },
277 277
        "canonicalUrl": {
278 278
          "type": "string",
279 279
          "format": "uri",
280 280
          "description": "Canonical, normalized, or preferred public URL for the document."
281 281
        }
282 282
      },
283 283
      "description": "Per-document retrieval metadata for a single retrieved representation. Publisher-level metadata (name, base URL, license) lives on the tech.transparencia.document.source record referenced by 'source'."
284 284
    },
285 285
    "identifier": {
286 286
      "type": "object",
287 287
      "required": [
288 288
        "type",
289 289
        "value"
290 290
      ],
291 291
      "properties": {
292 292
        "url": {
293 293
          "type": "string",
294 294
          "format": "uri",
295 295
          "description": "Optional URL where this identifier can be resolved or verified."
296 296
        },
297 297
        "type": {
298 298
          "type": "string",
299 299
          "maxLength": 128,
300 300
          "description": "Identifier type or namespace.",
301 301
          "knownValues": [
302 302
            "dof_id",
303 303
            "dof_publication_id",
304 304
            "unfccc_symbol",
305 305
            "official_file_number",
306 306
            "docket_number",
307 307
            "case_number",
308 308
            "law_number",
309 309
            "isbn",
310 310
            "issn",
311 311
            "doi",
312 312
            "other"
313 313
          ]
314 314
        },
315 315
        "value": {
316 316
          "type": "string",
317 317
          "maxLength": 1024,
318 318
          "description": "Identifier value.",
319 319
          "maxGraphemes": 256
320 320
        }
321 321
      },
322 322
      "description": "External identifier assigned to a document by a source system, authority, archive, or standard. For content hashes use retrieval.sha256; for URLs use retrieval.url."
323 323
    }
324 324
  },
325 325
  "$type": "com.atproto.lexicon.schema",
326 326
  "lexicon": 1,
327 327
  "description": "A canonical public-interest document record. Represents one official publication, report, filing, act, submission, or other source document before structural parsing or AI enrichment."
328 328
}

Compare Other Versions

Lexicon Garden

@