tech.transparencia.document.item

transparencia.pds.transparencia.tech

Schema Diff

+11 -0

From

CID
bafyreie3kpd2fu2...
Indexed At
2026-05-18 18:55 UTC
View this version

To

CID
bafyreif4gc5j4rr...
Indexed At
2026-05-19 03:28 UTC
View this version

Compatibility Analysis

Backward Compatible

Backward compatible. 2 non-breaking changes.

Non-Breaking Changes (2)
  • AddedVertex { vertex_id: "tech.transparencia.document.item#retrieval.blob" }
  • AddedEdge { src: "tech.transparencia.document.item#retrieval", tgt: "tech.transparencia.document.item#retrieval.blob", kind: "prop", name: Some("blob") }

Migration Guidance

Added Elements

  • AddedVertex { vertex_id: "tech.transparencia.document.item#retrieval.blob" }

Additional Notes

  • Non-breaking: AddedEdge { src: "tech.transparencia.document.item#retrieval", tgt: "tech.transparencia.document.item#retrieval.blob", kind: "prop", name: Some("blob") }
1 1
{
2 2
  "id": "tech.transparencia.document.item",
3 3
  "defs": {
4 4
    "main": {
5 5
      "key": "tid",
6 6
      "type": "record",
7 7
      "record": {
8 8
        "type": "object",
9 9
        "required": [
10 10
          "title",
11 11
          "documentType",
12 12
          "source",
13 13
          "retrieval",
14 14
          "publishedAt",
15 15
          "createdAt"
16 16
        ],
17 17
        "properties": {
18 18
          "title": {
19 19
            "type": "string",
20 20
            "maxLength": 4096,
21 21
            "description": "Official or source-provided title of the document.",
22 22
            "maxGraphemes": 1024
23 23
          },
24 24
          "source": {
25 25
            "ref": "com.atproto.repo.strongRef",
26 26
            "type": "ref",
27 27
            "description": "Strong reference to the tech.transparencia.document.source record for the publisher or repository (e.g., DOF, UNFCCC). Identifies which source this document came from."
28 28
          },
29 29
          "topics": {
30 30
            "type": "array",
31 31
            "items": {
32 32
              "type": "string",
33 33
              "maxLength": 512,
34 34
              "maxGraphemes": 128
35 35
            },
36 36
            "maxLength": 30,
37 37
            "description": "Free-form topics, tags, or source categories attached to the document."
38 38
          },
39 39
          "country": {
40 40
            "type": "string",
41 41
            "maxLength": 2,
42 42
            "description": "Primary country connected to the document, as an ISO 3166-1 alpha-2 code (e.g., 'MX', 'BR', 'US'). Omit for international documents."
43 43
          },
44 44
          "domains": {
45 45
            "type": "array",
46 46
            "items": {
47 47
              "type": "string",
48 48
              "maxLength": 128,
49 49
              "knownValues": [
50 50
                "government",
51 51
                "politics",
52 52
                "law",
53 53
                "justice",
54 54
                "environment",
55 55
                "climate",
56 56
                "education",
57 57
                "health",
58 58
                "budget",
59 59
                "procurement",
60 60
                "economy",
61 61
                "finance",
62 62
                "labor",
63 63
                "energy",
64 64
                "infrastructure",
65 65
                "security",
66 66
                "science-technology",
67 67
                "society",
68 68
                "human-rights",
69 69
                "other"
70 70
              ],
71 71
              "maxGraphemes": 64
72 72
            },
73 73
            "maxLength": 20,
74 74
            "description": "Broad public-interest domains covered by the document. Open set; consumers should tolerate unknown values."
75 75
          },
76 76
          "issuedAt": {
77 77
            "type": "string",
78 78
            "format": "datetime",
79 79
            "description": "When the issuing authority signed, issued, adopted, or approved the document, if different from publication time."
80 80
          },
81 81
          "language": {
82 82
            "type": "string",
83 83
            "format": "language",
84 84
            "description": "Primary language of the document content (BCP-47, e.g., 'es-MX', 'en', 'pt-BR')."
85 85
          },
86 86
          "subtitle": {
87 87
            "type": "string",
88 88
            "maxLength": 4096,
89 89
            "description": "Optional subtitle, section heading, or secondary title.",
90 90
            "maxGraphemes": 1024
91 91
          },
92 92
          "createdAt": {
93 93
            "type": "string",
94 94
            "format": "datetime",
95 95
            "description": "When this AT Protocol record was created."
96 96
          },
97 97
          "retrieval": {
98 98
            "ref": "#retrieval",
99 99
            "type": "ref",
100 100
            "description": "Per-document retrieval metadata: canonical URLs, MIME type, checksums, file size, and access status of the specific retrieved representation."
101 101
          },
102 102
          "updatedAt": {
103 103
            "type": "string",
104 104
            "format": "datetime",
105 105
            "description": "When this record was last materially updated."
106 106
          },
107 107
          "description": {
108 108
            "type": "string",
109 109
            "maxLength": 10000,
110 110
            "description": "Short source-provided description or human-readable abstract. AI summaries should be stored in enrichment records.",
111 111
            "maxGraphemes": 2000
112 112
          },
113 113
          "effectiveAt": {
114 114
            "type": "string",
115 115
            "format": "datetime",
116 116
            "description": "When the document's legal or administrative effects begin, if applicable and explicitly known."
117 117
          },
118 118
          "identifiers": {
119 119
            "type": "array",
120 120
            "items": {
121 121
              "ref": "#identifier",
122 122
              "type": "ref"
123 123
            },
124 124
            "maxLength": 50,
125 125
            "description": "External identifiers such as DOF IDs, UNFCCC symbols, file numbers, docket numbers, ISBNs, or local archival IDs. For content hashes use retrieval.sha256; for URLs use retrieval.url."
126 126
          },
127 127
          "publishedAt": {
128 128
            "type": "string",
129 129
            "format": "datetime",
130 130
            "description": "When the document was published by the source. Use midnight UTC when only a calendar date is available."
131 131
          },
132 132
          "documentType": {
133 133
            "type": "string",
134 134
            "maxLength": 128,
135 135
            "description": "Machine-readable document category. Open set; known values cover common official and institutional documents.",
136 136
            "knownValues": [
137 137
              "official-publication",
138 138
              "official-gazette-issue",
139 139
              "official-gazette-entry",
140 140
              "law",
141 141
              "decree",
142 142
              "agreement",
143 143
              "notice",
144 144
              "regulation",
145 145
              "standard",
146 146
              "report",
147 147
              "audit-report",
148 148
              "budget-document",
149 149
              "contract",
150 150
              "procurement-document",
151 151
              "court-ruling",
152 152
              "legislative-bill",
153 153
              "legislative-opinion",
154 154
              "treaty",
155 155
              "submission",
156 156
              "technical-paper",
157 157
              "environmental-impact-document",
158 158
              "education-policy-document",
159 159
              "dataset-documentation",
160 160
              "meeting-minutes",
161 161
              "resolution",
162 162
              "other"
163 163
            ]
164 164
          },
165 165
          "jurisdiction": {
166 166
            "type": "string",
167 167
            "maxLength": 256,
168 168
            "description": "Legal or administrative jurisdiction covered by the document (e.g., 'federal', 'state', 'municipal', 'international').",
169 169
            "knownValues": [
170 170
              "local",
171 171
              "municipal",
172 172
              "state",
173 173
              "federal",
174 174
              "national",
175 175
              "regional",
176 176
              "international",
177 177
              "supranational",
178 178
              "unknown"
179 179
            ],
180 180
            "maxGraphemes": 64
181 181
          },
182 182
          "issuingBodies": {
183 183
            "type": "array",
184 184
            "items": {
185 185
              "ref": "tech.transparencia.defs#organization",
186 186
              "type": "ref"
187 187
            },
188 188
            "maxLength": 20,
189 189
            "description": "Organizations, public bodies, institutions, or authorities responsible for issuing, publishing, filing, or adopting the document. Uses the shared tech.transparencia.defs#organization type. Conventional role values include 'publisher', 'issuer', 'author', 'adopter', 'filer', 'regulator', 'court', 'legislature', 'repository'."
190 190
          }
191 191
        }
192 192
      },
193 193
      "description": "Core document metadata for official and institutional documents. Stores identity, provenance, and public context, but not full text, sections, chunks, AI analysis, or ingestion pipeline state."
194 194
    },
195 195
    "retrieval": {
196 196
      "type": "object",
197 197
      "required": [
198 198
        "url",
199 199
        "retrievedAt"
200 200
      ],
201 201
      "properties": {
202 202
        "url": {
203 203
          "type": "string",
204 204
          "format": "uri",
205 205
          "description": "URL where this document was found or retrieved."
206 206
        },
207 +
        "blob": {
208 +
          "type": "blob",
209 +
          "accept": [
210 +
            "application/pdf",
211 +
            "text/html",
212 +
            "text/plain",
213 +
            "application/json"
214 +
          ],
215 +
          "maxSize": 50000000,
216 +
          "description": "Optional binary attachment preserving the actual document bytes on this PDS. Typical contents: the source PDF, an HTML snapshot, the extracted plain text used by the enrichment pipeline, or the original upstream JSON payload (e.g., a SIDOF response). The other retrieval fields (url, pdfUrl, sha256) still reference the original public source — this blob is the archived copy. Max 50 MB."
217 +
        },
207 218
        "pdfUrl": {
208 219
          "type": "string",
209 220
          "format": "uri",
210 221
          "description": "PDF or downloadable document URL, if available."
211 222
        },
212 223
        "sha256": {
213 224
          "type": "string",
214 225
          "maxLength": 64,
215 226
          "description": "SHA-256 checksum of the retrieved file or canonical source payload, if available."
216 227
        },
217 228
        "htmlUrl": {
218 229
          "type": "string",
219 230
          "format": "uri",
220 231
          "description": "HTML landing page or web version of the document, if available."
221 232
        },
222 233
        "license": {
223 234
          "type": "string",
224 235
          "maxLength": 512,
225 236
          "description": "Per-document license override, if the document is licensed differently from the source-level default.",
226 237
          "maxGraphemes": 128
227 238
        },
228 239
        "fileName": {
229 240
          "type": "string",
230 241
          "maxLength": 1024,
231 242
          "description": "Original or normalized file name, if applicable.",
232 243
          "maxGraphemes": 256
233 244
        },
234 245
        "mimeType": {
235 246
          "type": "string",
236 247
          "maxLength": 128,
237 248
          "description": "MIME type of the retrieved representation (e.g., 'text/html', 'application/pdf')."
238 249
        },
239 250
        "sourceId": {
240 251
          "type": "string",
241 252
          "maxLength": 512,
242 253
          "description": "Source-system identifier for deduplication, if provided by the upstream source."
243 254
        },
244 255
        "sizeBytes": {
245 256
          "type": "integer",
246 257
          "minimum": 0,
247 258
          "description": "Size of the retrieved file or canonical representation in bytes."
248 259
        },
249 260
        "accessType": {
250 261
          "type": "string",
251 262
          "maxLength": 64,
252 263
          "description": "Access status of the source at retrieval time. Use 'previously-public' for documents that were once publicly accessible but have since been withdrawn or removed by the source.",
253 264
          "knownValues": [
254 265
            "public",
255 266
            "restricted",
256 267
            "paywalled",
257 268
            "previously-public",
258 269
            "unknown"
259 270
          ]
260 271
        },
261 272
        "retrievedAt": {
262 273
          "type": "string",
263 274
          "format": "datetime",
264 275
          "description": "When the source was retrieved by the pipeline."
265 276
        },
266 277
        "canonicalUrl": {
267 278
          "type": "string",
268 279
          "format": "uri",
269 280
          "description": "Canonical, normalized, or preferred public URL for the document."
270 281
        }
271 282
      },
272 283
      "description": "Per-document retrieval metadata for a single retrieved representation. Publisher-level metadata (name, base URL, license) lives on the tech.transparencia.document.source record referenced by 'source'."
273 284
    },
274 285
    "identifier": {
275 286
      "type": "object",
276 287
      "required": [
277 288
        "type",
278 289
        "value"
279 290
      ],
280 291
      "properties": {
281 292
        "url": {
282 293
          "type": "string",
283 294
          "format": "uri",
284 295
          "description": "Optional URL where this identifier can be resolved or verified."
285 296
        },
286 297
        "type": {
287 298
          "type": "string",
288 299
          "maxLength": 128,
289 300
          "description": "Identifier type or namespace.",
290 301
          "knownValues": [
291 302
            "dof_id",
292 303
            "dof_publication_id",
293 304
            "unfccc_symbol",
294 305
            "official_file_number",
295 306
            "docket_number",
296 307
            "case_number",
297 308
            "law_number",
298 309
            "isbn",
299 310
            "issn",
300 311
            "doi",
301 312
            "other"
302 313
          ]
303 314
        },
304 315
        "value": {
305 316
          "type": "string",
306 317
          "maxLength": 1024,
307 318
          "description": "Identifier value.",
308 319
          "maxGraphemes": 256
309 320
        }
310 321
      },
311 322
      "description": "External identifier assigned to a document by a source system, authority, archive, or standard. For content hashes use retrieval.sha256; for URLs use retrieval.url."
312 323
    }
313 324
  },
314 325
  "$type": "com.atproto.lexicon.schema",
315 326
  "lexicon": 1,
316 327
  "description": "A canonical public-interest document record. Represents one official publication, report, filing, act, submission, or other source document before structural parsing or AI enrichment."
317 328
}

Compare Other Versions

Lexicon Garden

@