73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
class RecordGraphBuilder:
    """
    Class that can generate the triples necessary to represent a record in rdf format.

    Iterating over an instance yields (subject, predicate, object) 3-tuples. Triples
    whose object is None are dropped by __iter__, which lets the individual generator
    methods yield optional values without guarding each one.
    """

    def __init__(
        self,
        record: 'Record',
        namespaces: 'Namespaces',
        output_format: str,
        version: Optional[int],
    ):
        """
        :param record: a Record object
        :param namespaces: the namespaces object to use
        :param output_format: the format the generated graph will be output in. This will be used to
                              create the metadata URI (i.e. object_url + .{output_format})
        :param version: the version of the record in question, or None if there is no version
        """
        self.record = record
        self.namespaces = namespaces
        self.output_format = output_format
        self.version = version
        # figure out the rounded version of the record
        self.rounded_version = toolkit.get_action('vds_version_round')(
            {},
            {
                'resource_id': self.record.resource_id,
                'version': self.version,
            },
        )
        # figure out the object URI for the record (possibly with version)
        if version is None:
            self.base_object_uri = object_uri(record.data['occurrenceID'])
        else:
            self.base_object_uri = object_uri(
                record.data['occurrenceID'], version=self.rounded_version
            )
        # this will be used as the subject for any record data triples
        self.record_ref = URIRef(self.base_object_uri)
        # grab the gbif version of the record if we can
        self.gbif_record = self._get_gbif_record()

    def _get_gbif_record(self):
        """
        Retrieve the GBIF representation of the record if we can.

        :returns: the GBIF record dict or None if we couldn't get it or didn't have a
                  GBIF ID associated with the record
        """
        gbif_id = self.record.data.get('gbifID')
        if gbif_id is None:
            return None
        try:
            return toolkit.get_action('gbif_record_show')(
                {'ignore_auth': True}, {'gbif_id': gbif_id}
            )
        except toolkit.ObjectNotFound:
            # deliberate best effort: every consumer of self.gbif_record checks for None
            return None

    def __iter__(self):
        """
        Iterating over this object will yield the triples that represent the record.

        :returns: yields 3-tuples
        """
        triple_generators = (
            self._metadata(),
            self._cetaf_cspp(),
            self._gbif(),
            self._images(),
            self._dwc(),
            self._version_info(),
            self._extras(),
        )
        for triple in itertools.chain.from_iterable(triple_generators):
            # if the object part of the triple is None, don't yield it. This is tied to the
            # _get_value function on this class which essentially provides a way of avoiding having
            # to use lots of if checks (i.e. if field has value, yield)
            if triple[2] is not None:
                yield triple

    def _get_value(self, field, source=None):
        """
        Retrieve a value from the given source and return it wrapped in a Literal. The
        default source is the self.record.data dict. This function works in conjunction
        with the __iter__ function above which filters out triples containing a None as
        the object (the 3rd value).

        :param field: the field to get the value of
        :param source: the source dict to retrieve the field's value from
        :returns: the value wrapped in a Literal or None if the field doesn't exist on
                  the source (or its value is None)
        """
        if source is None:
            source = self.record.data
        value = source.get(field)
        return None if value is None else Literal(value)

    def _metadata(self):
        """
        Yields triples which describe this RDF output, i.e. a meta-metadata description.

        :returns: yields triples
        """
        meta_ref = URIRef(f'{self.base_object_uri}.{self.output_format}')
        yield meta_ref, self.namespaces.dc.subject, self.record_ref
        yield (
            meta_ref,
            self.namespaces.dc.creator,
            Literal('Natural History Museum, London'),
        )
        # NOTE(review): datetime.now() is a naive local timestamp - confirm whether
        # consumers expect a timezone-aware value here
        yield meta_ref, self.namespaces.dc.created, Literal(datetime.now())

    def _cetaf_cspp(self):
        """
        Yields the triples required for the record data to ensure we conform to the
        CETAF CSPP recommendations, for more info see here:
        https://cetafidentifiers.biowikifarm.net/wiki/CSPP.

        :returns: yields triples
        """
        yield (
            self.record_ref,
            self.namespaces.dc.title,
            self._get_value('scientificName'),
        )
        yield self.record_ref, self.namespaces.dc.type, Literal('Specimen')
        yield (
            self.record_ref,
            self.namespaces.dwc.scientificName,
            self._get_value('scientificName'),
        )
        yield self.record_ref, self.namespaces.dwc.family, self._get_value('family')

        # find the previous determinations for this record and yield them as the
        # previousIdentifications term, ignoring the current determination the record is filed as
        determination_names = self.record.data.get('determinationNames', [])
        filed_as = self.record.data.get('determinationFiledAs', [])
        if determination_names and filed_as:
            previous_names = [
                name
                for name, filed in zip(determination_names, filed_as)
                if filed == 'No'
            ]
            # only emit the term when there actually is a previous determination, otherwise
            # we would publish an empty previousIdentifications value
            if previous_names:
                yield (
                    self.record_ref,
                    self.namespaces.dwc.previousIdentifications,
                    Literal(as_dwc_list(previous_names)),
                )

        yield (
            self.record_ref,
            self.namespaces.dwc.fieldNumber,
            self._get_value('fieldNumber'),
        )
        yield (
            self.record_ref,
            self.namespaces.dwc.recordedBy,
            self._get_value('recordedBy'),
        )

        # if there is associated media, yield it as a list
        images = self.record.images
        if images:
            value = as_dwc_list(image.url for image in images)
            yield self.record_ref, self.namespaces.dwc.associatedMedia, Literal(value)

        yield (
            self.record_ref,
            self.namespaces.dwc.decimalLatitude,
            self._get_value('decimalLatitude'),
        )
        yield (
            self.record_ref,
            self.namespaces.dwc.decimalLongitude,
            self._get_value('decimalLongitude'),
        )

        # if there's a GBIF record, see if we can yield a country code for the record from it
        if self.gbif_record is not None:
            yield (
                self.record_ref,
                self.namespaces.dwc.countryCode,
                self._get_value('countryCode', source=self.gbif_record),
            )

        # a missing/None created value results in a None object which __iter__ filters out
        yield self.record_ref, self.namespaces.dc.created, self._get_value('created')
        yield self.record_ref, self.namespaces.dc.publisher, URIRef('https://nhm.ac.uk')

    def _images(self):
        """
        Yields triples describing the images associated with this record, if there are
        any. Each image is connected to the record through a FOAF depicts field
        connection and then the image is described in its own set of triples where the
        image URI is used as the subject.

        :returns: yields triples
        """
        for image in self.record.images:
            image_uri = URIRef(image.url)
            yield image_uri, self.namespaces.rdf.type, self.namespaces.foaf.Image
            yield image_uri, self.namespaces.dc.title, Literal(image.title)
            yield image_uri, self.namespaces.cc.license, URIRef(image.license_url)
            yield image_uri, self.namespaces.dc.RightsStatement, Literal(image.rights)
            # although the actual image could be something else, the preview will always be a jpeg
            yield image_uri, self.namespaces.dc.format, Literal('image/jpeg')
            # add link from image to object
            yield image_uri, self.namespaces.foaf.depicts, self.record_ref
            # add a link from the object to the image
            yield self.record_ref, self.namespaces.foaf.depiction, image_uri
            # add a thumbnail link
            if image.is_mss_image:
                yield (
                    image_uri,
                    self.namespaces.foaf.thumbnail,
                    URIRef(image.thumbnail_url),
                )

    def _gbif(self):
        """
        Yields triples describing the record using the GBIF record data associated with
        it. Nothing is yielded if there is no GBIF record.

        :returns: yields triples
        """
        if self.gbif_record is None:
            return

        # assert equivalence with the GBIF record
        gbif_id = self.gbif_record['gbifID']
        yield (
            self.record_ref,
            self.namespaces.owl.sameAs,
            URIRef('https://www.gbif.org/occurrence/{}'.format(gbif_id)),
        )
        # if we have a GBIF country code, add it
        yield (
            self.record_ref,
            self.namespaces.dwc.countryCode,
            self._get_value('countryCode', source=self.gbif_record),
        )

    def _dwc_term_triples(self, term):
        """
        Yields the triples for a single DWC term. If the GBIF record has a key for the
        term (stored under "{term}Key") the term is linked to the GBIF species URI and
        that URI is labelled with the record's value, otherwise the record's value is
        yielded as a plain literal.

        :param term: the DWC term name
        :returns: yields triples
        """
        gbif_key = None
        if self.gbif_record is not None:
            gbif_key = self.gbif_record.get(f'{term}Key')

        predicate = getattr(self.namespaces.dwc, term)
        if gbif_key:
            gbif_uri = URIRef(f'http://www.gbif.org/species/{gbif_key}')
            # add the GBIF species URI with label
            yield gbif_uri, self.namespaces.rdfs.label, self._get_value(term)
            # and associate our specimen object's DWC term with the GBIF URI
            yield self.record_ref, predicate, gbif_uri
        else:
            # no GBIF record, or no GBIF key for this term: fall back to the plain value so
            # that the term isn't silently dropped from the output
            yield self.record_ref, predicate, self._get_value(term)

    def _dwc(self):
        """
        Yields triples describing the record using DWC (DarWin Core) terms.

        :returns: yields triples
        """
        yield (
            self.record_ref,
            self.namespaces.dc.identifier,
            Literal(self.record.data['occurrenceID']),
        )

        dwc_terms_dict = dwc_terms(self.record.data.keys())
        # these are handled separately below rather than as plain term triples
        groups_to_skip = {'dynamicProperties'}
        terms_to_skip = {'associatedMedia', 'created', 'modified'}

        for group, terms in dwc_terms_dict.items():
            if group in groups_to_skip:
                continue
            for term in terms.values():
                if term in terms_to_skip:
                    continue
                yield from self._dwc_term_triples(term)

        # retrieve the dynamic properties and yield them as one JSON dump
        dynamic_properties_dict = {}
        for properties in dwc_terms_dict.get('dynamicProperties', {}).values():
            for dynamic_property in properties:
                if dynamic_property == 'created':
                    continue
                dynamic_properties_dict[dynamic_property] = self.record.data.get(
                    dynamic_property
                )
        if dynamic_properties_dict:
            yield (
                self.record_ref,
                self.namespaces.dwc.dynamicProperties,
                Literal(json.dumps(dynamic_properties_dict)),
            )

        # yield the associatedMedia term as a list of image URIs
        images = self.record.images
        if images:
            yield (
                self.record_ref,
                self.namespaces.dwc.associatedMedia,
                Literal(as_dwc_list(image.url for image in images)),
            )

        # missing/None dates result in None objects which __iter__ filters out
        yield self.record_ref, self.namespaces.dc.created, self._get_value('created')
        # NOTE(review): created uses the dc namespace but modified uses dwc - confirm the
        # dwc predicate is intended (left unchanged as it alters the published output)
        yield self.record_ref, self.namespaces.dwc.modified, self._get_value('modified')

    def _version_info(self):
        """
        Yield simple version information about the record.

        :returns: yields triples
        """
        rounded = self.rounded_version
        # assumes the version rounding action can return None (e.g. no data at the
        # requested version) - TODO confirm. A None object is filtered out by __iter__.
        yield (
            self.record_ref,
            self.namespaces.owl.versionInfo,
            None if rounded is None else Literal(rounded),
        )

        if self.version is None or (rounded is not None and self.version > rounded):
            # if there is no version given or the version requested is beyond the latest version the
            # data we're using is the same as the latest version's data, yield a same as to show
            # this
            yield (
                self.record_ref,
                self.namespaces.owl.sameAs,
                URIRef(object_uri(self.record.data['occurrenceID'], version=rounded)),
            )

    def _extras(self):
        """
        Yields some additional triples that don't really fit under any of the other
        existing method groupings.

        :returns: yields triples
        """
        yield (
            self.record_ref,
            self.namespaces.aiiso.Department,
            Literal(get_department(self.record.data['collectionCode'])),
        )
        yield (
            self.record_ref,
            self.namespaces.aiiso.Division,
            self._get_value('subDepartment'),
        )
        yield (
            self.record_ref,
            self.namespaces.void.inDataset,
            URIRef(dataset_uri({'id': self.record.package_id}) + '#dataset'),
        )
|