omekasToRDF.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
import requests
import json
import rdflib
import logging
import os
import sys
from datetime import date, datetime, timedelta
from zipfile import ZipFile
from triplesCreation import *
from constants import *

# TODO check trailing / in API_PATH
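
# Note: names used below but not defined in this file are expected to come
# from the two star imports above. constants should provide API_PATH,
# VOCABULARIES, ITEMS, MEDIAS, COLLECTIONS, FILES_REPOSITORY,
# BACKUP_REPOSITORY, LOGS_REPOSITORY, MAX_DAYS, RESULTS_PER_PAGE and FORMAT;
# triplesCreation should provide namespaces, initializeRDFdatabase,
# createItemsTriples, createMediasTriples, createCollectionsTriples and
# saveGraphToFile.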

# Make the generated RDF files world-readable
def alterFilesPermissions():
    for (r, d, f) in os.walk(FILES_REPOSITORY):
        for file in f:
            try:
                filePath = os.path.join(r, file)
                os.chmod(filePath, 0o644)
            except OSError:
                logging.exception('Permission update exception:')
                continue

# Create a backup archive containing the current RDF base content
def createBackup():
    yesterday = datetime.now() - timedelta(days=1)
    # Create the archive file
    if os.path.exists(BACKUP_REPOSITORY):
        archiveFile = BACKUP_REPOSITORY + yesterday.strftime('%Y%m%d') \
            + '_base_rdf' + '.zip'
        # Add all RDF files to the archive
        # (see https://docs.python.org/2.7/library/zipfile.html for documentation)
        with ZipFile(archiveFile, 'w') as zipObj:
            # Iterate over all the files in the directory
            for (r, d, f) in os.walk(FILES_REPOSITORY):
                for file in f:
                    # Build the complete path of the file in the directory
                    filePath = os.path.join(r, file)
                    # Add the file to the zip
                    zipObj.write(filePath)
        os.chmod(archiveFile, 0o644)
    else:
        logging.error('The backup repository with path "'
                      + BACKUP_REPOSITORY + '" has not been found.')
        sys.exit("Quitting script...please create the backup repository first.")

# Clean repositories by removing files older than X days (default set to 30 days)
# Log and archive file names start with the date in "YYYYmmdd" format
def cleanRepository():
    today = date.today()
    # Clean the log files repository
    for (r, d, f) in os.walk(LOGS_REPOSITORY):
        for file in f:
            filePath = os.path.join(r, file)
            try:
                creationDate = datetime.strptime(file[:8], '%Y%m%d')
                # Remove files older than MAX_DAYS
                if (today - creationDate.date()).days > MAX_DAYS:
                    os.remove(filePath)
            except (ValueError, OSError):
                logging.exception('Exception message:')
                continue
    # Clean the backup archives repository
    for (r, d, f) in os.walk(BACKUP_REPOSITORY):
        for file in f:
            filePath = os.path.join(r, file)
            try:
                creationDate = datetime.strptime(file[:8], '%Y%m%d')
                # Remove files older than MAX_DAYS
                if (today - creationDate.date()).days > MAX_DAYS:
                    os.remove(filePath)
            except (ValueError, OSError):
                continue
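
# The date prefix parsed above comes from the file naming scheme used by
# createBackup() and configureLogging(), e.g. '20200101_base_rdf.zip'[:8]
# and '20200101_RDF_db_update.log'[:8] both parse as 2020-01-01
# (illustrative dates only).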

# Configure the logger (see https://realpython.com/python-logging/ for details)
def configureLogging():
    today = date.today()
    if os.path.exists(LOGS_REPOSITORY):
        logfile = LOGS_REPOSITORY \
            + today.strftime('%Y%m%d_RDF_db_update') + '.log'
        logging.basicConfig(filename=logfile,
                            format='%(levelname)s - %(asctime)s - %(message)s',
                            level=logging.INFO)
        # Make the last log file world-readable
        os.chmod(logfile, 0o644)
    else:
        logging.error('The logs repository with path "'
                      + LOGS_REPOSITORY + '" has not been found.')
        sys.exit("Quitting script...please create the logs repository first.")

# Retrieve the Omeka S vocabularies and save each prefix with its associated
# URI in the namespaces object
def saveNamespaces():
    logging.info('---- Calling ' + API_PATH + VOCABULARIES + " ----")
    response = requests.get(API_PATH + VOCABULARIES)
    if response:
        vocabularies = response.json()
        if len(vocabularies) > 0:
            for vocab in vocabularies:
                namespaces[vocab["o:prefix"]] = vocab["o:namespace_uri"]
                logging.info("Add namespace " + vocab["o:namespace_uri"]
                             + " with prefix " + vocab["o:prefix"])

# Get Omeka S resources by making REST API calls
# Save items, medias or collections to the RDF base (several files)
def saveResources(category):
    graph = initializeRDFdatabase()
    callOver = False
    page = 0
    # The call is split into several pages to avoid losing data
    while not callOver:
        # The Python requests package is used to make the HTTP call
        # See https://realpython.com/python-requests/ for examples
        logging.info('---- Calling ' + API_PATH + category + " ----")
        stringParams = {'page': page, 'per_page': RESULTS_PER_PAGE}
        response = requests.get(API_PATH + category, params=stringParams)
        # The response is truthy if the status code is between 200 and 400
        if response:
            resources = response.json()
            if len(resources) > 0:
                logging.info('Page number ' + str(page) + ' with '
                             + str(len(resources)) + ' resources.')
                page += 1
                if category == ITEMS:
                    createItemsTriples(resources, graph)
                elif category == MEDIAS:
                    createMediasTriples(resources, graph)
                else:
                    createCollectionsTriples(resources, graph)
            else:
                callOver = True
                logging.info('No further data to fetch. Call is over for '
                             + str(category) + '.')
        else:
            # Stop paging on error, otherwise the same failing request
            # would be retried forever
            callOver = True
            logging.error('An error has occurred. Response code: '
                          + str(response.status_code))
    # Save the graph resources to an RDF file
    saveGraphToFile(graph, category, FORMAT)
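
# The paging loop above ends when the API returns an empty page: once `page`
# is past the last page of results, response.json() is an empty list and
# callOver is set.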

#### Main program ####
# Instantiate and configure a logger first, so that the backup and cleanup
# steps below can write to the log file
configureLogging()
# Add a backup archive and remove old files
createBackup()
cleanRepository()
logging.info('RDF database update initialization.')
saveNamespaces()
logging.info('Starting items creation.')
saveResources(ITEMS)
logging.info('Starting medias creation.')
saveResources(MEDIAS)
logging.info('Starting collections creation.')
saveResources(COLLECTIONS)
logging.info('Updating files permissions.')
alterFilesPermissions()
logging.info('RDF database successfully updated.')