Skip to content

Commit

Permalink
added more wikidata functions to capture SI unit related subclasses a…
Browse files Browse the repository at this point in the history
…nd units with those classes
  • Loading branch information
stuchalk committed Dec 21, 2024
1 parent 0be3cb0 commit 79ce30b
Show file tree
Hide file tree
Showing 11 changed files with 79,989 additions and 14,138 deletions.
140 changes: 134 additions & 6 deletions dashboard/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from units.functions import *
from wdfunctions import *

choice = 'wdu'
choice = 'wdsic'

local = timezone("America/New_York")

Expand Down Expand Up @@ -667,9 +667,20 @@
for fld in flds:
if isinstance(unit[fld], dict): # when units are retrieved in code (not from download file)
unit[fld] = unit[fld]['value'] # data is in this format {'type': '???', 'value': '???'}
if isinstance(unit['facunit'], dict):
unit['facunit'] = unit['facunit']['value']
facu = Wdunits.objects.filter(uurl=unit['facunit'])
if not facu:
if isinstance(unit['unit'], dict):
unit['unit'] = unit['unit']['value']
if unit['unit'] == 'degree Celsius':
continue # it's an equation, not a factor...
print('factor unit not in DB yet')
continue

# add unit
wu = Wdunits(cls=unit['cls'], unit=unit['unit'], quant=unit['quant'], factor=unit['factor'],
wdfacunit_id=unit['facunit'], curl=unit['curl'], uurl=unit['uurl'], qurl=unit['qurl'],
wdfacunit_id=facu[0].id, curl=unit['curl'], uurl=unit['uurl'], qurl=unit['qurl'],
added=date.today(), updated=dt)
action = "added"
else:
Expand All @@ -692,9 +703,8 @@
unit['unit'] = unit['unit']['value']
if unit['unit'] == 'degree Celsius':
continue # it's an equation, not a factor...
print('factor unit not in DB')
print(unit['unit'])
exit()
print('factor unit not in DB yet')
continue
wu.wdfacunit_id = facu[0].id
wu.save()
action = "found"
Expand All @@ -704,8 +714,10 @@
cls = Wdclasses.objects.filter(url__exact=unit['curl'])
if cls:
wu.wdclass_id = cls[0].id
print(wu.__dict__)

# save to wdunits table

wu.save()

# add any unit reps to the representations table
Expand Down Expand Up @@ -747,7 +759,7 @@

print(action + " '" + wu.unit + "' (" + str(wu.id) + ")")
cnt += 1
if cnt > 999:
if cnt > 3999:
exit()

# get a list of quantities on wikidata
Expand Down Expand Up @@ -812,6 +824,13 @@
else:
print("already added " + quant['name'])

# print the class/unit rows returned by wdsiclss() (units that are instances of the SI related classes)
if choice == 'wdsic':
    for sicls in wdsiclss():
        print(sicls)
    exit()

# check wdquants data against the quantities data
if choice == 'wdqchk':
qs = Wdquants.objects.all().values('quant', 'sect')
Expand All @@ -821,3 +840,112 @@
if not found:
print(q)
exit()

# check how to get all data out for wikidata units (for newview template)
if choice == 'wdudata':
    # the unit with id 35 is used as the sample record
    wdunit = Wdunits.objects.get(id=35)
    print(vars(wdunit))

    # first row of each related set (raises IndexError if the set is empty)
    first_quant = wdunit.wdquantswdunits_set.all()[0]
    print(vars(first_quant))
    first_rep = wdunit.representations_set.all()[0]
    print(vars(first_rep))

    # qkinds = wdunit.wdclasses_set.all()

# populate the wdquants_wdunits table (run once)
if choice == 'wduqks':
    dt = local.localize(datetime.now())

    # lookup tables: unit name -> wdunits id and quantity name -> wdquants id
    unts = {u['unit']: u['id'] for u in Wdunits.objects.all().values('id', 'unit').order_by('unit')}
    qnts = {q['name']: q['id'] for q in Wdquants.objects.all().values('id', 'name').order_by('name')}

    # load the downloaded query results; default to an empty list so that a missing
    # file is reported instead of crashing the loop below with a TypeError
    units = []
    file = 'umis_units_query_121724.json'
    # NOTE(review): if STATIC_URL starts with '/', os.path.join discards BASE_DIR - confirm the setting
    path = os.path.join(BASE_DIR, STATIC_URL, file)
    if os.path.exists(path):
        # the context manager closes the file, no explicit close() needed
        with open(path) as f:
            units = json.load(f)
    else:
        print("file not found: " + path)

    for unt in units:
        label = unt['quant'] + ":" + unt['unit']
        # both the unit and the quantity must already be in the database to be joined
        if unt['unit'] not in unts or unt['quant'] not in qnts:
            print(label + " not found")
            continue

        uq, created = WdquantsWdunits.objects.get_or_create(wdquant_id=qnts[unt['quant']], wdunit_id=unts[unt['unit']])
        if created:
            print("added '" + label)
            # timestamp only the rows created in this run
            uq.updated = dt
            uq.save()
        else:
            print("already added " + label)

# build an index of wikidata units grouped by the name of each linked quantity
if choice == 'wduidx':
    raw = Wdunits.objects.all().order_by('wdclass__quant', 'unit')
    data = {}
    for u in raw:
        # a unit is listed once under every quantity it is joined to
        for qujoin in u.wdquantswdunits_set.all():
            data.setdefault(qujoin.wdquant.name, []).append({'id': u.id, 'name': u.unit})

# link the units systems to wdunits (not many <10% are identified), also add the ISQ if wdclass is defined
if choice == 'wdusyss':
    dt = local.localize(datetime.now())

    # lookup tables:
    #   unts: unit name -> wdunits id (all units)
    #   syss: wikidata QID -> unitsystems id (only unit systems that have a wikidata url)
    #   ucls: unit name -> wdunits id (only units that have a wdclass assigned)
    utmp = Wdunits.objects.all().values('id', 'unit').order_by('unit')
    stmp = Unitsystems.objects.filter(wdurl__isnull=False).values('id', 'wdurl').order_by('id')
    ctmp = Wdunits.objects.filter(wdclass_id__isnull=False).values('unit', 'id').order_by('unit')
    unts = {u['unit']: u['id'] for u in utmp}
    syss = {s['wdurl'].replace('https://www.wikidata.org/wiki/', ''): s['id'] for s in stmp}
    ucls = {c['unit']: c['id'] for c in ctmp}

    # load the downloaded query results; default to an empty list so that a missing
    # file is reported instead of crashing the loop below with a TypeError
    units = []
    file = 'umis_units_query_121924.json'
    # NOTE(review): if STATIC_URL starts with '/', os.path.join discards BASE_DIR - confirm the setting
    path = os.path.join(BASE_DIR, STATIC_URL, file)
    if os.path.exists(path):
        # the context manager closes the file, no explicit close() needed
        with open(path) as f:
            units = json.load(f)
    else:
        print("file not found: " + path)

    print(syss)
    for unt in units:
        # default: fall back to unit system id 1, which only applies to units that have a wdclass
        # NOTE(review): id 1 is presumably the ISQ row of the unitsystems table - confirm
        usys, usys_id, wdunit_id = 1, 1, ucls.get(unt['unit'])

        # prefer the unit system reported by wikidata when both it and the unit are in the DB
        if 'usys' in unt:
            qid = unt['usys'].replace('http://www.wikidata.org/entity/', '')
            if qid in syss and unt['unit'] in unts:
                usys, usys_id, wdunit_id = qid, syss[qid], unts[unt['unit']]

        if wdunit_id is None:
            # nothing to link; previously this case was reported as "already added"
            print("no unit system link for " + unt['unit'])
            continue

        uw, created = UnitsystemsWdunits.objects.get_or_create(unitsystem_id=usys_id, wdunit_id=wdunit_id)
        if created:
            print("added " + str(usys) + ":" + unt['unit'])
            # timestamp only the rows created in this run
            uw.updated = dt
            uw.save()
        else:
            print("already added " + str(usys) + ":" + unt['unit'])

25 changes: 25 additions & 0 deletions dashboard/wdfunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ def wdunits():

def wdquants():
""" get list of units of measurement instances under 'unit of' subclasses """
# filtering based on ISO sources (incomplete) for example (currently limits responses severely)
# FILTER(?srcid IN(wd:Q109490582,wd:Q90137277,wd:Q73391977,wd:Q92157468,wd:Q117847945,
# wd:Q80232369,wd:Q99839315,wd:Q86976044,wd:Q85490171,wd:Q100957475))
query = """
SELECT DISTINCT ?qntid ?quant ?isq ?source ?sect
WHERE
Expand Down Expand Up @@ -106,3 +109,25 @@ def wdquants():
# return data
wdjsn = json.loads(json.dumps(wd['results']['bindings']))
return wdjsn


def wdsiclss():
    """
    get the units that are instances of a fixed list of SI related unit classes

    The query keeps only the classes named in its FILTER(?curl IN (...)) clause that are
    also subclasses (wdt:P279*) of wd:Q47574, and for each one selects the units that
    are instances (wdt:P31) of it. Only English labels are returned, ordered by class label.

    :return: the list of result bindings, one per class/unit pair, with the
        keys 'curl', 'cls', 'uurl' and 'unit'
    """
    query = """
SELECT ?curl ?cls ?uurl ?unit WHERE {
?curl wdt:P279* wd:Q47574 ;
rdfs:label ?cls .
?uurl wdt:P31 ?curl ;
rdfs:label ?unit .
FILTER(?curl IN (wd:Q223662, wd:Q208469, wd:Q61610698, wd:Q87252761, wd:Q68618328, wd:Q1618549, wd:Q21684377,
wd:Q69197847, wd:Q99734981, wd:Q26240, wd:Q106839753, wd:Q3268848, wd:Q106839917))
FILTER(LANG(?cls) = "en")
FILTER(LANG(?unit) = "en")
}
ORDER BY ?cls
"""
    # search wikidata sparql query
    wd = return_sparql_query_results(query)
    # return the bindings directly (a dumps/loads round trip would only rebuild the same structure)
    # NOTE(review): assumes the helper returns the already-decoded JSON response - confirm
    return wd['results']['bindings']
Loading

0 comments on commit 79ce30b

Please sign in to comment.