-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
- Loading branch information
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#Process: get neighbourhood | ||
# Transform | ||
loading done in: 403.5775247858837 | ||
(1376877, 6946046) | ||
mining done in: 40191.05462627811 | ||
total time: 40594.632151064 |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#Process: get neighbourhood | ||
# Transform | ||
loading done in: 683.5099277440459 | ||
(2198871, 5647489) | ||
mining done in: 37982.90089613525 | ||
total time: 38666.4108238793 |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
from ink.base.structure import InkExtractor | ||
from ink.miner.rulemining import RuleSetMiner | ||
from rdflib_hdt import HDTStore | ||
from ink.base.connectors import RDFLibConnector, StardogConnector, AbstractConnector | ||
from sklearn.metrics import accuracy_score | ||
import sys | ||
import pickle | ||
import glob | ||
from tqdm import tqdm | ||
from rdflib import URIRef, Graph, Literal | ||
from timeit import default_timer as timer | ||
|
||
class HDTConnector(AbstractConnector):
    """Connector that serves neighbourhood lookups from an HDT document.

    Both methods read the module-level ``store`` object and use its
    ``hdt_document.search`` method; the connector keeps no state of its own.
    """

    def query(self, q_str):
        """Return the predicate/object pairs of the node named in *q_str*.

        The node identifier is the text between the first pair of double
        quotes in *q_str*; the rest of the string is ignored.

        Returns a list of dicts shaped like
        ``{"p": {"value": ...}, "o": {"value": ...}}``, one per triple whose
        subject is that node. Literal objects are reported by their lexical
        form, every other object via ``toPython()``.

        Any failure (no quoted identifier in *q_str*, lookup error, ...)
        yields an empty list instead of raising.
        """
        try:
            noi = URIRef(q_str.split('"')[1])
            triples = store.hdt_document.search((noi, None, None))[0]
            return [
                {
                    "p": {"value": triple[1].toPython()},
                    "o": {
                        # str() gives the literal's lexical form directly.
                        # Slicing it out of the N3 serialisation truncated
                        # values containing quotes or backslashes and gave
                        # '' for multi-line (triple-quoted) literals.
                        "value": str(triple[2])
                        if isinstance(triple[2], Literal)
                        else triple[2].toPython()
                    },
                }
                for triple in triples
            ]
        except Exception:
            # Deliberate best-effort: a node that cannot be resolved is
            # treated as having no neighbours rather than aborting the run.
            return []

    def get_all_entities(self):
        """Return the set of all subject values found in the store.

        Only the subject position of each triple is collected; objects are
        not added. Progress is reported through ``tqdm``.
        """
        triples = store.hdt_document.search((None, None, None))[0]
        return {triple[0].toPython() for triple in tqdm(triples)}
|
||
|
||
# Usage: <script> <hdt_file> <output_prefix> <rule_complexity>
# The store is opened at module level because HDTConnector reads it as a
# module global.
file = sys.argv[1]
store = HDTStore(file)

if __name__ == '__main__':
    # Read and validate the remaining arguments up front: a missing output
    # prefix or a non-integer complexity should fail immediately, not after
    # the extraction and mining phases have already completed.
    output_prefix = sys.argv[2]
    rule_complexity = int(sys.argv[3])

    start = timer()
    connector = HDTConnector()
    extractor = InkExtractor(connector, verbose=True)
    pos = connector.get_all_entities()

    # Build the dataset for every entity in the store, then transform it.
    X_train, y_train = extractor.create_dataset(2, pos, None, jobs=32)
    X_train = extractor.fit_transform(X_train)
    end = timer()
    print("loading done in: ", str(end-start))

    model = RuleSetMiner(support=100, max_len_rule_set=3, verbose=True,
                         rule_complexity=rule_complexity)
    print(X_train[0].shape)

    rf = model.fit(X_train)
    end2 = timer()
    print("mining done in:",str(end2-end))
    rf.to_csv(output_prefix+'.csv')

    print("total time:",str(end2-start))
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#Process: get neighbourhood | ||
# Transform | ||
loading done in: 776.8928480511531 | ||
(2990435, 2983585) | ||
mining done in: 28198.46978243813 | ||
total time: 28975.362630489282 |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#Process: get neighbourhood | ||
# Transform | ||
loading done in: 47.07918471284211 | ||
(470483, 607102) | ||
mining done in: 211.44440980697982 | ||
total time: 258.5235945198219 |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#Process: get neighbourhood | ||
# Transform | ||
loading done in: 770.2709990260191 | ||
(1653880, 1057539) | ||
mining done in: 2388.7974461987615 | ||
total time: 3159.0684452247806 |
Large diffs are not rendered by default.
Large diffs are not rendered by default.