-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWikidataComparison.fs
88 lines (67 loc) · 2.9 KB
/
WikidataComparison.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
module Wikidata.Comparison
open EraKG
open Wikidata.Sparql
/// Tells whether an operational point takes part in the comparison,
/// i.e. whether its `Type` is "station" or "passengerStop".
let private matchType (op: OperationalPoint) =
    match op.Type with
    | "station"
    | "passengerStop" -> true
    | _ -> false
/// Tests whether a station code (`railwayRef`) designates the operational point id `uOPID`.
///
/// `railwayRef` may hold several codes separated by ';'; the result is true as soon as
/// one of them matches. A code of 1 to 5 characters is converted to an OPID by replacing
/// blanks with '0', left-padding with '0' to 5 characters and prefixing "DE". `uOPID` is
/// compared with its blanks replaced by '0' as well.
/// Codes that are empty or longer than 5 characters never match.
let private matchUOPID (railwayRef: string) (uOPID: string) =
    let codeLength = 5

    // Two blanks, built explicitly so the value cannot be collapsed by whitespace cleanup.
    let doubleBlank = System.String(' ', 2)

    let normalize (s: string) = s.Replace(" ", "0")

    // OPID for a single code; None when the code length is out of range, so that an
    // invalid code can never compare equal to an (equally invalid) empty uOPID.
    let toOPID (code: string) =
        if code.Length > 0 && code.Length <= codeLength then
            Some("DE" + (normalize code).PadLeft(codeLength, '0'))
        else
            None

    let target = Some(normalize uOPID)

    let matchesCode (code: string) =
        toOPID code = target
        || (uOPID.Contains(" ")
            && code.Length = 4
            && code.Contains(" ")
            // A 4-character code with a blank is stored with the blank doubled,
            // e.g. code 'TU R' belongs to the OPID 'DETU' + two blanks + 'R'.
            && toOPID (code.Replace(" ", doubleBlank)) = target)

    railwayRef.Split [| ';' |] |> Array.exists matchesCode
/// Pairs every operational point with the first entry whose station code matches
/// the point's UOPID (as decided by `matchUOPID`), or with None when no entry matches.
/// Entries without a station code are never selected.
let private compareByUOPID
    (operationalPoints: OperationalPoint[])
    (osmEntries: Entry[])
    : (OperationalPoint * Entry option)[] =
    // First entry carrying a station code that matches the given operational point.
    let lookup (point: OperationalPoint) =
        osmEntries
        |> Array.tryFind (fun candidate ->
            let code = candidate.stationCode

            if code.IsSome then
                matchUOPID code.Value point.UOPID
            else
                false)

    operationalPoints |> Array.map (fun point -> point, lookup point)
/// Compares operational points with the given entries and reports the outcome on stderr.
///
/// Only operational points accepted by `matchType` are considered; each of them is paired
/// with an entry by `compareByUOPID`. The function produces no result value: everything is
/// written to stderr with `fprintfn`.
///
/// `extra` gates the per-point listing of the operational points that were not found.
/// NOTE(review): indentation is missing in this copy of the file, so it cannot be told from
/// here whether the per-type summary and the totals line are gated by `extra` as well —
/// confirm against the repository before relying on it.
let compare (extra: bool) (allOperationalPoints: OperationalPoint[]) (osmEntries: Entry[]) =
// Restrict the comparison to the operational point types accepted by matchType.
let operationalPoints =
allOperationalPoints |> Array.filter (fun op -> matchType op)
// Phase 1: pair each operational point with an entry found via its UOPID.
let result = compareByUOPID operationalPoints osmEntries
let operationalPointsFoundPhase1 =
result |> Array.filter (fun (_, entry) -> entry.IsSome)
let operationalPointsNotFoundPhase1 =
result
|> Array.filter (fun (_, entry) -> entry.IsNone)
|> Array.map (fun (op, _) -> op)
// Phase 2 is always the empty array here, so it adds nothing to the found set
// and removes nothing from the not-found set below.
let operationalPointsFoundPhase2 = [||]
let operationalPointsFound =
Array.concat
[ operationalPointsFoundPhase1
operationalPointsFoundPhase2 |> Array.map (fun (op, entry) -> (op, Some entry)) ]
let countPointsFound = operationalPointsFound.Length
// Drop the phase-1 misses that phase 2 resolved (compared by UOPID).
let operationalPointsNotFound =
operationalPointsNotFoundPhase1
|> Array.filter (fun op ->
operationalPointsFoundPhase2
|> Array.exists (fun (op1, _) -> op.UOPID = op1.UOPID)
|> not)
let countPointsNotFound = operationalPointsNotFound.Length
// Detailed listing: one stderr line per operational point without a matching entry.
if extra then
operationalPointsNotFound
|> Array.iter (fun op -> fprintfn stderr $"{op.UOPID} {op.Name}")
// Number of unmatched operational points per type.
operationalPointsNotFound
|> Array.groupBy (fun op -> op.Type)
|> Array.iter (fun (k, l) -> fprintfn stderr $"type {k}, not found {l.Length}")
// Overall totals: considered, found and not found.
fprintfn stderr $"total {operationalPoints.Length}, found {countPointsFound}, not found {countPointsNotFound}"