forked from clarin-eric/ParlaMint
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
113 lines (103 loc) · 5.48 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
## We use this Makefile also for testing scripts.
# META is the corpus root file handed to check-links.xsl as its `meta`
# parameter. It must stay a recursive (=) variable: it refers to F, which is
# only assigned further down in this file.
META = $F/DataXI/ParlaMint-CZ/ParlaMint-CZ.xml

# cl-test: run ../Scripts/check-links.xsl (via $s) on one CZ component file.
# NOTE(review): as the first rule in the file this is also what a bare `make`
# runs -- confirm that is intended.
# Disabled alternative that applies the stylesheet to the corpus root itself:
#   $s -xsl:../Scripts/check-links.xsl ${META}
.PHONY: cl-test
cl-test:
	$s meta=${META} -xsl:../Scripts/check-links.xsl \
	$F/DataXI/ParlaMint-CZ/ParlaMint-CZ_2016-04-13-ps2013-044-02-013-114.xml
# F: directory holding the data used by cl-test and the f-test-* rules.
F = ../../Factorized

# f-test: run all f-test-* validation groups. It is a command name, not a
# file, so it is declared phony to keep a stray file called `f-test` from
# masking it.
.PHONY: f-test
f-test: f-test-root f-test-anaroot f-test-component f-test-anacomponent f-test-splits f-test-odd
# f-test-root: validate the CZ and BE corpus root files, from both the Data
# and the DataXI trees under $F, against ParlaMint-teiCorpus.rng.
# Each file is a separate recipe line, so the first failure stops the rule.
.PHONY: f-test-root
f-test-root:
	$j ParlaMint-teiCorpus.rng $F/Data/ParlaMint-CZ/ParlaMint-CZ.xml
	$j ParlaMint-teiCorpus.rng $F/DataXI/ParlaMint-CZ/ParlaMint-CZ.xml
	$j ParlaMint-teiCorpus.rng $F/Data/ParlaMint-BE/ParlaMint-BE.xml
	$j ParlaMint-teiCorpus.rng $F/DataXI/ParlaMint-BE/ParlaMint-BE.xml
# f-test-anaroot: validate the CZ and BE .ana corpus root files, from both the
# Data and the DataXI trees under $F, against ParlaMint-teiCorpus.ana.rng.
.PHONY: f-test-anaroot
f-test-anaroot:
	$j ParlaMint-teiCorpus.ana.rng $F/Data/ParlaMint-CZ/ParlaMint-CZ.ana.xml
	$j ParlaMint-teiCorpus.ana.rng $F/DataXI/ParlaMint-CZ/ParlaMint-CZ.ana.xml
	$j ParlaMint-teiCorpus.ana.rng $F/Data/ParlaMint-BE/ParlaMint-BE.ana.xml
	$j ParlaMint-teiCorpus.ana.rng $F/DataXI/ParlaMint-BE/ParlaMint-BE.ana.xml
# f-test-component: validate one CZ and one BE component file from the Data
# tree under $F against ParlaMint-TEI.rng.
.PHONY: f-test-component
f-test-component:
	$j ParlaMint-TEI.rng $F/Data/ParlaMint-CZ/ParlaMint-CZ_2016-04-13-ps2013-044-02-013-114.xml
	$j ParlaMint-TEI.rng $F/Data/ParlaMint-BE/ParlaMint-BE_2014-06-30-54-plenair-ip002x.xml
# f-test-anacomponent: validate one CZ and one BE .ana component file from the
# Data tree under $F against ParlaMint-TEI.ana.rng.
.PHONY: f-test-anacomponent
f-test-anacomponent:
	$j ParlaMint-TEI.ana.rng $F/Data/ParlaMint-CZ/ParlaMint-CZ_2016-04-13-ps2013-044-02-013-114.ana.xml
	$j ParlaMint-TEI.ana.rng $F/Data/ParlaMint-BE/ParlaMint-BE_2014-06-30-54-plenair-ip002x.ana.xml
# f-test-splits: validate the separate taxonomy, listOrg and listPerson files
# of the CZ and BE DataXI trees under $F, each against its own schema.
# The taxonomy arguments are shell globs, expanded by the shell at run time.
.PHONY: f-test-splits
f-test-splits:
	$j ParlaMint-taxonomy.rng $F/DataXI/ParlaMint-CZ/ParlaMint-CZ-taxonomy-*.xml
	$j ParlaMint-taxonomy.rng $F/DataXI/ParlaMint-BE/ParlaMint-BE-taxonomy-*.xml
	$j ParlaMint-listOrg.rng $F/DataXI/ParlaMint-CZ/ParlaMint-CZ-listOrg.xml
	$j ParlaMint-listOrg.rng $F/DataXI/ParlaMint-BE/ParlaMint-BE-listOrg.xml
	$j ParlaMint-listPerson.rng $F/DataXI/ParlaMint-CZ/ParlaMint-CZ-listPerson.xml
	$j ParlaMint-listPerson.rng $F/DataXI/ParlaMint-BE/ParlaMint-BE-listPerson.xml
# f-test-odd: validate the same CZ and BE sample files (roots, .ana roots,
# components, .ana components) against the single schema
# ../TEI/ParlaMint.odd.rng.
.PHONY: f-test-odd
f-test-odd:
	$j ../TEI/ParlaMint.odd.rng $F/Data/ParlaMint-CZ/ParlaMint-CZ.xml
	$j ../TEI/ParlaMint.odd.rng $F/DataXI/ParlaMint-CZ/ParlaMint-CZ.xml
	$j ../TEI/ParlaMint.odd.rng $F/Data/ParlaMint-BE/ParlaMint-BE.xml
	$j ../TEI/ParlaMint.odd.rng $F/DataXI/ParlaMint-BE/ParlaMint-BE.xml
	$j ../TEI/ParlaMint.odd.rng $F/Data/ParlaMint-CZ/ParlaMint-CZ.ana.xml
	$j ../TEI/ParlaMint.odd.rng $F/DataXI/ParlaMint-CZ/ParlaMint-CZ.ana.xml
	$j ../TEI/ParlaMint.odd.rng $F/Data/ParlaMint-BE/ParlaMint-BE.ana.xml
	$j ../TEI/ParlaMint.odd.rng $F/DataXI/ParlaMint-BE/ParlaMint-BE.ana.xml
	$j ../TEI/ParlaMint.odd.rng $F/Data/ParlaMint-CZ/ParlaMint-CZ_2016-04-13-ps2013-044-02-013-114.xml
	$j ../TEI/ParlaMint.odd.rng $F/Data/ParlaMint-BE/ParlaMint-BE_2014-06-30-54-plenair-ip002x.xml
	$j ../TEI/ParlaMint.odd.rng $F/Data/ParlaMint-CZ/ParlaMint-CZ_2016-04-13-ps2013-044-02-013-114.ana.xml
	$j ../TEI/ParlaMint.odd.rng $F/Data/ParlaMint-BE/ParlaMint-BE_2014-06-30-54-plenair-ip002x.ana.xml
# list: print the sorted, de-duplicated values of every `<element name="...">`
# found in the ParlaMint-*.rng schemas of the current directory.
# Declared phony because `list` is a plausible file name and would otherwise
# make this rule look up to date.
.PHONY: list
list:
	grep '<element name=' ParlaMint-*.rng | perl -pe 's|.+="||; s|">||' | sort -u
# nohup: start `all` in the background under nohup and time; standard output
# goes to compile.log.
# $(MAKE) instead of a literal `make` re-invokes the same make binary and
# passes its flags on to the child.
.PHONY: nohup
nohup:
	nohup time $(MAKE) all > compile.log &
# all:  regenerate the derived schemas (see trang).
# xall: additionally validate the corpora (see val).
# Both are pure command names, hence phony.
.PHONY: all xall
all: trang
xall: val trang
# The schemas are authored, strangely enough, in .rng; the other syntaxes are
# derived from them with trang.
t = java -jar /usr/share/java/trang.jar

# trang: umbrella target for the schema conversions; currently only rnc.
# rnc:   convert each .rng schema into a .rnc file with the same base name.
# Neither rule creates a file named after its target, so both are phony.
.PHONY: trang rnc
trang: rnc
rnc:
	$t ParlaMint-TEI.rng ParlaMint-TEI.rnc
	$t ParlaMint-TEI.ana.rng ParlaMint-TEI.ana.rnc
	$t ParlaMint-teiCorpus.rng ParlaMint-teiCorpus.rnc
	$t ParlaMint-teiCorpus.ana.rng ParlaMint-teiCorpus.ana.rnc
	$t ParlaMint-listOrg.rng ParlaMint-listOrg.rnc
	$t ParlaMint-listPerson.rng ParlaMint-listPerson.rnc
	$t ParlaMint-taxonomy.rng ParlaMint-taxonomy.rnc
# xsd: convert each .rng schema into an .xsd file with the same base name.
# Doesn't work, cf. https://github.com/clarin-eric/ParlaMint/issues/74
# (which is presumably why no other target depends on it).
.PHONY: xsd
xsd:
	$t ParlaMint-TEI.rng ParlaMint-TEI.xsd
	$t ParlaMint-TEI.ana.rng ParlaMint-TEI.ana.xsd
	$t ParlaMint-teiCorpus.rng ParlaMint-teiCorpus.xsd
	$t ParlaMint-teiCorpus.ana.rng ParlaMint-teiCorpus.ana.xsd
	$t ParlaMint-listOrg.rng ParlaMint-listOrg.xsd
	$t ParlaMint-listPerson.rng ParlaMint-listPerson.xsd
	$t ParlaMint-taxonomy.rng ParlaMint-taxonomy.xsd
# Location of the corpora checked by `val` and `what`.
#DATADIR = ../Data
# For testing v3 corpora
DATADIR = ../../ParlaMint-data/Data

# Shell glob for every corpus XML file. make does not expand it; the shell
# does when a recipe runs.
ALL = ${DATADIR}/ParlaMint-*/ParlaMint-*.xml
#ALL = ${DATADIR}/ParlaMint-AT/ParlaMint-AT_1996-01-15-020-XX-NRSITZ-00001.ana.xml

# grep patterns (regular expressions, single-quoted for the shell) that `val`
# uses to sort file names into categories.
# The dot in ANA is escaped so that it only matches a literal ".ana"; as a
# bare '.' it would match any character followed by "ana".
ANA = '\.ana'
TAX = 'taxonomy'
LST = 'list'
# val: validate every corpus file under ${DATADIR} against the schema for its
# category. File names are listed with ls, filtered with grep and passed to
# the validator through xargs:
#   no ${ANA}, no '_'  -> root / text          (${pc}, ${vrt})
#   no ${ANA}, '_'     -> component / text     (${vct})
#   ${ANA},    no '_'  -> root / analysed      (${pc}, ${vra})
#   ${ANA},    '_'     -> component / analysed (${vca})
#   *listOrg.xml, *listPerson.xml, *taxonomy*.xml -> ${vlo}, ${vlp}, ${vta}
# Every line starts with `-`, so make ignores its exit status and carries on
# with the remaining categories even when one of them reports errors.
.PHONY: val
val:
	-ls ${ALL} | grep -v ${ANA} | grep -v ${TAX} | grep -v ${LST} | grep -v '_' | xargs ${pc}
	-ls ${ALL} | grep -v ${ANA} | grep -v ${TAX} | grep -v ${LST} | grep -v '_' | xargs ${vrt}
	-ls ${ALL} | grep -v ${ANA} | grep -v ${TAX} | grep -v ${LST} | grep '_' | xargs ${vct}
	-ls ${ALL} | grep ${ANA} | grep -v ${TAX} | grep -v ${LST} | grep -v '_' | xargs ${pc}
	-ls ${ALL} | grep ${ANA} | grep -v ${TAX} | grep -v ${LST} | grep -v '_' | xargs ${vra}
	-ls ${ALL} | grep ${ANA} | grep -v ${TAX} | grep -v ${LST} | grep '_' | xargs ${vca}
	-ls ${DATADIR}/ParlaMint-*/ParlaMint-*listOrg.xml | xargs ${vlo}
	-ls ${DATADIR}/ParlaMint-*/ParlaMint-*listPerson.xml | xargs ${vlp}
	-ls ${DATADIR}/ParlaMint-*/ParlaMint-*taxonomy*.xml | xargs ${vta}
# Argument lists that the `val` recipe places directly after `xargs`.
# Each description sits on its own line: a `# comment` written after the value
# would leave the whitespace in front of it as part of the value.

# Run $s with copy.xsl once per input file (xargs -I %).
# NOTE(review): the `|` is interpreted by the shell, not by xargs, so $j is
# started a single time with only parla-clarin.rng as argument -- confirm it
# really receives the transformed documents.
pc = -I % $s -xi -xsl:../Scripts/copy.xsl % | $j parla-clarin.rng
# Corpus root / text
vrt = $j ParlaMint-teiCorpus.rng
# Corpus component / text
vct = $j ParlaMint-TEI.rng
# Corpus root / analysed
vra = $j ParlaMint-teiCorpus.ana.rng
# Corpus component / analysed
vca = $j ParlaMint-TEI.ana.rng
# Corpus organisation list
vlo = $j ParlaMint-listOrg.rng
# Corpus person list
vlp = $j ParlaMint-listPerson.rng
# Corpus taxonomy
vta = $j ParlaMint-taxonomy.rng
# Command prefixes for the external tools used by the recipes in this file.
# The values contain no variable references, so simple (:=) assignment yields
# exactly the same text as the recursive form.

# s: Saxon jar (runs the XSLT scripts)
s := java -jar /usr/share/java/saxon.jar
# j: Jing jar (validates files against the .rng schemas)
j := java -jar /usr/share/java/jing.jar
# P: GNU parallel invocation; not referenced by any rule in this file
P := parallel --citation --gnu --halt 2
# Count things like org/@role
# The variable `what` holds the filter pipeline; edit it to select the lines
# of interest. The target `what` streams every file matched by ${ALL} through
# that filter. The target is a command name, not a file, hence phony.
what = grep ' corresp=' | grep corresp
.PHONY: what
what:
	cat ${ALL} | ${what}