forked from collective/collective.solr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
solr-4.4.x-german.cfg
169 lines (153 loc) · 7.13 KB
/
solr-4.4.x-german.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# German-language Solr configuration layered on top of solr-4.4.x.cfg.
[buildout]
extends = solr-4.4.x.cfg
# Append the [solr] part to the parts list inherited from the extended file.
# The continuation line must be indented to belong to the "parts" value.
parts +=
    solr

[settings]
# Network endpoint of the Solr server; read by host/port in [solr].
solr-host = localhost
solr-port = 8983
# JVM heap bounds; read by the -Xms/-Xmx flags of java_opts in [solr].
solr-min-ram = 64M
solr-max-ram = 128M

# Solr instance part, built by the collective.recipe.solrinstance recipe.
[solr]
recipe = collective.recipe.solrinstance
# The solr-download part is not defined in this file; presumably it comes
# from the extended solr-4.4.x.cfg - TODO confirm.
solr-location = ${solr-download:location}
host = ${settings:solr-host}
port = ${settings:solr-port}
max-num-results = 500
section-name = SOLR
unique-key = UID
default-search-field = Title
default-operator = and
logdir = ${buildout:directory}/var/solr
# JVM heap limits taken from [settings]; each flag is an indented
# continuation line of the java_opts value.
java_opts =
    -Xms${settings:solr-min-ram}
    -Xmx${settings:solr-max-ram}
spellcheckField = SearchableText
# Analysis chain for the "text" field type, one "<type> <factory> [attrs]"
# entry per indented continuation line.
# NOTE(review): solr.StandardTokenizerFactory is a tokenizer rather than a
# token filter - confirm it belongs under "filter" and not under the recipe's
# tokenizer option.
filter =
    text solr.StandardTokenizerFactory
    text solr.DictionaryCompoundWordTokenFilterFactory dictionary="${buildout:directory}/etc/german-common-nouns.txt" minWordSize="5" minSubwordSize="4" maxSubwordSize="15" onlyLongestMatch="true"
    text solr.StopFilterFactory ignoreCase="true" words="${buildout:directory}/etc/german-stopwords.txt"
# Optional German stemmers, disabled. The "#" must stay in column 0 so that
# buildout treats these lines as comments and not as part of the value.
#    text solr.GermanMinimalStemFilterFactory # Less aggressive
#    text solr.GermanLightStemFilterFactory # Moderately aggressive
#    text solr.SnowballPorterFilterFactory language="German2" # More aggressive
#    text solr.StemmerOverrideFilterFactory dictionary="${buildout:directory}/etc/german-stemming.txt" ignoreCase="false"
#    text solr.KeywordMarkerFilterFactory protected="${buildout:directory}/etc/german-protwords.txt"
# No filters are listed under filter-index.
filter-index =
# NOTE(review): lowercasing and synonyms are listed only under filter-query;
# confirm that index-time terms are lowercased elsewhere, otherwise
# capitalised terms in the index may not match lowercased queries.
filter-query =
    text solr.LowerCaseFilterFactory
    text solr.SynonymFilterFactory synonyms="${buildout:directory}/etc/german-synonyms.txt" ignoreCase="true"
# Solr Config => parts/solr/solr/collection1/conf/solrconfig.xml
# Every line of the XML below must stay indented so that buildout reads it
# as a continuation of the additional-solrconfig value.
additional-solrconfig =
    <!-- Spell Check
         The spell check component can return a list of alternative spelling
         suggestions.
         http://wiki.apache.org/solr/SpellCheckComponent
    -->
    <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
      <!-- queryAnalyzerFieldType names a field *type*, not a field;
           "text" is the type the index option assigns to SearchableText -->
      <str name="queryAnalyzerFieldType">text</str>
      <!-- Multiple "Spell Checkers" can be declared and used by this
           component
      -->
      <!-- a spellchecker built from a field of the main index -->
      <lst name="spellchecker">
        <str name="name">default</str>
        <str name="field">SearchableText</str>
        <str name="classname">solr.DirectSolrSpellChecker</str>
        <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
        <str name="distanceMeasure">internal</str>
        <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
        <float name="accuracy">0.5</float>
        <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
        <int name="maxEdits">2</int>
        <!-- the minimum shared prefix when enumerating terms -->
        <int name="minPrefix">1</int>
        <!-- maximum number of inspections per result. -->
        <int name="maxInspections">5</int>
        <!-- minimum length of a query term to be considered for correction -->
        <int name="minQueryLength">4</int>
        <!-- maximum threshold of documents a query term can appear to be considered for correction -->
        <float name="maxQueryFrequency">0.01</float>
        <!-- uncomment this to require suggestions to occur in 1% of the documents
        <float name="thresholdTokenFrequency">.01</float>
        -->
      </lst>
      <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
      <lst name="spellchecker">
        <str name="name">wordbreak</str>
        <str name="classname">solr.WordBreakSolrSpellChecker</str>
        <str name="field">SearchableText</str>
        <str name="combineWords">true</str>
        <str name="breakWords">true</str>
        <int name="maxChanges">10</int>
      </lst>
      <!-- Custom Spellchecker -->
      <lst name="spellchecker">
        <str name="name">suggest</str>
        <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
        <str name="lookupImpl">org.apache.solr.spelling.suggest.fst.WFSTLookupFactory</str>
        <str name="field">SearchableText</str>
        <float name="threshold">0.0005</float>
        <str name="buildOnCommit">true</str>
      </lst>
    </searchComponent>
    <!-- A request handler for demonstrating the spellcheck component.
         NOTE: This is purely as an example. The whole purpose of the
         SpellCheckComponent is to hook it into the request handler that
         handles your normal user queries so that a separate request is
         not needed to get suggestions.
         IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
         NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
         See http://wiki.apache.org/solr/SpellCheckComponent for details
         on the request parameters.
    -->
    <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
      <lst name="defaults">
        <!-- Solr will use suggestions from both the 'default' spellchecker
             and from the 'wordbreak' spellchecker and combine them.
             collations (re-written queries) can include a combination of
             corrections from both spellcheckers.
             NOTE(review): the 'suggest' dictionary is also listed below
             although this comment only mentions two; confirm it is intended. -->
        <str name="spellcheck.dictionary">default</str>
        <str name="spellcheck.dictionary">wordbreak</str>
        <str name="spellcheck.dictionary">suggest</str>
        <str name="spellcheck">on</str>
        <str name="spellcheck.extendedResults">true</str>
        <str name="spellcheck.count">10</str>
        <str name="spellcheck.alternativeTermCount">5</str>
        <str name="spellcheck.maxResultsForSuggest">5</str>
        <str name="spellcheck.collate">true</str>
        <str name="spellcheck.collateExtendedResults">true</str>
        <str name="spellcheck.maxCollationTries">10</str>
        <str name="spellcheck.maxCollations">5</str>
      </lst>
      <arr name="last-components">
        <str>spellcheck</str>
      </arr>
    </requestHandler>
# Schema field definitions: one field per indented continuation line, written
# as space-separated key:value attributes. Several fields declare
# copyfield:default, naming the multivalued "default" field defined here.
index =
    name:allowedRolesAndUsers type:string stored:false multivalued:true
    name:created type:date stored:true
    name:Creator type:string stored:true
    name:Date type:date stored:true
    name:default type:text indexed:true stored:false multivalued:true
    name:Description type:text copyfield:default stored:true
    name:effective type:date stored:true
    name:exclude_from_nav type:boolean indexed:false stored:true
    name:expires type:date stored:true
    name:getIcon type:string indexed:false stored:true
    name:getId type:string indexed:false stored:true
    name:getRemoteUrl type:string indexed:false stored:true
    name:is_folderish type:boolean stored:true
    name:Language type:string stored:true
    name:modified type:date stored:true
    name:object_provides type:string stored:false multivalued:true
    name:path_depth type:integer indexed:true stored:false
    name:path_parents type:string indexed:true stored:false multivalued:true
    name:path_string type:string indexed:false stored:true
    name:portal_type type:string stored:true
    name:review_state type:string stored:true
    name:SearchableText type:text copyfield:default stored:false
    name:searchwords type:string stored:false multivalued:true
    name:showinsearch type:boolean stored:false
    name:Subject type:string copyfield:default stored:true multivalued:true
    name:Title type:text copyfield:default stored:true
    name:Type type:string stored:true
    name:UID type:string stored:true required:true