blob: 5fc135df5aedc7fdfb701fba7a3eb0f5ee7d8767 [file] [log] [blame]
toshrajbhardwajf4fc1c62018-08-06 07:35:14 +00001[
2 {
3 "name": "whitespace_analyzer",
4 "description": "A standard whitespace analyzer.",
5 "behaviours": [
6 "Tokenize the text using white space characters as delimiters.",
7 "Convert all characters to lower case.",
8 "Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents."
9 ],
10 "tokenizer": "whitespace",
11 "filters": [
12 "lowercase",
13 "asciifolding"
14 ]
15 },
16 {
17 "name": "ngram_analyzer",
18 "description": "An analyzer which performs ngram filtering on the data stream.",
19 "behaviours": [
20 "Tokenize the text using white space characters as delimiters.",
21 "Convert all characters to lower case.",
22 "Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents.",
23 "Apply ngram filtering using the following values for minimum and maximum size in codepoints of a single n-gram: minimum = 1, maximum = 2."
24 ],
25 "tokenizer": "whitespace",
26 "filters": [
27 "lowercase",
28 "asciifolding",
29 "ngram_filter"
30 ]
31 }
Mandeep Khinda9e6a9102017-08-30 14:37:47 +000032]