blob: 0927d987485d0140cd1c58c5ecb96298620a39a2 [file] [log] [blame]
Mandeep Khinda71bd84c2018-08-29 21:45:05 +00001[{
Geora Barsky417b3b42018-08-17 13:31:36 -04002 "name": "whitespace_analyzer",
Mandeep Khinda71bd84c2018-08-29 21:45:05 +00003 "description": "A standard whitespace analyzer.",
4 "behaviours": [
5 "Tokenize the text using white space characters as delimiters.",
6 "Convert all characters to lower case.",
7 "Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents."
8 ],
9 "tokenizer": "whitespace",
10 "filters": [
11 "lowercase",
12 "asciifolding"
13 ]
Geora Barsky417b3b42018-08-17 13:31:36 -040014 },
15 {
16 "name": "ngram_analyzer",
Mandeep Khinda71bd84c2018-08-29 21:45:05 +000017 "description": "An analyzer which performs ngram filtering on the data stream.",
18 "behaviours": [
19 "Tokenize the text using white space characters as delimiters.",
20 "Convert all characters to lower case.",
21 "Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents.",
22 "Apply ngram filtering using the following values for minimum and maximum size in codepoints of a single n-gram: minimum = 1, maximum = 2."
23 ],
24 "tokenizer": "whitespace",
25 "filters": [
26 "lowercase",
27 "asciifolding",
28 "ngram_filter"
29 ]
Geora Barsky417b3b42018-08-17 13:31:36 -040030 }
31]