Mandeep Khinda | 71bd84c | 2018-08-29 21:45:05 +0000 | [diff] [blame^] | 1 | [{ |
Geora Barsky | 417b3b4 | 2018-08-17 13:31:36 -0400 | [diff] [blame] | 2 | "name": "whitespace_analyzer", |
Mandeep Khinda | 71bd84c | 2018-08-29 21:45:05 +0000 | [diff] [blame^] | 3 | "description": "A standard whitespace analyzer.", |
| 4 | "behaviours": [ |
| 5 | "Tokenize the text using white space characters as delimiters.", |
| 6 | "Convert all characters to lower case.", |
| 7 | "Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents." |
| 8 | ], |
| 9 | "tokenizer": "whitespace", |
| 10 | "filters": [ |
| 11 | "lowercase", |
| 12 | "asciifolding" |
| 13 | ] |
Geora Barsky | 417b3b4 | 2018-08-17 13:31:36 -0400 | [diff] [blame] | 14 | }, |
| 15 | { |
| 16 | "name": "ngram_analyzer", |
Mandeep Khinda | 71bd84c | 2018-08-29 21:45:05 +0000 | [diff] [blame^] | 17 | "description": "An analyzer which performs ngram filtering on the data stream.", |
| 18 | "behaviours": [ |
| 19 | "Tokenize the text using white space characters as delimiters.", |
| 20 | "Convert all characters to lower case.", |
| 21 | "Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents.", |
| 22 | "Apply ngram filtering using the following values for minimum and maximum size in codepoints of a single n-gram: minimum = 1, maximum = 2." |
| 23 | ], |
| 24 | "tokenizer": "whitespace", |
| 25 | "filters": [ |
| 26 | "lowercase", |
| 27 | "asciifolding", |
| 28 | "ngram_filter" |
| 29 | ] |
Geora Barsky | 417b3b4 | 2018-08-17 13:31:36 -0400 | [diff] [blame] | 30 | } |
| 31 | ] |