Sylvain Desbureaux | 7007041 | 2020-11-09 21:58:48 +0100 | [diff] [blame^] | 1 | [
|
| 2 | {
|
| 3 | "name": "whitespace_analyzer",
|
| 4 | "description": "A standard whitespace analyzer.",
|
| 5 | "behaviours": [
|
| 6 | "Tokenize the text using white space characters as delimiters.",
|
| 7 | "Convert all characters to lower case.",
|
| 8 | "Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents."
|
| 9 | ],
|
| 10 | "tokenizer": "whitespace",
|
| 11 | "filters": [
|
| 12 | "lowercase",
|
| 13 | "asciifolding"
|
| 14 | ]
|
| 15 | },
|
| 16 | {
|
| 17 | "name": "ngram_analyzer",
|
| 18 | "description": "An analyzer which performs ngram filtering on the data stream.",
|
| 19 | "behaviours": [
|
| 20 | "Tokenize the text using white space characters as delimiters.",
|
| 21 | "Convert all characters to lower case.",
|
| 22 | "Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents.",
|
| 23 | "Apply ngram filtering using the following values for minimum and maximum size in codepoints of a single n-gram: minimum = 1, maximum = 2."
|
| 24 | ],
|
| 25 | "tokenizer": "whitespace",
|
| 26 | "filters": [
|
| 27 | "lowercase",
|
| 28 | "asciifolding",
|
| 29 | "ngram_filter"
|
| 30 | ]
|
| 31 | }
|
| 32 | ] |