[ | |
{ | |
"name": "whitespace_analyzer", | |
"description": "A standard whitespace analyzer.", | |
"behaviours": [ | |
"Tokenize the text using white space characters as delimeters.", | |
"Convert all characters to lower case.", | |
"Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents." | |
], | |
"tokenizer": "whitespace", | |
"filters": [ | |
"lowercase", | |
"asciifolding" | |
] | |
}, | |
{ | |
"name": "ngram_analyzer", | |
"description": "An analyzer which performs ngram filtering on the data stream.", | |
"behaviours": [ | |
"Tokenize the text using white space characters as delimeters.", | |
"Convert all characters to lower case.", | |
"Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents.", | |
"Apply ngram filtering using the following values for minimum and maximum size in codepoints of a single n-gram: minimum = 1, maximum = 2." | |
], | |
"tokenizer": "whitespace", | |
"filters": [ | |
"lowercase", | |
"asciifolding", | |
"ngram_filter" | |
] | |
} | |
] |