| [ |
| { |
| "name": "whitespace_analyzer", |
| "description": "A standard whitespace analyzer.", |
| "behaviours": [ |
| "Tokenize the text using white space characters as delimiters.", |
| "Convert all characters to lower case.", |
| "Convert all alphanumeric and symbolic Unicode characters outside the Basic Latin block (the first 127 ASCII characters) into their ASCII equivalents, where one exists." |
| ], |
| "tokenizer": "whitespace", |
| "filters": [ |
| "lowercase", |
| "asciifolding" |
| ] |
| }, |
| { |
| "name": "ngram_analyzer", |
| "description": "An analyzer which performs ngram filtering on the data stream.", |
| "behaviours": [ |
| "Tokenize the text using white space characters as delimiters.", |
| "Convert all characters to lower case.", |
| "Convert all alphanumeric and symbolic Unicode characters outside the Basic Latin block (the first 127 ASCII characters) into their ASCII equivalents, where one exists.", |
| "Apply ngram filtering using the following values for minimum and maximum size in codepoints of a single n-gram: minimum = 1, maximum = 2." |
| ], |
| "tokenizer": "whitespace", |
| "filters": [ |
| "lowercase", |
| "asciifolding", |
| "ngram_filter" |
| ] |
| } |
| ] |