mayankg2703 | 2a6b396 | 2018-02-06 10:24:30 +0000 | [diff] [blame^] | 1 | [ |
| 2 | { |
| 3 | "name": "whitespace_analyzer", |
| 4 | "description": "A standard whitespace analyzer.", |
| 5 | "behaviours": [ |
| 6 | "Tokenize the text using white space characters as delimiters.", |
| 7 | "Convert all characters to lower case.", |
| 8 | "Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents." |
| 9 | ], |
| 10 | "tokenizer": "whitespace", |
| 11 | "filters": [ |
| 12 | "lowercase", |
| 13 | "asciifolding" |
| 14 | ] |
| 15 | }, |
| 16 | { |
| 17 | "name": "ngram_analyzer", |
| 18 | "description": "An analyzer which performs ngram filtering on the data stream.", |
| 19 | "behaviours": [ |
| 20 | "Tokenize the text using white space characters as delimiters.", |
| 21 | "Convert all characters to lower case.", |
| 22 | "Convert all alphanumeric and symbolic Unicode characters above the first 127 ASCII characters into their ASCII equivalents.", |
| 23 | "Apply ngram filtering using the following values for minimum and maximum size in codepoints of a single n-gram: minimum = 1, maximum = 2." |
| 24 | ], |
| 25 | "tokenizer": "whitespace", |
| 26 | "filters": [ |
| 27 | "lowercase", |
| 28 | "asciifolding", |
| 29 | "ngram_filter" |
| 30 | ] |
| 31 | } |
| 32 | ] |