# Integration tests for ICU analysis components
#
"Tokenizer":
    - do:
        indices.analyze:
          body:
            text: Foo Bar
            tokenizer: icu_tokenizer
    - length: { tokens: 2 }
    - match: { tokens.0.token: Foo }
    - match: { tokens.1.token: Bar }
---
"Normalization filter":
    - do:
        indices.analyze:
          body:
            filter: [icu_normalizer]
            text: Foo Bar Ruß
            tokenizer: standard
    - length: { tokens: 3 }
    - match: { tokens.0.token: foo }
    - match: { tokens.1.token: bar }
    - match: { tokens.2.token: russ }
---
"Normalization charfilter":
    - do:
        indices.analyze:
          body:
            char_filter: [icu_normalizer]
            text: Foo Bar Ruß
            tokenizer: standard
    - length: { tokens: 3 }
    - match: { tokens.0.token: foo }
    - match: { tokens.1.token: bar }
    - match: { tokens.2.token: russ }
---
"Folding filter":
    - do:
        indices.analyze:
          body:
            filter: [icu_folding]
            text: Foo Bar résumé
            tokenizer: standard
    - length: { tokens: 3 }
    - match: { tokens.0.token: foo }
    - match: { tokens.1.token: bar }
    - match: { tokens.2.token: resume }
---
# unicode_set_filter restricts normalization/folding to characters inside
# the given UnicodeSet; characters outside it pass through unchanged.
"Normalization with unicode_set_filter":
    - do:
        indices.create:
          index: test
          body:
            settings:
              index:
                analysis:
                  char_filter:
                    charfilter_icu_normalizer:
                      type: icu_normalizer
                      unicode_set_filter: "[^ß]"
                  filter:
                    tokenfilter_icu_normalizer:
                      type: icu_normalizer
                      unicode_set_filter: "[^ßB]"
                    tokenfilter_icu_folding:
                      type: icu_folding
                      unicode_set_filter: "[^â]"

    # ß is excluded from the char filter's set, so it survives normalization
    - do:
        indices.analyze:
          index: test
          body:
            char_filter: ["charfilter_icu_normalizer"]
            tokenizer: standard
            text: charfilter Föo Bâr Ruß
    - length: { tokens: 4 }
    - match: { tokens.0.token: charfilter }
    - match: { tokens.1.token: föo }
    - match: { tokens.2.token: bâr }
    - match: { tokens.3.token: ruß }

    # ß and B are excluded from the token filter's set, so B keeps its case
    - do:
        indices.analyze:
          index: test
          body:
            tokenizer: standard
            filter: ["tokenfilter_icu_normalizer"]
            text: tokenfilter Föo Bâr Ruß
    - length: { tokens: 4 }
    - match: { tokens.0.token: tokenfilter }
    - match: { tokens.1.token: föo }
    - match: { tokens.2.token: Bâr }
    - match: { tokens.3.token: ruß }

    # â is excluded from folding, so it keeps its accent while ö and ß fold
    - do:
        indices.analyze:
          index: test
          body:
            tokenizer: standard
            filter: ["tokenfilter_icu_folding"]
            text: icufolding Föo Bâr Ruß
    - length: { tokens: 4 }
    - match: { tokens.0.token: icufolding }
    - match: { tokens.1.token: foo }
    - match: { tokens.2.token: bâr }
    - match: { tokens.3.token: russ }