# Integration tests for Korean analysis components
#
---
"Analyzer":
    - do:
        indices.analyze:
          body:
            text: 뿌리가 깊은 나무
            analyzer: nori
    - length: { tokens: 3 }
    - match: { tokens.0.token: 뿌리 }
    - match: { tokens.1.token: 깊 }
    - match: { tokens.2.token: 나무 }
---
"Tokenizer":
    - do:
        indices.analyze:
          body:
            text: 뿌리가 깊은 나무
            tokenizer: nori_tokenizer
    - length: { tokens: 5 }
    - match: { tokens.0.token: 뿌리 }
    - match: { tokens.1.token: 가 }
    - match: { tokens.2.token: 깊 }
    - match: { tokens.3.token: 은 }
    - match: { tokens.4.token: 나무 }
---
"Part of speech filter":
    - do:
        indices.analyze:
          body:
            text: 뿌리가 깊은 나무
            tokenizer: nori_tokenizer
            filter: [nori_part_of_speech]
    - length: { tokens: 3 }
    - match: { tokens.0.token: 뿌리 }
    - match: { tokens.1.token: 깊 }
    - match: { tokens.2.token: 나무 }
---
"Reading filter":
    - do:
        indices.analyze:
          body:
            text: 鄕歌
            tokenizer: nori_tokenizer
            filter: [nori_readingform]
    - length: { tokens: 1 }
    - match: { tokens.0.token: 향가 }