"Fossies" - the Fresh Open Source Software Archive

Member "elasticsearch-6.8.23/plugins/analysis-kuromoji/src/test/resources/rest-api-spec/test/analysis_nori/10_basic.yml" (29 Dec 2021, 1724 Bytes) of package /linux/www/elasticsearch-6.8.23-src.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Ansible YAML source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 # Integration tests for Kuromoji analysis components
    2 #
    3 ---
    4 "Analyzer":  # End-to-end check of the `kuromoji` analyzer on one Japanese sentence.
    5     - do:  # Invoke the `indices.analyze` API with the request body below.
    6         indices.analyze:
    7           body:
    8             text:         JR新宿駅の近くにビールを飲みに行こうか
    9             analyzer:     kuromoji
   10     - length: { tokens: 7 }  # Exactly seven tokens expected; の, に, を and か from the input appear in none of them.
   11     - match:  { tokens.0.token: jr }  # Input "JR" is expected back in lowercase.
   12     - match:  { tokens.1.token: 新宿 }
   13     - match:  { tokens.2.token: 駅 }
   14     - match:  { tokens.3.token: 近く }
   15     - match:  { tokens.4.token: ビール }
   16     - match:  { tokens.5.token: 飲む }  # Input has 飲み; the expected token is 飲む.
   17     - match:  { tokens.6.token: 行く }  # Input has 行こう; the expected token is 行く.
   18 ---
   19 "Tokenizer":
   20     - do:
   21         indices.analyze:
   22           body:
   23             text:         関西国際空港
   24             tokenizer:    kuromoji_tokenizer
   25     - length: { tokens: 4 }
   26     - match:  { tokens.0.token: 関西 }
   27     - match:  { tokens.1.token: 関西国際空港 }
   28     - match:  { tokens.2.token: 国際 }
   29     - match:  { tokens.3.token: 空港 }
   30 ---
   31 "Baseform filter":
   32     - do:
   33         indices.analyze:
   34           body:
   35             text:         飲み
   36             tokenizer:    kuromoji_tokenizer
   37             filter:       [kuromoji_baseform]
   38     - length: { tokens: 1 }
   39     - match:  { tokens.0.token: 飲む }
   40 ---
   41 "Reading filter":
   42     - do:
   43         indices.analyze:
   44           body:
   45             text:         寿司
   46             tokenizer:    kuromoji_tokenizer
   47             filter:       [kuromoji_readingform]
   48     - length: { tokens: 1 }
   49     - match:  { tokens.0.token: スシ }
   50 ---
   51 "Stemming filter":
   52     - do:
   53         indices.analyze:
   54           body:
   55             text:         サーバー
   56             tokenizer:    kuromoji_tokenizer
   57             filter:       [kuromoji_stemmer]
   58     - length: { tokens: 1 }
   59     - match:  { tokens.0.token: サーバ }