"Fossies" - the Fresh Open Source Software Archive

Member "mattermost-server-6.0.1/vendor/github.com/couchbase/vellum/levenshtein/alphabet.go" (18 Oct 2021, 2666 Bytes) of package /linux/www/mattermost-server-6.0.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Go source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 //  Copyright (c) 2018 Couchbase, Inc.
    2 //
    3 // Licensed under the Apache License, Version 2.0 (the "License");
    4 // you may not use this file except in compliance with the License.
    5 // You may obtain a copy of the License at
    6 //
    7 //      http://www.apache.org/licenses/LICENSE-2.0
    8 //
    9 // Unless required by applicable law or agreed to in writing, software
   10 // distributed under the License is distributed on an "AS IS" BASIS,
   11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   12 // See the License for the specific language governing permissions and
   13 // limitations under the License.
   14 
   15 package levenshtein
   16 
   17 import (
   18     "fmt"
   19     "sort"
   20     "unicode/utf8"
   21 )
   22 
   23 type FullCharacteristicVector []uint32
   24 
   25 func (fcv FullCharacteristicVector) shiftAndMask(offset, mask uint32) uint32 {
   26     bucketID := offset / 32
   27     align := offset - bucketID*32
   28     if align == 0 {
   29         return fcv[bucketID] & mask
   30     }
   31     left := fcv[bucketID] >> align
   32     right := fcv[bucketID+1] << (32 - align)
   33     return (left | right) & mask
   34 }
   35 
   36 type tuple struct {
   37     char rune
   38     fcv  FullCharacteristicVector
   39 }
   40 
   41 type sortRunes []rune
   42 
   43 func (s sortRunes) Less(i, j int) bool {
   44     return s[i] < s[j]
   45 }
   46 
   47 func (s sortRunes) Swap(i, j int) {
   48     s[i], s[j] = s[j], s[i]
   49 }
   50 
   51 func (s sortRunes) Len() int {
   52     return len(s)
   53 }
   54 
   55 func sortRune(r []rune) []rune {
   56     sort.Sort(sortRunes(r))
   57     return r
   58 }
   59 
   60 type Alphabet struct {
   61     charset []tuple
   62     index   uint32
   63 }
   64 
   65 func (a *Alphabet) resetNext() {
   66     a.index = 0
   67 }
   68 
   69 func (a *Alphabet) next() (rune, FullCharacteristicVector, error) {
   70     if int(a.index) >= len(a.charset) {
   71         return 0, nil, fmt.Errorf("eof")
   72     }
   73 
   74     rv := a.charset[a.index]
   75     a.index++
   76     return rv.char, rv.fcv, nil
   77 }
   78 
   79 func dedupe(in string) string {
   80     lookUp := make(map[rune]struct{}, len(in))
   81     var rv string
   82     for len(in) > 0 {
   83         r, size := utf8.DecodeRuneInString(in)
   84         in = in[size:]
   85         if _, ok := lookUp[r]; !ok {
   86             rv += string(r)
   87             lookUp[r] = struct{}{}
   88         }
   89     }
   90     return rv
   91 }
   92 
   93 func queryChars(qChars string) Alphabet {
   94     chars := dedupe(qChars)
   95     inChars := sortRune([]rune(chars))
   96     charsets := make([]tuple, 0, len(inChars))
   97 
   98     for _, c := range inChars {
   99         tempChars := qChars
  100         var bits []uint32
  101         for len(tempChars) > 0 {
  102             var chunk string
  103             if len(tempChars) > 32 {
  104                 chunk = tempChars[0:32]
  105                 tempChars = tempChars[32:]
  106             } else {
  107                 chunk = tempChars
  108                 tempChars = tempChars[:0]
  109             }
  110 
  111             chunkBits := uint32(0)
  112             bit := uint32(1)
  113             for _, chr := range chunk {
  114                 if chr == c {
  115                     chunkBits |= bit
  116                 }
  117                 bit <<= 1
  118             }
  119             bits = append(bits, chunkBits)
  120         }
  121         bits = append(bits, 0)
  122         charsets = append(charsets, tuple{char: c, fcv: FullCharacteristicVector(bits)})
  123     }
  124     return Alphabet{charset: charsets}
  125 }