"Fossies" - the Fresh Open Source Software Archive

Member "cells-3.0.3/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s" (30 Nov 2021, 333191 Bytes) of package /linux/misc/pydio-cells-3.0.3.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) PowerPC Assembler source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
    2 
    3 // +build !appengine
    4 // +build !noasm
    5 // +build gc
    6 
    7 #include "textflag.h"
    8 
    9 // func encodeBlockAsm(dst []byte, src []byte) int
   10 // Requires: SSE2
   11 TEXT ·encodeBlockAsm(SB), $65560-56
   12     MOVQ dst_base+0(FP), AX
   13     MOVQ $0x00000200, CX
   14     LEAQ 24(SP), DX
   15     PXOR X0, X0
   16 
   17 zero_loop_encodeBlockAsm:
   18     MOVOU X0, (DX)
   19     MOVOU X0, 16(DX)
   20     MOVOU X0, 32(DX)
   21     MOVOU X0, 48(DX)
   22     MOVOU X0, 64(DX)
   23     MOVOU X0, 80(DX)
   24     MOVOU X0, 96(DX)
   25     MOVOU X0, 112(DX)
   26     ADDQ  $0x80, DX
   27     DECQ  CX
   28     JNZ   zero_loop_encodeBlockAsm
   29     MOVL  $0x00000000, 12(SP)
   30     MOVQ  src_len+32(FP), CX
   31     LEAQ  -5(CX), DX
   32     LEAQ  -8(CX), BP
   33     MOVL  BP, 8(SP)
   34     SHRQ  $0x05, CX
   35     SUBL  CX, DX
   36     LEAQ  (AX)(DX*1), DX
   37     MOVQ  DX, (SP)
   38     MOVL  $0x00000001, CX
   39     MOVL  CX, 16(SP)
   40     MOVQ  src_base+24(FP), DX
   41 
   42 search_loop_encodeBlockAsm:
   43     MOVQ  (DX)(CX*1), SI
   44     MOVL  CX, BP
   45     SUBL  12(SP), BP
   46     SHRL  $0x06, BP
   47     LEAL  4(CX)(BP*1), BP
   48     CMPL  BP, 8(SP)
   49     JGE   emit_remainder_encodeBlockAsm
   50     MOVL  BP, 20(SP)
   51     MOVQ  $0x0000cf1bbcdcbf9b, R8
   52     MOVQ  SI, R9
   53     MOVQ  SI, R10
   54     SHRQ  $0x08, R10
   55     SHLQ  $0x10, R9
   56     IMULQ R8, R9
   57     SHRQ  $0x32, R9
   58     SHLQ  $0x10, R10
   59     IMULQ R8, R10
   60     SHRQ  $0x32, R10
   61     MOVL  24(SP)(R9*4), BP
   62     MOVL  24(SP)(R10*4), DI
   63     MOVL  CX, 24(SP)(R9*4)
   64     LEAL  1(CX), R9
   65     MOVL  R9, 24(SP)(R10*4)
   66     MOVQ  SI, R9
   67     SHRQ  $0x10, R9
   68     SHLQ  $0x10, R9
   69     IMULQ R8, R9
   70     SHRQ  $0x32, R9
   71     MOVL  CX, R8
   72     SUBL  16(SP), R8
   73     MOVL  1(DX)(R8*1), R10
   74     MOVQ  SI, R8
   75     SHRQ  $0x08, R8
   76     CMPL  R8, R10
   77     JNE   no_repeat_found_encodeBlockAsm
   78     LEAL  1(CX), SI
   79     MOVL  12(SP), DI
   80     MOVL  SI, BP
   81     SUBL  16(SP), BP
   82     JZ    repeat_extend_back_end_encodeBlockAsm
   83 
   84 repeat_extend_back_loop_encodeBlockAsm:
   85     CMPL SI, DI
   86     JLE  repeat_extend_back_end_encodeBlockAsm
   87     MOVB -1(DX)(BP*1), BL
   88     MOVB -1(DX)(SI*1), R8
   89     CMPB BL, R8
   90     JNE  repeat_extend_back_end_encodeBlockAsm
   91     LEAL -1(SI), SI
   92     DECL BP
   93     JNZ  repeat_extend_back_loop_encodeBlockAsm
   94 
   95 repeat_extend_back_end_encodeBlockAsm:
   96     MOVL 12(SP), BP
   97     CMPL BP, SI
   98     JEQ  emit_literal_done_repeat_emit_encodeBlockAsm
   99     MOVL SI, R8
  100     MOVL SI, 12(SP)
  101     LEAQ (DX)(BP*1), R9
  102     SUBL BP, R8
  103     LEAL -1(R8), BP
  104     CMPL BP, $0x3c
  105     JLT  one_byte_repeat_emit_encodeBlockAsm
  106     CMPL BP, $0x00000100
  107     JLT  two_bytes_repeat_emit_encodeBlockAsm
  108     CMPL BP, $0x00010000
  109     JLT  three_bytes_repeat_emit_encodeBlockAsm
  110     CMPL BP, $0x01000000
  111     JLT  four_bytes_repeat_emit_encodeBlockAsm
  112     MOVB $0xfc, (AX)
  113     MOVL BP, 1(AX)
  114     ADDQ $0x05, AX
  115     JMP  memmove_long_repeat_emit_encodeBlockAsm
  116 
  117 four_bytes_repeat_emit_encodeBlockAsm:
  118     MOVL BP, R10
  119     SHRL $0x10, R10
  120     MOVB $0xf8, (AX)
  121     MOVW BP, 1(AX)
  122     MOVB R10, 3(AX)
  123     ADDQ $0x04, AX
  124     JMP  memmove_long_repeat_emit_encodeBlockAsm
  125 
  126 three_bytes_repeat_emit_encodeBlockAsm:
  127     MOVB $0xf4, (AX)
  128     MOVW BP, 1(AX)
  129     ADDQ $0x03, AX
  130     JMP  memmove_long_repeat_emit_encodeBlockAsm
  131 
  132 two_bytes_repeat_emit_encodeBlockAsm:
  133     MOVB $0xf0, (AX)
  134     MOVB BP, 1(AX)
  135     ADDQ $0x02, AX
  136     CMPL BP, $0x40
  137     JL   memmove_repeat_emit_encodeBlockAsm
  138     JMP  memmove_long_repeat_emit_encodeBlockAsm
  139 
  140 one_byte_repeat_emit_encodeBlockAsm:
  141     SHLB $0x02, BP
  142     MOVB BP, (AX)
  143     ADDQ $0x01, AX
  144 
  145 memmove_repeat_emit_encodeBlockAsm:
  146     LEAQ (AX)(R8*1), BP
  147 
  148     // genMemMoveShort
  149     CMPQ R8, $0x03
  150     JB   emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2
  151     JE   emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3
  152     CMPQ R8, $0x08
  153     JB   emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4through7
  154     CMPQ R8, $0x10
  155     JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
  156     CMPQ R8, $0x20
  157     JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
  158     JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
  159 
  160 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2:
  161     MOVB (R9), R10
  162     MOVB -1(R9)(R8*1), R9
  163     MOVB R10, (AX)
  164     MOVB R9, -1(AX)(R8*1)
  165     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
  166 
  167 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3:
  168     MOVW (R9), R10
  169     MOVB 2(R9), R9
  170     MOVW R10, (AX)
  171     MOVB R9, 2(AX)
  172     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
  173 
  174 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4through7:
  175     MOVL (R9), R10
  176     MOVL -4(R9)(R8*1), R9
  177     MOVL R10, (AX)
  178     MOVL R9, -4(AX)(R8*1)
  179     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
  180 
  181 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
  182     MOVQ (R9), R10
  183     MOVQ -8(R9)(R8*1), R9
  184     MOVQ R10, (AX)
  185     MOVQ R9, -8(AX)(R8*1)
  186     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
  187 
  188 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
  189     MOVOU (R9), X0
  190     MOVOU -16(R9)(R8*1), X1
  191     MOVOU X0, (AX)
  192     MOVOU X1, -16(AX)(R8*1)
  193     JMP   memmove_end_copy_repeat_emit_encodeBlockAsm
  194 
  195 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
  196     MOVOU (R9), X0
  197     MOVOU 16(R9), X1
  198     MOVOU -32(R9)(R8*1), X2
  199     MOVOU -16(R9)(R8*1), X3
  200     MOVOU X0, (AX)
  201     MOVOU X1, 16(AX)
  202     MOVOU X2, -32(AX)(R8*1)
  203     MOVOU X3, -16(AX)(R8*1)
  204 
  205 memmove_end_copy_repeat_emit_encodeBlockAsm:
  206     MOVQ BP, AX
  207     JMP  emit_literal_done_repeat_emit_encodeBlockAsm
  208 
  209 memmove_long_repeat_emit_encodeBlockAsm:
  210     LEAQ (AX)(R8*1), BP
  211 
  212     // genMemMoveLong
  213     MOVOU (R9), X0
  214     MOVOU 16(R9), X1
  215     MOVOU -32(R9)(R8*1), X2
  216     MOVOU -16(R9)(R8*1), X3
  217     MOVQ  R8, R11
  218     SHRQ  $0x05, R11
  219     MOVQ  AX, R10
  220     ANDL  $0x0000001f, R10
  221     MOVQ  $0x00000040, R12
  222     SUBQ  R10, R12
  223     DECQ  R11
  224     JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
  225     LEAQ  -32(R9)(R12*1), R10
  226     LEAQ  -32(AX)(R12*1), R13
  227 
  228 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
  229     MOVOU (R10), X4
  230     MOVOU 16(R10), X5
  231     MOVOA X4, (R13)
  232     MOVOA X5, 16(R13)
  233     ADDQ  $0x20, R13
  234     ADDQ  $0x20, R10
  235     ADDQ  $0x20, R12
  236     DECQ  R11
  237     JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back
  238 
  239 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
  240     MOVOU -32(R9)(R12*1), X4
  241     MOVOU -16(R9)(R12*1), X5
  242     MOVOA X4, -32(AX)(R12*1)
  243     MOVOA X5, -16(AX)(R12*1)
  244     ADDQ  $0x20, R12
  245     CMPQ  R8, R12
  246     JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
  247     MOVOU X0, (AX)
  248     MOVOU X1, 16(AX)
  249     MOVOU X2, -32(AX)(R8*1)
  250     MOVOU X3, -16(AX)(R8*1)
  251     MOVQ  BP, AX
  252 
  253 emit_literal_done_repeat_emit_encodeBlockAsm:
  254     ADDL $0x05, CX
  255     MOVL CX, BP
  256     SUBL 16(SP), BP
  257     MOVQ src_len+32(FP), R8
  258     SUBL CX, R8
  259     LEAQ (DX)(CX*1), R9
  260     LEAQ (DX)(BP*1), BP
  261 
  262     // matchLen
  263     XORL R11, R11
  264     CMPL R8, $0x08
  265     JL   matchlen_single_repeat_extend_encodeBlockAsm
  266 
  267 matchlen_loopback_repeat_extend_encodeBlockAsm:
  268     MOVQ  (R9)(R11*1), R10
  269     XORQ  (BP)(R11*1), R10
  270     TESTQ R10, R10
  271     JZ    matchlen_loop_repeat_extend_encodeBlockAsm
  272     BSFQ  R10, R10
  273     SARQ  $0x03, R10
  274     LEAL  (R11)(R10*1), R11
  275     JMP   repeat_extend_forward_end_encodeBlockAsm
  276 
  277 matchlen_loop_repeat_extend_encodeBlockAsm:
  278     LEAL -8(R8), R8
  279     LEAL 8(R11), R11
  280     CMPL R8, $0x08
  281     JGE  matchlen_loopback_repeat_extend_encodeBlockAsm
  282 
  283 matchlen_single_repeat_extend_encodeBlockAsm:
  284     TESTL R8, R8
  285     JZ    repeat_extend_forward_end_encodeBlockAsm
  286 
  287 matchlen_single_loopback_repeat_extend_encodeBlockAsm:
  288     MOVB (R9)(R11*1), R10
  289     CMPB (BP)(R11*1), R10
  290     JNE  repeat_extend_forward_end_encodeBlockAsm
  291     LEAL 1(R11), R11
  292     DECL R8
  293     JNZ  matchlen_single_loopback_repeat_extend_encodeBlockAsm
  294 
  295 repeat_extend_forward_end_encodeBlockAsm:
  296     ADDL  R11, CX
  297     MOVL  CX, BP
  298     SUBL  SI, BP
  299     MOVL  16(SP), SI
  300     TESTL DI, DI
  301     JZ    repeat_as_copy_encodeBlockAsm
  302 
  303     // emitRepeat
  304 emit_repeat_again_match_repeat_encodeBlockAsm:
  305     MOVL BP, DI
  306     LEAL -4(BP), BP
  307     CMPL DI, $0x08
  308     JLE  repeat_two_match_repeat_encodeBlockAsm
  309     CMPL DI, $0x0c
  310     JGE  cant_repeat_two_offset_match_repeat_encodeBlockAsm
  311     CMPL SI, $0x00000800
  312     JLT  repeat_two_offset_match_repeat_encodeBlockAsm
  313 
  314 cant_repeat_two_offset_match_repeat_encodeBlockAsm:
  315     CMPL BP, $0x00000104
  316     JLT  repeat_three_match_repeat_encodeBlockAsm
  317     CMPL BP, $0x00010100
  318     JLT  repeat_four_match_repeat_encodeBlockAsm
  319     CMPL BP, $0x0100ffff
  320     JLT  repeat_five_match_repeat_encodeBlockAsm
  321     LEAL -16842747(BP), BP
  322     MOVW $0x001d, (AX)
  323     MOVW $0xfffb, 2(AX)
  324     MOVB $0xff, 4(AX)
  325     ADDQ $0x05, AX
  326     JMP  emit_repeat_again_match_repeat_encodeBlockAsm
  327 
  328 repeat_five_match_repeat_encodeBlockAsm:
  329     LEAL -65536(BP), BP
  330     MOVL BP, SI
  331     MOVW $0x001d, (AX)
  332     MOVW BP, 2(AX)
  333     SARL $0x10, SI
  334     MOVB SI, 4(AX)
  335     ADDQ $0x05, AX
  336     JMP  repeat_end_emit_encodeBlockAsm
  337 
  338 repeat_four_match_repeat_encodeBlockAsm:
  339     LEAL -256(BP), BP
  340     MOVW $0x0019, (AX)
  341     MOVW BP, 2(AX)
  342     ADDQ $0x04, AX
  343     JMP  repeat_end_emit_encodeBlockAsm
  344 
  345 repeat_three_match_repeat_encodeBlockAsm:
  346     LEAL -4(BP), BP
  347     MOVW $0x0015, (AX)
  348     MOVB BP, 2(AX)
  349     ADDQ $0x03, AX
  350     JMP  repeat_end_emit_encodeBlockAsm
  351 
  352 repeat_two_match_repeat_encodeBlockAsm:
  353     SHLL $0x02, BP
  354     ORL  $0x01, BP
  355     MOVW BP, (AX)
  356     ADDQ $0x02, AX
  357     JMP  repeat_end_emit_encodeBlockAsm
  358 
  359 repeat_two_offset_match_repeat_encodeBlockAsm:
  360     XORQ DI, DI
  361     LEAL 1(DI)(BP*4), BP
  362     MOVB SI, 1(AX)
  363     SARL $0x08, SI
  364     SHLL $0x05, SI
  365     ORL  SI, BP
  366     MOVB BP, (AX)
  367     ADDQ $0x02, AX
  368     JMP  repeat_end_emit_encodeBlockAsm
  369 
  370 repeat_as_copy_encodeBlockAsm:
  371     // emitCopy
  372     CMPL SI, $0x00010000
  373     JL   two_byte_offset_repeat_as_copy_encodeBlockAsm
  374 
  375 four_bytes_loop_back_repeat_as_copy_encodeBlockAsm:
  376     CMPL BP, $0x40
  377     JLE  four_bytes_remain_repeat_as_copy_encodeBlockAsm
  378     MOVB $0xff, (AX)
  379     MOVL SI, 1(AX)
  380     LEAL -64(BP), BP
  381     ADDQ $0x05, AX
  382     CMPL BP, $0x04
  383     JL   four_bytes_remain_repeat_as_copy_encodeBlockAsm
  384 
  385     // emitRepeat
  386 emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
  387     MOVL BP, DI
  388     LEAL -4(BP), BP
  389     CMPL DI, $0x08
  390     JLE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
  391     CMPL DI, $0x0c
  392     JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
  393     CMPL SI, $0x00000800
  394     JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
  395 
  396 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
  397     CMPL BP, $0x00000104
  398     JLT  repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
  399     CMPL BP, $0x00010100
  400     JLT  repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
  401     CMPL BP, $0x0100ffff
  402     JLT  repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
  403     LEAL -16842747(BP), BP
  404     MOVW $0x001d, (AX)
  405     MOVW $0xfffb, 2(AX)
  406     MOVB $0xff, 4(AX)
  407     ADDQ $0x05, AX
  408     JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
  409 
  410 repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
  411     LEAL -65536(BP), BP
  412     MOVL BP, SI
  413     MOVW $0x001d, (AX)
  414     MOVW BP, 2(AX)
  415     SARL $0x10, SI
  416     MOVB SI, 4(AX)
  417     ADDQ $0x05, AX
  418     JMP  repeat_end_emit_encodeBlockAsm
  419 
  420 repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
  421     LEAL -256(BP), BP
  422     MOVW $0x0019, (AX)
  423     MOVW BP, 2(AX)
  424     ADDQ $0x04, AX
  425     JMP  repeat_end_emit_encodeBlockAsm
  426 
  427 repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
  428     LEAL -4(BP), BP
  429     MOVW $0x0015, (AX)
  430     MOVB BP, 2(AX)
  431     ADDQ $0x03, AX
  432     JMP  repeat_end_emit_encodeBlockAsm
  433 
  434 repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
  435     SHLL $0x02, BP
  436     ORL  $0x01, BP
  437     MOVW BP, (AX)
  438     ADDQ $0x02, AX
  439     JMP  repeat_end_emit_encodeBlockAsm
  440 
  441 repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
  442     XORQ DI, DI
  443     LEAL 1(DI)(BP*4), BP
  444     MOVB SI, 1(AX)
  445     SARL $0x08, SI
  446     SHLL $0x05, SI
  447     ORL  SI, BP
  448     MOVB BP, (AX)
  449     ADDQ $0x02, AX
  450     JMP  repeat_end_emit_encodeBlockAsm
  451     JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm
  452 
  453 four_bytes_remain_repeat_as_copy_encodeBlockAsm:
  454     TESTL BP, BP
  455     JZ    repeat_end_emit_encodeBlockAsm
  456     MOVB  $0x03, BL
  457     LEAL  -4(BX)(BP*4), BP
  458     MOVB  BP, (AX)
  459     MOVL  SI, 1(AX)
  460     ADDQ  $0x05, AX
  461     JMP   repeat_end_emit_encodeBlockAsm
  462 
  463 two_byte_offset_repeat_as_copy_encodeBlockAsm:
  464     CMPL BP, $0x40
  465     JLE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm
  466     MOVB $0xee, (AX)
  467     MOVW SI, 1(AX)
  468     LEAL -60(BP), BP
  469     ADDQ $0x03, AX
  470 
  471     // emitRepeat
  472 emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  473     MOVL BP, DI
  474     LEAL -4(BP), BP
  475     CMPL DI, $0x08
  476     JLE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
  477     CMPL DI, $0x0c
  478     JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
  479     CMPL SI, $0x00000800
  480     JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
  481 
  482 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  483     CMPL BP, $0x00000104
  484     JLT  repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
  485     CMPL BP, $0x00010100
  486     JLT  repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
  487     CMPL BP, $0x0100ffff
  488     JLT  repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
  489     LEAL -16842747(BP), BP
  490     MOVW $0x001d, (AX)
  491     MOVW $0xfffb, 2(AX)
  492     MOVB $0xff, 4(AX)
  493     ADDQ $0x05, AX
  494     JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
  495 
  496 repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  497     LEAL -65536(BP), BP
  498     MOVL BP, SI
  499     MOVW $0x001d, (AX)
  500     MOVW BP, 2(AX)
  501     SARL $0x10, SI
  502     MOVB SI, 4(AX)
  503     ADDQ $0x05, AX
  504     JMP  repeat_end_emit_encodeBlockAsm
  505 
  506 repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  507     LEAL -256(BP), BP
  508     MOVW $0x0019, (AX)
  509     MOVW BP, 2(AX)
  510     ADDQ $0x04, AX
  511     JMP  repeat_end_emit_encodeBlockAsm
  512 
  513 repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  514     LEAL -4(BP), BP
  515     MOVW $0x0015, (AX)
  516     MOVB BP, 2(AX)
  517     ADDQ $0x03, AX
  518     JMP  repeat_end_emit_encodeBlockAsm
  519 
  520 repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  521     SHLL $0x02, BP
  522     ORL  $0x01, BP
  523     MOVW BP, (AX)
  524     ADDQ $0x02, AX
  525     JMP  repeat_end_emit_encodeBlockAsm
  526 
  527 repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
  528     XORQ DI, DI
  529     LEAL 1(DI)(BP*4), BP
  530     MOVB SI, 1(AX)
  531     SARL $0x08, SI
  532     SHLL $0x05, SI
  533     ORL  SI, BP
  534     MOVB BP, (AX)
  535     ADDQ $0x02, AX
  536     JMP  repeat_end_emit_encodeBlockAsm
  537     JMP two_byte_offset_repeat_as_copy_encodeBlockAsm
  538 
  539 two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
  540     CMPL BP, $0x0c
  541     JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm
  542     CMPL SI, $0x00000800
  543     JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm
  544     MOVB $0x01, BL
  545     LEAL -16(BX)(BP*4), BP
  546     MOVB SI, 1(AX)
  547     SHRL $0x08, SI
  548     SHLL $0x05, SI
  549     ORL  SI, BP
  550     MOVB BP, (AX)
  551     ADDQ $0x02, AX
  552     JMP  repeat_end_emit_encodeBlockAsm
  553 
  554 emit_copy_three_repeat_as_copy_encodeBlockAsm:
  555     MOVB $0x02, BL
  556     LEAL -4(BX)(BP*4), BP
  557     MOVB BP, (AX)
  558     MOVW SI, 1(AX)
  559     ADDQ $0x03, AX
  560 
  561 repeat_end_emit_encodeBlockAsm:
  562     MOVL CX, 12(SP)
  563     JMP  search_loop_encodeBlockAsm
  564 
  565 no_repeat_found_encodeBlockAsm:
  566     CMPL (DX)(BP*1), SI
  567     JEQ  candidate_match_encodeBlockAsm
  568     SHRQ $0x08, SI
  569     MOVL 24(SP)(R9*4), BP
  570     LEAL 2(CX), R8
  571     CMPL (DX)(DI*1), SI
  572     JEQ  candidate2_match_encodeBlockAsm
  573     MOVL R8, 24(SP)(R9*4)
  574     SHRQ $0x08, SI
  575     CMPL (DX)(BP*1), SI
  576     JEQ  candidate3_match_encodeBlockAsm
  577     MOVL 20(SP), CX
  578     JMP  search_loop_encodeBlockAsm
  579 
  580 candidate3_match_encodeBlockAsm:
  581     ADDL $0x02, CX
  582     JMP  candidate_match_encodeBlockAsm
  583 
  584 candidate2_match_encodeBlockAsm:
  585     MOVL R8, 24(SP)(R9*4)
  586     INCL CX
  587     MOVL DI, BP
  588 
  589 candidate_match_encodeBlockAsm:
  590     MOVL  12(SP), SI
  591     TESTL BP, BP
  592     JZ    match_extend_back_end_encodeBlockAsm
  593 
  594 match_extend_back_loop_encodeBlockAsm:
  595     CMPL CX, SI
  596     JLE  match_extend_back_end_encodeBlockAsm
  597     MOVB -1(DX)(BP*1), BL
  598     MOVB -1(DX)(CX*1), DI
  599     CMPB BL, DI
  600     JNE  match_extend_back_end_encodeBlockAsm
  601     LEAL -1(CX), CX
  602     DECL BP
  603     JZ   match_extend_back_end_encodeBlockAsm
  604     JMP  match_extend_back_loop_encodeBlockAsm
  605 
  606 match_extend_back_end_encodeBlockAsm:
  607     MOVL CX, SI
  608     SUBL 12(SP), SI
  609     LEAQ 5(AX)(SI*1), SI
  610     CMPQ SI, (SP)
  611     JL   match_dst_size_check_encodeBlockAsm
  612     MOVQ $0x00000000, ret+48(FP)
  613     RET
  614 
  615 match_dst_size_check_encodeBlockAsm:
  616     MOVL CX, SI
  617     MOVL 12(SP), DI
  618     CMPL DI, SI
  619     JEQ  emit_literal_done_match_emit_encodeBlockAsm
  620     MOVL SI, R8
  621     MOVL SI, 12(SP)
  622     LEAQ (DX)(DI*1), SI
  623     SUBL DI, R8
  624     LEAL -1(R8), DI
  625     CMPL DI, $0x3c
  626     JLT  one_byte_match_emit_encodeBlockAsm
  627     CMPL DI, $0x00000100
  628     JLT  two_bytes_match_emit_encodeBlockAsm
  629     CMPL DI, $0x00010000
  630     JLT  three_bytes_match_emit_encodeBlockAsm
  631     CMPL DI, $0x01000000
  632     JLT  four_bytes_match_emit_encodeBlockAsm
  633     MOVB $0xfc, (AX)
  634     MOVL DI, 1(AX)
  635     ADDQ $0x05, AX
  636     JMP  memmove_long_match_emit_encodeBlockAsm
  637 
  638 four_bytes_match_emit_encodeBlockAsm:
  639     MOVL DI, R9
  640     SHRL $0x10, R9
  641     MOVB $0xf8, (AX)
  642     MOVW DI, 1(AX)
  643     MOVB R9, 3(AX)
  644     ADDQ $0x04, AX
  645     JMP  memmove_long_match_emit_encodeBlockAsm
  646 
  647 three_bytes_match_emit_encodeBlockAsm:
  648     MOVB $0xf4, (AX)
  649     MOVW DI, 1(AX)
  650     ADDQ $0x03, AX
  651     JMP  memmove_long_match_emit_encodeBlockAsm
  652 
  653 two_bytes_match_emit_encodeBlockAsm:
  654     MOVB $0xf0, (AX)
  655     MOVB DI, 1(AX)
  656     ADDQ $0x02, AX
  657     CMPL DI, $0x40
  658     JL   memmove_match_emit_encodeBlockAsm
  659     JMP  memmove_long_match_emit_encodeBlockAsm
  660 
  661 one_byte_match_emit_encodeBlockAsm:
  662     SHLB $0x02, DI
  663     MOVB DI, (AX)
  664     ADDQ $0x01, AX
  665 
  666 memmove_match_emit_encodeBlockAsm:
  667     LEAQ (AX)(R8*1), DI
  668 
  669     // genMemMoveShort
  670     CMPQ R8, $0x03
  671     JB   emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2
  672     JE   emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3
  673     CMPQ R8, $0x08
  674     JB   emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4through7
  675     CMPQ R8, $0x10
  676     JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
  677     CMPQ R8, $0x20
  678     JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
  679     JMP  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
  680 
  681 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2:
  682     MOVB (SI), R9
  683     MOVB -1(SI)(R8*1), SI
  684     MOVB R9, (AX)
  685     MOVB SI, -1(AX)(R8*1)
  686     JMP  memmove_end_copy_match_emit_encodeBlockAsm
  687 
  688 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3:
  689     MOVW (SI), R9
  690     MOVB 2(SI), SI
  691     MOVW R9, (AX)
  692     MOVB SI, 2(AX)
  693     JMP  memmove_end_copy_match_emit_encodeBlockAsm
  694 
  695 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4through7:
  696     MOVL (SI), R9
  697     MOVL -4(SI)(R8*1), SI
  698     MOVL R9, (AX)
  699     MOVL SI, -4(AX)(R8*1)
  700     JMP  memmove_end_copy_match_emit_encodeBlockAsm
  701 
  702 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
  703     MOVQ (SI), R9
  704     MOVQ -8(SI)(R8*1), SI
  705     MOVQ R9, (AX)
  706     MOVQ SI, -8(AX)(R8*1)
  707     JMP  memmove_end_copy_match_emit_encodeBlockAsm
  708 
  709 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
  710     MOVOU (SI), X0
  711     MOVOU -16(SI)(R8*1), X1
  712     MOVOU X0, (AX)
  713     MOVOU X1, -16(AX)(R8*1)
  714     JMP   memmove_end_copy_match_emit_encodeBlockAsm
  715 
  716 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
  717     MOVOU (SI), X0
  718     MOVOU 16(SI), X1
  719     MOVOU -32(SI)(R8*1), X2
  720     MOVOU -16(SI)(R8*1), X3
  721     MOVOU X0, (AX)
  722     MOVOU X1, 16(AX)
  723     MOVOU X2, -32(AX)(R8*1)
  724     MOVOU X3, -16(AX)(R8*1)
  725 
  726 memmove_end_copy_match_emit_encodeBlockAsm:
  727     MOVQ DI, AX
  728     JMP  emit_literal_done_match_emit_encodeBlockAsm
  729 
  730 memmove_long_match_emit_encodeBlockAsm:
  731     LEAQ (AX)(R8*1), DI
  732 
  733     // genMemMoveLong
  734     MOVOU (SI), X0
  735     MOVOU 16(SI), X1
  736     MOVOU -32(SI)(R8*1), X2
  737     MOVOU -16(SI)(R8*1), X3
  738     MOVQ  R8, R10
  739     SHRQ  $0x05, R10
  740     MOVQ  AX, R9
  741     ANDL  $0x0000001f, R9
  742     MOVQ  $0x00000040, R11
  743     SUBQ  R9, R11
  744     DECQ  R10
  745     JA    emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
  746     LEAQ  -32(SI)(R11*1), R9
  747     LEAQ  -32(AX)(R11*1), R12
  748 
  749 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
  750     MOVOU (R9), X4
  751     MOVOU 16(R9), X5
  752     MOVOA X4, (R12)
  753     MOVOA X5, 16(R12)
  754     ADDQ  $0x20, R12
  755     ADDQ  $0x20, R9
  756     ADDQ  $0x20, R11
  757     DECQ  R10
  758     JNA   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
  759 
  760 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
  761     MOVOU -32(SI)(R11*1), X4
  762     MOVOU -16(SI)(R11*1), X5
  763     MOVOA X4, -32(AX)(R11*1)
  764     MOVOA X5, -16(AX)(R11*1)
  765     ADDQ  $0x20, R11
  766     CMPQ  R8, R11
  767     JAE   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
  768     MOVOU X0, (AX)
  769     MOVOU X1, 16(AX)
  770     MOVOU X2, -32(AX)(R8*1)
  771     MOVOU X3, -16(AX)(R8*1)
  772     MOVQ  DI, AX
  773 
  774 emit_literal_done_match_emit_encodeBlockAsm:
  775 match_nolit_loop_encodeBlockAsm:
  776     MOVL CX, SI
  777     SUBL BP, SI
  778     MOVL SI, 16(SP)
  779     ADDL $0x04, CX
  780     ADDL $0x04, BP
  781     MOVQ src_len+32(FP), SI
  782     SUBL CX, SI
  783     LEAQ (DX)(CX*1), DI
  784     LEAQ (DX)(BP*1), BP
  785 
  786     // matchLen
  787     XORL R9, R9
  788     CMPL SI, $0x08
  789     JL   matchlen_single_match_nolit_encodeBlockAsm
  790 
  791 matchlen_loopback_match_nolit_encodeBlockAsm:
  792     MOVQ  (DI)(R9*1), R8
  793     XORQ  (BP)(R9*1), R8
  794     TESTQ R8, R8
  795     JZ    matchlen_loop_match_nolit_encodeBlockAsm
  796     BSFQ  R8, R8
  797     SARQ  $0x03, R8
  798     LEAL  (R9)(R8*1), R9
  799     JMP   match_nolit_end_encodeBlockAsm
  800 
  801 matchlen_loop_match_nolit_encodeBlockAsm:
  802     LEAL -8(SI), SI
  803     LEAL 8(R9), R9
  804     CMPL SI, $0x08
  805     JGE  matchlen_loopback_match_nolit_encodeBlockAsm
  806 
  807 matchlen_single_match_nolit_encodeBlockAsm:
  808     TESTL SI, SI
  809     JZ    match_nolit_end_encodeBlockAsm
  810 
  811 matchlen_single_loopback_match_nolit_encodeBlockAsm:
  812     MOVB (DI)(R9*1), R8
  813     CMPB (BP)(R9*1), R8
  814     JNE  match_nolit_end_encodeBlockAsm
  815     LEAL 1(R9), R9
  816     DECL SI
  817     JNZ  matchlen_single_loopback_match_nolit_encodeBlockAsm
  818 
  819 match_nolit_end_encodeBlockAsm:
  820     ADDL R9, CX
  821     MOVL 16(SP), BP
  822     ADDL $0x04, R9
  823     MOVL CX, 12(SP)
  824 
  825     // emitCopy
  826     CMPL BP, $0x00010000
  827     JL   two_byte_offset_match_nolit_encodeBlockAsm
  828 
  829 four_bytes_loop_back_match_nolit_encodeBlockAsm:
  830     CMPL R9, $0x40
  831     JLE  four_bytes_remain_match_nolit_encodeBlockAsm
  832     MOVB $0xff, (AX)
  833     MOVL BP, 1(AX)
  834     LEAL -64(R9), R9
  835     ADDQ $0x05, AX
  836     CMPL R9, $0x04
  837     JL   four_bytes_remain_match_nolit_encodeBlockAsm
  838 
  839     // emitRepeat
  840 emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
  841     MOVL R9, SI
  842     LEAL -4(R9), R9
  843     CMPL SI, $0x08
  844     JLE  repeat_two_match_nolit_encodeBlockAsm_emit_copy
  845     CMPL SI, $0x0c
  846     JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
  847     CMPL BP, $0x00000800
  848     JLT  repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
  849 
  850 cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
  851     CMPL R9, $0x00000104
  852     JLT  repeat_three_match_nolit_encodeBlockAsm_emit_copy
  853     CMPL R9, $0x00010100
  854     JLT  repeat_four_match_nolit_encodeBlockAsm_emit_copy
  855     CMPL R9, $0x0100ffff
  856     JLT  repeat_five_match_nolit_encodeBlockAsm_emit_copy
  857     LEAL -16842747(R9), R9
  858     MOVW $0x001d, (AX)
  859     MOVW $0xfffb, 2(AX)
  860     MOVB $0xff, 4(AX)
  861     ADDQ $0x05, AX
  862     JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
  863 
  864 repeat_five_match_nolit_encodeBlockAsm_emit_copy:
  865     LEAL -65536(R9), R9
  866     MOVL R9, BP
  867     MOVW $0x001d, (AX)
  868     MOVW R9, 2(AX)
  869     SARL $0x10, BP
  870     MOVB BP, 4(AX)
  871     ADDQ $0x05, AX
  872     JMP  match_nolit_emitcopy_end_encodeBlockAsm
  873 
  874 repeat_four_match_nolit_encodeBlockAsm_emit_copy:
  875     LEAL -256(R9), R9
  876     MOVW $0x0019, (AX)
  877     MOVW R9, 2(AX)
  878     ADDQ $0x04, AX
  879     JMP  match_nolit_emitcopy_end_encodeBlockAsm
  880 
  881 repeat_three_match_nolit_encodeBlockAsm_emit_copy:
  882     LEAL -4(R9), R9
  883     MOVW $0x0015, (AX)
  884     MOVB R9, 2(AX)
  885     ADDQ $0x03, AX
  886     JMP  match_nolit_emitcopy_end_encodeBlockAsm
  887 
  888 repeat_two_match_nolit_encodeBlockAsm_emit_copy:
  889     SHLL $0x02, R9
  890     ORL  $0x01, R9
  891     MOVW R9, (AX)
  892     ADDQ $0x02, AX
  893     JMP  match_nolit_emitcopy_end_encodeBlockAsm
  894 
  895 repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
  896     XORQ SI, SI
  897     LEAL 1(SI)(R9*4), R9
  898     MOVB BP, 1(AX)
  899     SARL $0x08, BP
  900     SHLL $0x05, BP
  901     ORL  BP, R9
  902     MOVB R9, (AX)
  903     ADDQ $0x02, AX
  904     JMP  match_nolit_emitcopy_end_encodeBlockAsm
  905     JMP four_bytes_loop_back_match_nolit_encodeBlockAsm
  906 
  907 four_bytes_remain_match_nolit_encodeBlockAsm:
  908     TESTL R9, R9
  909     JZ    match_nolit_emitcopy_end_encodeBlockAsm
  910     MOVB  $0x03, BL
  911     LEAL  -4(BX)(R9*4), R9
  912     MOVB  R9, (AX)
  913     MOVL  BP, 1(AX)
  914     ADDQ  $0x05, AX
  915     JMP   match_nolit_emitcopy_end_encodeBlockAsm
  916 
  917 two_byte_offset_match_nolit_encodeBlockAsm:
  918     CMPL R9, $0x40
  919     JLE  two_byte_offset_short_match_nolit_encodeBlockAsm
  920     MOVB $0xee, (AX)
  921     MOVW BP, 1(AX)
  922     LEAL -60(R9), R9
  923     ADDQ $0x03, AX
  924 
  925     // emitRepeat
  926 emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
  927     MOVL R9, SI
  928     LEAL -4(R9), R9
  929     CMPL SI, $0x08
  930     JLE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
  931     CMPL SI, $0x0c
  932     JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
  933     CMPL BP, $0x00000800
  934     JLT  repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
  935 
  936 cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
  937     CMPL R9, $0x00000104
  938     JLT  repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
  939     CMPL R9, $0x00010100
  940     JLT  repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
  941     CMPL R9, $0x0100ffff
  942     JLT  repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
  943     LEAL -16842747(R9), R9
  944     MOVW $0x001d, (AX)
  945     MOVW $0xfffb, 2(AX)
  946     MOVB $0xff, 4(AX)
  947     ADDQ $0x05, AX
  948     JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
  949 
  950 repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
  951     LEAL -65536(R9), R9
  952     MOVL R9, BP
  953     MOVW $0x001d, (AX)
  954     MOVW R9, 2(AX)
  955     SARL $0x10, BP
  956     MOVB BP, 4(AX)
  957     ADDQ $0x05, AX
  958     JMP  match_nolit_emitcopy_end_encodeBlockAsm
  959 
  960 repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
  961     LEAL -256(R9), R9
  962     MOVW $0x0019, (AX)
  963     MOVW R9, 2(AX)
  964     ADDQ $0x04, AX
  965     JMP  match_nolit_emitcopy_end_encodeBlockAsm
  966 
  967 repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
  968     LEAL -4(R9), R9
  969     MOVW $0x0015, (AX)
  970     MOVB R9, 2(AX)
  971     ADDQ $0x03, AX
  972     JMP  match_nolit_emitcopy_end_encodeBlockAsm
  973 
  974 repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
  975     SHLL $0x02, R9
  976     ORL  $0x01, R9
  977     MOVW R9, (AX)
  978     ADDQ $0x02, AX
  979     JMP  match_nolit_emitcopy_end_encodeBlockAsm
  980 
  981 repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
  982     XORQ SI, SI
  983     LEAL 1(SI)(R9*4), R9
  984     MOVB BP, 1(AX)
  985     SARL $0x08, BP
  986     SHLL $0x05, BP
  987     ORL  BP, R9
  988     MOVB R9, (AX)
  989     ADDQ $0x02, AX
  990     JMP  match_nolit_emitcopy_end_encodeBlockAsm
  991     JMP two_byte_offset_match_nolit_encodeBlockAsm
  992 
  993 two_byte_offset_short_match_nolit_encodeBlockAsm:
  994     CMPL R9, $0x0c
  995     JGE  emit_copy_three_match_nolit_encodeBlockAsm
  996     CMPL BP, $0x00000800
  997     JGE  emit_copy_three_match_nolit_encodeBlockAsm
  998     MOVB $0x01, BL
  999     LEAL -16(BX)(R9*4), R9
 1000     MOVB BP, 1(AX)
 1001     SHRL $0x08, BP
 1002     SHLL $0x05, BP
 1003     ORL  BP, R9
 1004     MOVB R9, (AX)
 1005     ADDQ $0x02, AX
 1006     JMP  match_nolit_emitcopy_end_encodeBlockAsm
 1007 
 1008 emit_copy_three_match_nolit_encodeBlockAsm:
 1009     MOVB $0x02, BL
 1010     LEAL -4(BX)(R9*4), R9
 1011     MOVB R9, (AX)
 1012     MOVW BP, 1(AX)
 1013     ADDQ $0x03, AX
 1014 
 1015 match_nolit_emitcopy_end_encodeBlockAsm:
 1016     CMPL CX, 8(SP)
 1017     JGE  emit_remainder_encodeBlockAsm
 1018     MOVQ -2(DX)(CX*1), SI
 1019     CMPQ AX, (SP)
 1020     JL   match_nolit_dst_ok_encodeBlockAsm
 1021     MOVQ $0x00000000, ret+48(FP)
 1022     RET
 1023 
 1024 match_nolit_dst_ok_encodeBlockAsm:
 1025     MOVQ  $0x0000cf1bbcdcbf9b, R8
 1026     MOVQ  SI, DI
 1027     SHRQ  $0x10, SI
 1028     MOVQ  SI, BP
 1029     SHLQ  $0x10, DI
 1030     IMULQ R8, DI
 1031     SHRQ  $0x32, DI
 1032     SHLQ  $0x10, BP
 1033     IMULQ R8, BP
 1034     SHRQ  $0x32, BP
 1035     LEAL  -2(CX), R8
 1036     LEAQ  24(SP)(BP*4), R9
 1037     MOVL  (R9), BP
 1038     MOVL  R8, 24(SP)(DI*4)
 1039     MOVL  CX, (R9)
 1040     CMPL  (DX)(BP*1), SI
 1041     JEQ   match_nolit_loop_encodeBlockAsm
 1042     INCL  CX
 1043     JMP   search_loop_encodeBlockAsm
 1044 
 1045 emit_remainder_encodeBlockAsm:
 1046     MOVQ src_len+32(FP), CX
 1047     SUBL 12(SP), CX
 1048     LEAQ 5(AX)(CX*1), CX
 1049     CMPQ CX, (SP)
 1050     JL   emit_remainder_ok_encodeBlockAsm
 1051     MOVQ $0x00000000, ret+48(FP)
 1052     RET
 1053 
 1054 emit_remainder_ok_encodeBlockAsm:
 1055     MOVQ src_len+32(FP), CX
 1056     MOVL 12(SP), BX
 1057     CMPL BX, CX
 1058     JEQ  emit_literal_done_emit_remainder_encodeBlockAsm
 1059     MOVL CX, BP
 1060     MOVL CX, 12(SP)
 1061     LEAQ (DX)(BX*1), CX
 1062     SUBL BX, BP
 1063     LEAL -1(BP), DX
 1064     CMPL DX, $0x3c
 1065     JLT  one_byte_emit_remainder_encodeBlockAsm
 1066     CMPL DX, $0x00000100
 1067     JLT  two_bytes_emit_remainder_encodeBlockAsm
 1068     CMPL DX, $0x00010000
 1069     JLT  three_bytes_emit_remainder_encodeBlockAsm
 1070     CMPL DX, $0x01000000
 1071     JLT  four_bytes_emit_remainder_encodeBlockAsm
 1072     MOVB $0xfc, (AX)
 1073     MOVL DX, 1(AX)
 1074     ADDQ $0x05, AX
 1075     JMP  memmove_long_emit_remainder_encodeBlockAsm
 1076 
 1077 four_bytes_emit_remainder_encodeBlockAsm:
 1078     MOVL DX, BX
 1079     SHRL $0x10, BX
 1080     MOVB $0xf8, (AX)
 1081     MOVW DX, 1(AX)
 1082     MOVB BL, 3(AX)
 1083     ADDQ $0x04, AX
 1084     JMP  memmove_long_emit_remainder_encodeBlockAsm
 1085 
 1086 three_bytes_emit_remainder_encodeBlockAsm:
 1087     MOVB $0xf4, (AX)
 1088     MOVW DX, 1(AX)
 1089     ADDQ $0x03, AX
 1090     JMP  memmove_long_emit_remainder_encodeBlockAsm
 1091 
 1092 two_bytes_emit_remainder_encodeBlockAsm:
 1093     MOVB $0xf0, (AX)
 1094     MOVB DL, 1(AX)
 1095     ADDQ $0x02, AX
 1096     CMPL DX, $0x40
 1097     JL   memmove_emit_remainder_encodeBlockAsm
 1098     JMP  memmove_long_emit_remainder_encodeBlockAsm
 1099 
 1100 one_byte_emit_remainder_encodeBlockAsm:
 1101     SHLB $0x02, DL
 1102     MOVB DL, (AX)
 1103     ADDQ $0x01, AX
 1104 
 1105 memmove_emit_remainder_encodeBlockAsm:
 1106     LEAQ (AX)(BP*1), DX
 1107     MOVL BP, BX
 1108 
 1109     // genMemMoveShort
 1110     CMPQ BX, $0x03
 1111     JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
 1112     JE   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
 1113     CMPQ BX, $0x08
 1114     JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7
 1115     CMPQ BX, $0x10
 1116     JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
 1117     CMPQ BX, $0x20
 1118     JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
 1119     JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
 1120 
 1121 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
 1122     MOVB (CX), BP
 1123     MOVB -1(CX)(BX*1), CL
 1124     MOVB BP, (AX)
 1125     MOVB CL, -1(AX)(BX*1)
 1126     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 1127 
 1128 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
 1129     MOVW (CX), BP
 1130     MOVB 2(CX), CL
 1131     MOVW BP, (AX)
 1132     MOVB CL, 2(AX)
 1133     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 1134 
 1135 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
 1136     MOVL (CX), BP
 1137     MOVL -4(CX)(BX*1), CX
 1138     MOVL BP, (AX)
 1139     MOVL CX, -4(AX)(BX*1)
 1140     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 1141 
 1142 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
 1143     MOVQ (CX), BP
 1144     MOVQ -8(CX)(BX*1), CX
 1145     MOVQ BP, (AX)
 1146     MOVQ CX, -8(AX)(BX*1)
 1147     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 1148 
 1149 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
 1150     MOVOU (CX), X0
 1151     MOVOU -16(CX)(BX*1), X1
 1152     MOVOU X0, (AX)
 1153     MOVOU X1, -16(AX)(BX*1)
 1154     JMP   memmove_end_copy_emit_remainder_encodeBlockAsm
 1155 
 1156 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
 1157     MOVOU (CX), X0
 1158     MOVOU 16(CX), X1
 1159     MOVOU -32(CX)(BX*1), X2
 1160     MOVOU -16(CX)(BX*1), X3
 1161     MOVOU X0, (AX)
 1162     MOVOU X1, 16(AX)
 1163     MOVOU X2, -32(AX)(BX*1)
 1164     MOVOU X3, -16(AX)(BX*1)
 1165 
 1166 memmove_end_copy_emit_remainder_encodeBlockAsm:
 1167     MOVQ DX, AX
 1168     JMP  emit_literal_done_emit_remainder_encodeBlockAsm
 1169 
 1170 memmove_long_emit_remainder_encodeBlockAsm:
 1171     LEAQ (AX)(BP*1), DX
 1172     MOVL BP, BX
 1173 
 1174     // genMemMoveLong
 1175     MOVOU (CX), X0
 1176     MOVOU 16(CX), X1
 1177     MOVOU -32(CX)(BX*1), X2
 1178     MOVOU -16(CX)(BX*1), X3
 1179     MOVQ  BX, SI
 1180     SHRQ  $0x05, SI
 1181     MOVQ  AX, BP
 1182     ANDL  $0x0000001f, BP
 1183     MOVQ  $0x00000040, DI
 1184     SUBQ  BP, DI
 1185     DECQ  SI
 1186     JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
 1187     LEAQ  -32(CX)(DI*1), BP
 1188     LEAQ  -32(AX)(DI*1), R8
 1189 
 1190 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
 1191     MOVOU (BP), X4
 1192     MOVOU 16(BP), X5
 1193     MOVOA X4, (R8)
 1194     MOVOA X5, 16(R8)
 1195     ADDQ  $0x20, R8
 1196     ADDQ  $0x20, BP
 1197     ADDQ  $0x20, DI
 1198     DECQ  SI
 1199     JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back
 1200 
 1201 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
 1202     MOVOU -32(CX)(DI*1), X4
 1203     MOVOU -16(CX)(DI*1), X5
 1204     MOVOA X4, -32(AX)(DI*1)
 1205     MOVOA X5, -16(AX)(DI*1)
 1206     ADDQ  $0x20, DI
 1207     CMPQ  BX, DI
 1208     JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
 1209     MOVOU X0, (AX)
 1210     MOVOU X1, 16(AX)
 1211     MOVOU X2, -32(AX)(BX*1)
 1212     MOVOU X3, -16(AX)(BX*1)
 1213     MOVQ  DX, AX
 1214 
 1215 emit_literal_done_emit_remainder_encodeBlockAsm:
 1216     MOVQ dst_base+0(FP), CX
 1217     SUBQ CX, AX
 1218     MOVQ AX, ret+48(FP)
 1219     RET
 1220 
 1221 // func encodeBlockAsm4MB(dst []byte, src []byte) int
 1222 // Requires: SSE2
 1223 TEXT ·encodeBlockAsm4MB(SB), $65560-56
 1224     MOVQ dst_base+0(FP), AX
 1225     MOVQ $0x00000200, CX
 1226     LEAQ 24(SP), DX
 1227     PXOR X0, X0
 1228 
 1229 zero_loop_encodeBlockAsm4MB:
 1230     MOVOU X0, (DX)
 1231     MOVOU X0, 16(DX)
 1232     MOVOU X0, 32(DX)
 1233     MOVOU X0, 48(DX)
 1234     MOVOU X0, 64(DX)
 1235     MOVOU X0, 80(DX)
 1236     MOVOU X0, 96(DX)
 1237     MOVOU X0, 112(DX)
 1238     ADDQ  $0x80, DX
 1239     DECQ  CX
 1240     JNZ   zero_loop_encodeBlockAsm4MB
 1241     MOVL  $0x00000000, 12(SP)
 1242     MOVQ  src_len+32(FP), CX
 1243     LEAQ  -5(CX), DX
 1244     LEAQ  -8(CX), BP
 1245     MOVL  BP, 8(SP)
 1246     SHRQ  $0x05, CX
 1247     SUBL  CX, DX
 1248     LEAQ  (AX)(DX*1), DX
 1249     MOVQ  DX, (SP)
 1250     MOVL  $0x00000001, CX
 1251     MOVL  CX, 16(SP)
 1252     MOVQ  src_base+24(FP), DX
 1253 
 1254 search_loop_encodeBlockAsm4MB:
 1255     MOVQ  (DX)(CX*1), SI
 1256     MOVL  CX, BP
 1257     SUBL  12(SP), BP
 1258     SHRL  $0x06, BP
 1259     LEAL  4(CX)(BP*1), BP
 1260     CMPL  BP, 8(SP)
 1261     JGE   emit_remainder_encodeBlockAsm4MB
 1262     MOVL  BP, 20(SP)
 1263     MOVQ  $0x0000cf1bbcdcbf9b, R8
 1264     MOVQ  SI, R9
 1265     MOVQ  SI, R10
 1266     SHRQ  $0x08, R10
 1267     SHLQ  $0x10, R9
 1268     IMULQ R8, R9
 1269     SHRQ  $0x32, R9
 1270     SHLQ  $0x10, R10
 1271     IMULQ R8, R10
 1272     SHRQ  $0x32, R10
 1273     MOVL  24(SP)(R9*4), BP
 1274     MOVL  24(SP)(R10*4), DI
 1275     MOVL  CX, 24(SP)(R9*4)
 1276     LEAL  1(CX), R9
 1277     MOVL  R9, 24(SP)(R10*4)
 1278     MOVQ  SI, R9
 1279     SHRQ  $0x10, R9
 1280     SHLQ  $0x10, R9
 1281     IMULQ R8, R9
 1282     SHRQ  $0x32, R9
 1283     MOVL  CX, R8
 1284     SUBL  16(SP), R8
 1285     MOVL  1(DX)(R8*1), R10
 1286     MOVQ  SI, R8
 1287     SHRQ  $0x08, R8
 1288     CMPL  R8, R10
 1289     JNE   no_repeat_found_encodeBlockAsm4MB
 1290     LEAL  1(CX), SI
 1291     MOVL  12(SP), DI
 1292     MOVL  SI, BP
 1293     SUBL  16(SP), BP
 1294     JZ    repeat_extend_back_end_encodeBlockAsm4MB
 1295 
 1296 repeat_extend_back_loop_encodeBlockAsm4MB:
 1297     CMPL SI, DI
 1298     JLE  repeat_extend_back_end_encodeBlockAsm4MB
 1299     MOVB -1(DX)(BP*1), BL
 1300     MOVB -1(DX)(SI*1), R8
 1301     CMPB BL, R8
 1302     JNE  repeat_extend_back_end_encodeBlockAsm4MB
 1303     LEAL -1(SI), SI
 1304     DECL BP
 1305     JNZ  repeat_extend_back_loop_encodeBlockAsm4MB
 1306 
 1307 repeat_extend_back_end_encodeBlockAsm4MB:
 1308     MOVL 12(SP), BP
 1309     CMPL BP, SI
 1310     JEQ  emit_literal_done_repeat_emit_encodeBlockAsm4MB
 1311     MOVL SI, R8
 1312     MOVL SI, 12(SP)
 1313     LEAQ (DX)(BP*1), R9
 1314     SUBL BP, R8
 1315     LEAL -1(R8), BP
 1316     CMPL BP, $0x3c
 1317     JLT  one_byte_repeat_emit_encodeBlockAsm4MB
 1318     CMPL BP, $0x00000100
 1319     JLT  two_bytes_repeat_emit_encodeBlockAsm4MB
 1320     CMPL BP, $0x00010000
 1321     JLT  three_bytes_repeat_emit_encodeBlockAsm4MB
 1322     MOVL BP, R10
 1323     SHRL $0x10, R10
 1324     MOVB $0xf8, (AX)
 1325     MOVW BP, 1(AX)
 1326     MOVB R10, 3(AX)
 1327     ADDQ $0x04, AX
 1328     JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
 1329 
 1330 three_bytes_repeat_emit_encodeBlockAsm4MB:
 1331     MOVB $0xf4, (AX)
 1332     MOVW BP, 1(AX)
 1333     ADDQ $0x03, AX
 1334     JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
 1335 
 1336 two_bytes_repeat_emit_encodeBlockAsm4MB:
 1337     MOVB $0xf0, (AX)
 1338     MOVB BP, 1(AX)
 1339     ADDQ $0x02, AX
 1340     CMPL BP, $0x40
 1341     JL   memmove_repeat_emit_encodeBlockAsm4MB
 1342     JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
 1343 
 1344 one_byte_repeat_emit_encodeBlockAsm4MB:
 1345     SHLB $0x02, BP
 1346     MOVB BP, (AX)
 1347     ADDQ $0x01, AX
 1348 
 1349 memmove_repeat_emit_encodeBlockAsm4MB:
 1350     LEAQ (AX)(R8*1), BP
 1351 
 1352     // genMemMoveShort
 1353     CMPQ R8, $0x03
 1354     JB   emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_1or2
 1355     JE   emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_3
 1356     CMPQ R8, $0x08
 1357     JB   emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_4through7
 1358     CMPQ R8, $0x10
 1359     JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
 1360     CMPQ R8, $0x20
 1361     JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
 1362     JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64
 1363 
 1364 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_1or2:
 1365     MOVB (R9), R10
 1366     MOVB -1(R9)(R8*1), R9
 1367     MOVB R10, (AX)
 1368     MOVB R9, -1(AX)(R8*1)
 1369     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 1370 
 1371 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_3:
 1372     MOVW (R9), R10
 1373     MOVB 2(R9), R9
 1374     MOVW R10, (AX)
 1375     MOVB R9, 2(AX)
 1376     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 1377 
 1378 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_4through7:
 1379     MOVL (R9), R10
 1380     MOVL -4(R9)(R8*1), R9
 1381     MOVL R10, (AX)
 1382     MOVL R9, -4(AX)(R8*1)
 1383     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 1384 
 1385 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
 1386     MOVQ (R9), R10
 1387     MOVQ -8(R9)(R8*1), R9
 1388     MOVQ R10, (AX)
 1389     MOVQ R9, -8(AX)(R8*1)
 1390     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 1391 
 1392 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
 1393     MOVOU (R9), X0
 1394     MOVOU -16(R9)(R8*1), X1
 1395     MOVOU X0, (AX)
 1396     MOVOU X1, -16(AX)(R8*1)
 1397     JMP   memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 1398 
 1399 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
 1400     MOVOU (R9), X0
 1401     MOVOU 16(R9), X1
 1402     MOVOU -32(R9)(R8*1), X2
 1403     MOVOU -16(R9)(R8*1), X3
 1404     MOVOU X0, (AX)
 1405     MOVOU X1, 16(AX)
 1406     MOVOU X2, -32(AX)(R8*1)
 1407     MOVOU X3, -16(AX)(R8*1)
 1408 
 1409 memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
 1410     MOVQ BP, AX
 1411     JMP  emit_literal_done_repeat_emit_encodeBlockAsm4MB
 1412 
 1413 memmove_long_repeat_emit_encodeBlockAsm4MB:
 1414     LEAQ (AX)(R8*1), BP
 1415 
 1416     // genMemMoveLong
 1417     MOVOU (R9), X0
 1418     MOVOU 16(R9), X1
 1419     MOVOU -32(R9)(R8*1), X2
 1420     MOVOU -16(R9)(R8*1), X3
 1421     MOVQ  R8, R11
 1422     SHRQ  $0x05, R11
 1423     MOVQ  AX, R10
 1424     ANDL  $0x0000001f, R10
 1425     MOVQ  $0x00000040, R12
 1426     SUBQ  R10, R12
 1427     DECQ  R11
 1428     JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
 1429     LEAQ  -32(R9)(R12*1), R10
 1430     LEAQ  -32(AX)(R12*1), R13
 1431 
 1432 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
 1433     MOVOU (R10), X4
 1434     MOVOU 16(R10), X5
 1435     MOVOA X4, (R13)
 1436     MOVOA X5, 16(R13)
 1437     ADDQ  $0x20, R13
 1438     ADDQ  $0x20, R10
 1439     ADDQ  $0x20, R12
 1440     DECQ  R11
 1441     JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back
 1442 
 1443 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
 1444     MOVOU -32(R9)(R12*1), X4
 1445     MOVOU -16(R9)(R12*1), X5
 1446     MOVOA X4, -32(AX)(R12*1)
 1447     MOVOA X5, -16(AX)(R12*1)
 1448     ADDQ  $0x20, R12
 1449     CMPQ  R8, R12
 1450     JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
 1451     MOVOU X0, (AX)
 1452     MOVOU X1, 16(AX)
 1453     MOVOU X2, -32(AX)(R8*1)
 1454     MOVOU X3, -16(AX)(R8*1)
 1455     MOVQ  BP, AX
 1456 
 1457 emit_literal_done_repeat_emit_encodeBlockAsm4MB:
 1458     ADDL $0x05, CX
 1459     MOVL CX, BP
 1460     SUBL 16(SP), BP
 1461     MOVQ src_len+32(FP), R8
 1462     SUBL CX, R8
 1463     LEAQ (DX)(CX*1), R9
 1464     LEAQ (DX)(BP*1), BP
 1465 
 1466     // matchLen
 1467     XORL R11, R11
 1468     CMPL R8, $0x08
 1469     JL   matchlen_single_repeat_extend_encodeBlockAsm4MB
 1470 
 1471 matchlen_loopback_repeat_extend_encodeBlockAsm4MB:
 1472     MOVQ  (R9)(R11*1), R10
 1473     XORQ  (BP)(R11*1), R10
 1474     TESTQ R10, R10
 1475     JZ    matchlen_loop_repeat_extend_encodeBlockAsm4MB
 1476     BSFQ  R10, R10
 1477     SARQ  $0x03, R10
 1478     LEAL  (R11)(R10*1), R11
 1479     JMP   repeat_extend_forward_end_encodeBlockAsm4MB
 1480 
 1481 matchlen_loop_repeat_extend_encodeBlockAsm4MB:
 1482     LEAL -8(R8), R8
 1483     LEAL 8(R11), R11
 1484     CMPL R8, $0x08
 1485     JGE  matchlen_loopback_repeat_extend_encodeBlockAsm4MB
 1486 
 1487 matchlen_single_repeat_extend_encodeBlockAsm4MB:
 1488     TESTL R8, R8
 1489     JZ    repeat_extend_forward_end_encodeBlockAsm4MB
 1490 
 1491 matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB:
 1492     MOVB (R9)(R11*1), R10
 1493     CMPB (BP)(R11*1), R10
 1494     JNE  repeat_extend_forward_end_encodeBlockAsm4MB
 1495     LEAL 1(R11), R11
 1496     DECL R8
 1497     JNZ  matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB
 1498 
 1499 repeat_extend_forward_end_encodeBlockAsm4MB:
 1500     ADDL  R11, CX
 1501     MOVL  CX, BP
 1502     SUBL  SI, BP
 1503     MOVL  16(SP), SI
 1504     TESTL DI, DI
 1505     JZ    repeat_as_copy_encodeBlockAsm4MB
 1506 
 1507     // emitRepeat
 1508     MOVL BP, DI
 1509     LEAL -4(BP), BP
 1510     CMPL DI, $0x08
 1511     JLE  repeat_two_match_repeat_encodeBlockAsm4MB
 1512     CMPL DI, $0x0c
 1513     JGE  cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
 1514     CMPL SI, $0x00000800
 1515     JLT  repeat_two_offset_match_repeat_encodeBlockAsm4MB
 1516 
 1517 cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
 1518     CMPL BP, $0x00000104
 1519     JLT  repeat_three_match_repeat_encodeBlockAsm4MB
 1520     CMPL BP, $0x00010100
 1521     JLT  repeat_four_match_repeat_encodeBlockAsm4MB
 1522     LEAL -65536(BP), BP
 1523     MOVL BP, SI
 1524     MOVW $0x001d, (AX)
 1525     MOVW BP, 2(AX)
 1526     SARL $0x10, SI
 1527     MOVB SI, 4(AX)
 1528     ADDQ $0x05, AX
 1529     JMP  repeat_end_emit_encodeBlockAsm4MB
 1530 
 1531 repeat_four_match_repeat_encodeBlockAsm4MB:
 1532     LEAL -256(BP), BP
 1533     MOVW $0x0019, (AX)
 1534     MOVW BP, 2(AX)
 1535     ADDQ $0x04, AX
 1536     JMP  repeat_end_emit_encodeBlockAsm4MB
 1537 
 1538 repeat_three_match_repeat_encodeBlockAsm4MB:
 1539     LEAL -4(BP), BP
 1540     MOVW $0x0015, (AX)
 1541     MOVB BP, 2(AX)
 1542     ADDQ $0x03, AX
 1543     JMP  repeat_end_emit_encodeBlockAsm4MB
 1544 
 1545 repeat_two_match_repeat_encodeBlockAsm4MB:
 1546     SHLL $0x02, BP
 1547     ORL  $0x01, BP
 1548     MOVW BP, (AX)
 1549     ADDQ $0x02, AX
 1550     JMP  repeat_end_emit_encodeBlockAsm4MB
 1551 
 1552 repeat_two_offset_match_repeat_encodeBlockAsm4MB:
 1553     XORQ DI, DI
 1554     LEAL 1(DI)(BP*4), BP
 1555     MOVB SI, 1(AX)
 1556     SARL $0x08, SI
 1557     SHLL $0x05, SI
 1558     ORL  SI, BP
 1559     MOVB BP, (AX)
 1560     ADDQ $0x02, AX
 1561     JMP  repeat_end_emit_encodeBlockAsm4MB
 1562 
 1563 repeat_as_copy_encodeBlockAsm4MB:
 1564     // emitCopy
 1565     CMPL SI, $0x00010000
 1566     JL   two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
 1567 
 1568 four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB:
 1569     CMPL BP, $0x40
 1570     JLE  four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
 1571     MOVB $0xff, (AX)
 1572     MOVL SI, 1(AX)
 1573     LEAL -64(BP), BP
 1574     ADDQ $0x05, AX
 1575     CMPL BP, $0x04
 1576     JL   four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
 1577 
 1578     // emitRepeat
 1579     MOVL BP, DI
 1580     LEAL -4(BP), BP
 1581     CMPL DI, $0x08
 1582     JLE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 1583     CMPL DI, $0x0c
 1584     JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 1585     CMPL SI, $0x00000800
 1586     JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 1587 
 1588 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
 1589     CMPL BP, $0x00000104
 1590     JLT  repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 1591     CMPL BP, $0x00010100
 1592     JLT  repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 1593     LEAL -65536(BP), BP
 1594     MOVL BP, SI
 1595     MOVW $0x001d, (AX)
 1596     MOVW BP, 2(AX)
 1597     SARL $0x10, SI
 1598     MOVB SI, 4(AX)
 1599     ADDQ $0x05, AX
 1600     JMP  repeat_end_emit_encodeBlockAsm4MB
 1601 
 1602 repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
 1603     LEAL -256(BP), BP
 1604     MOVW $0x0019, (AX)
 1605     MOVW BP, 2(AX)
 1606     ADDQ $0x04, AX
 1607     JMP  repeat_end_emit_encodeBlockAsm4MB
 1608 
 1609 repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
 1610     LEAL -4(BP), BP
 1611     MOVW $0x0015, (AX)
 1612     MOVB BP, 2(AX)
 1613     ADDQ $0x03, AX
 1614     JMP  repeat_end_emit_encodeBlockAsm4MB
 1615 
 1616 repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
 1617     SHLL $0x02, BP
 1618     ORL  $0x01, BP
 1619     MOVW BP, (AX)
 1620     ADDQ $0x02, AX
 1621     JMP  repeat_end_emit_encodeBlockAsm4MB
 1622 
 1623 repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
 1624     XORQ DI, DI
 1625     LEAL 1(DI)(BP*4), BP
 1626     MOVB SI, 1(AX)
 1627     SARL $0x08, SI
 1628     SHLL $0x05, SI
 1629     ORL  SI, BP
 1630     MOVB BP, (AX)
 1631     ADDQ $0x02, AX
 1632     JMP  repeat_end_emit_encodeBlockAsm4MB
 1633     JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB
 1634 
 1635 four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
 1636     TESTL BP, BP
 1637     JZ    repeat_end_emit_encodeBlockAsm4MB
 1638     MOVB  $0x03, BL
 1639     LEAL  -4(BX)(BP*4), BP
 1640     MOVB  BP, (AX)
 1641     MOVL  SI, 1(AX)
 1642     ADDQ  $0x05, AX
 1643     JMP   repeat_end_emit_encodeBlockAsm4MB
 1644 
 1645 two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
 1646     CMPL BP, $0x40
 1647     JLE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
 1648     MOVB $0xee, (AX)
 1649     MOVW SI, 1(AX)
 1650     LEAL -60(BP), BP
 1651     ADDQ $0x03, AX
 1652 
 1653     // emitRepeat
 1654     MOVL BP, DI
 1655     LEAL -4(BP), BP
 1656     CMPL DI, $0x08
 1657     JLE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 1658     CMPL DI, $0x0c
 1659     JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 1660     CMPL SI, $0x00000800
 1661     JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 1662 
 1663 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
 1664     CMPL BP, $0x00000104
 1665     JLT  repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 1666     CMPL BP, $0x00010100
 1667     JLT  repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 1668     LEAL -65536(BP), BP
 1669     MOVL BP, SI
 1670     MOVW $0x001d, (AX)
 1671     MOVW BP, 2(AX)
 1672     SARL $0x10, SI
 1673     MOVB SI, 4(AX)
 1674     ADDQ $0x05, AX
 1675     JMP  repeat_end_emit_encodeBlockAsm4MB
 1676 
 1677 repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
 1678     LEAL -256(BP), BP
 1679     MOVW $0x0019, (AX)
 1680     MOVW BP, 2(AX)
 1681     ADDQ $0x04, AX
 1682     JMP  repeat_end_emit_encodeBlockAsm4MB
 1683 
 1684 repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
 1685     LEAL -4(BP), BP
 1686     MOVW $0x0015, (AX)
 1687     MOVB BP, 2(AX)
 1688     ADDQ $0x03, AX
 1689     JMP  repeat_end_emit_encodeBlockAsm4MB
 1690 
 1691 repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
 1692     SHLL $0x02, BP
 1693     ORL  $0x01, BP
 1694     MOVW BP, (AX)
 1695     ADDQ $0x02, AX
 1696     JMP  repeat_end_emit_encodeBlockAsm4MB
 1697 
 1698 repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
 1699     XORQ DI, DI
 1700     LEAL 1(DI)(BP*4), BP
 1701     MOVB SI, 1(AX)
 1702     SARL $0x08, SI
 1703     SHLL $0x05, SI
 1704     ORL  SI, BP
 1705     MOVB BP, (AX)
 1706     ADDQ $0x02, AX
 1707     JMP  repeat_end_emit_encodeBlockAsm4MB
 1708     JMP two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
 1709 
 1710 two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
 1711     CMPL BP, $0x0c
 1712     JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
 1713     CMPL SI, $0x00000800
 1714     JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
 1715     MOVB $0x01, BL
 1716     LEAL -16(BX)(BP*4), BP
 1717     MOVB SI, 1(AX)
 1718     SHRL $0x08, SI
 1719     SHLL $0x05, SI
 1720     ORL  SI, BP
 1721     MOVB BP, (AX)
 1722     ADDQ $0x02, AX
 1723     JMP  repeat_end_emit_encodeBlockAsm4MB
 1724 
 1725 emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
 1726     MOVB $0x02, BL
 1727     LEAL -4(BX)(BP*4), BP
 1728     MOVB BP, (AX)
 1729     MOVW SI, 1(AX)
 1730     ADDQ $0x03, AX
 1731 
 1732 repeat_end_emit_encodeBlockAsm4MB:
 1733     MOVL CX, 12(SP)
 1734     JMP  search_loop_encodeBlockAsm4MB
 1735 
 1736 no_repeat_found_encodeBlockAsm4MB:
 1737     CMPL (DX)(BP*1), SI
 1738     JEQ  candidate_match_encodeBlockAsm4MB
 1739     SHRQ $0x08, SI
 1740     MOVL 24(SP)(R9*4), BP
 1741     LEAL 2(CX), R8
 1742     CMPL (DX)(DI*1), SI
 1743     JEQ  candidate2_match_encodeBlockAsm4MB
 1744     MOVL R8, 24(SP)(R9*4)
 1745     SHRQ $0x08, SI
 1746     CMPL (DX)(BP*1), SI
 1747     JEQ  candidate3_match_encodeBlockAsm4MB
 1748     MOVL 20(SP), CX
 1749     JMP  search_loop_encodeBlockAsm4MB
 1750 
 1751 candidate3_match_encodeBlockAsm4MB:
 1752     ADDL $0x02, CX
 1753     JMP  candidate_match_encodeBlockAsm4MB
 1754 
 1755 candidate2_match_encodeBlockAsm4MB:
 1756     MOVL R8, 24(SP)(R9*4)
 1757     INCL CX
 1758     MOVL DI, BP
 1759 
 1760 candidate_match_encodeBlockAsm4MB:
 1761     MOVL  12(SP), SI
 1762     TESTL BP, BP
 1763     JZ    match_extend_back_end_encodeBlockAsm4MB
 1764 
 1765 match_extend_back_loop_encodeBlockAsm4MB:
 1766     CMPL CX, SI
 1767     JLE  match_extend_back_end_encodeBlockAsm4MB
 1768     MOVB -1(DX)(BP*1), BL
 1769     MOVB -1(DX)(CX*1), DI
 1770     CMPB BL, DI
 1771     JNE  match_extend_back_end_encodeBlockAsm4MB
 1772     LEAL -1(CX), CX
 1773     DECL BP
 1774     JZ   match_extend_back_end_encodeBlockAsm4MB
 1775     JMP  match_extend_back_loop_encodeBlockAsm4MB
 1776 
 1777 match_extend_back_end_encodeBlockAsm4MB:
 1778     MOVL CX, SI
 1779     SUBL 12(SP), SI
 1780     LEAQ 4(AX)(SI*1), SI
 1781     CMPQ SI, (SP)
 1782     JL   match_dst_size_check_encodeBlockAsm4MB
 1783     MOVQ $0x00000000, ret+48(FP)
 1784     RET
 1785 
 1786 match_dst_size_check_encodeBlockAsm4MB:
 1787     MOVL CX, SI
 1788     MOVL 12(SP), DI
 1789     CMPL DI, SI
 1790     JEQ  emit_literal_done_match_emit_encodeBlockAsm4MB
 1791     MOVL SI, R8
 1792     MOVL SI, 12(SP)
 1793     LEAQ (DX)(DI*1), SI
 1794     SUBL DI, R8
 1795     LEAL -1(R8), DI
 1796     CMPL DI, $0x3c
 1797     JLT  one_byte_match_emit_encodeBlockAsm4MB
 1798     CMPL DI, $0x00000100
 1799     JLT  two_bytes_match_emit_encodeBlockAsm4MB
 1800     CMPL DI, $0x00010000
 1801     JLT  three_bytes_match_emit_encodeBlockAsm4MB
 1802     MOVL DI, R9
 1803     SHRL $0x10, R9
 1804     MOVB $0xf8, (AX)
 1805     MOVW DI, 1(AX)
 1806     MOVB R9, 3(AX)
 1807     ADDQ $0x04, AX
 1808     JMP  memmove_long_match_emit_encodeBlockAsm4MB
 1809 
 1810 three_bytes_match_emit_encodeBlockAsm4MB:
 1811     MOVB $0xf4, (AX)
 1812     MOVW DI, 1(AX)
 1813     ADDQ $0x03, AX
 1814     JMP  memmove_long_match_emit_encodeBlockAsm4MB
 1815 
 1816 two_bytes_match_emit_encodeBlockAsm4MB:
 1817     MOVB $0xf0, (AX)
 1818     MOVB DI, 1(AX)
 1819     ADDQ $0x02, AX
 1820     CMPL DI, $0x40
 1821     JL   memmove_match_emit_encodeBlockAsm4MB
 1822     JMP  memmove_long_match_emit_encodeBlockAsm4MB
 1823 
 1824 one_byte_match_emit_encodeBlockAsm4MB:
 1825     SHLB $0x02, DI
 1826     MOVB DI, (AX)
 1827     ADDQ $0x01, AX
 1828 
 1829 memmove_match_emit_encodeBlockAsm4MB:
 1830     LEAQ (AX)(R8*1), DI
 1831 
 1832     // genMemMoveShort
 1833     CMPQ R8, $0x03
 1834     JB   emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_1or2
 1835     JE   emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_3
 1836     CMPQ R8, $0x08
 1837     JB   emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_4through7
 1838     CMPQ R8, $0x10
 1839     JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
 1840     CMPQ R8, $0x20
 1841     JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
 1842     JMP  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64
 1843 
 1844 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_1or2:
 1845     MOVB (SI), R9
 1846     MOVB -1(SI)(R8*1), SI
 1847     MOVB R9, (AX)
 1848     MOVB SI, -1(AX)(R8*1)
 1849     JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
 1850 
 1851 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_3:
 1852     MOVW (SI), R9
 1853     MOVB 2(SI), SI
 1854     MOVW R9, (AX)
 1855     MOVB SI, 2(AX)
 1856     JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
 1857 
 1858 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_4through7:
 1859     MOVL (SI), R9
 1860     MOVL -4(SI)(R8*1), SI
 1861     MOVL R9, (AX)
 1862     MOVL SI, -4(AX)(R8*1)
 1863     JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
 1864 
 1865 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
 1866     MOVQ (SI), R9
 1867     MOVQ -8(SI)(R8*1), SI
 1868     MOVQ R9, (AX)
 1869     MOVQ SI, -8(AX)(R8*1)
 1870     JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
 1871 
 1872 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
 1873     MOVOU (SI), X0
 1874     MOVOU -16(SI)(R8*1), X1
 1875     MOVOU X0, (AX)
 1876     MOVOU X1, -16(AX)(R8*1)
 1877     JMP   memmove_end_copy_match_emit_encodeBlockAsm4MB
 1878 
 1879 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
 1880     MOVOU (SI), X0
 1881     MOVOU 16(SI), X1
 1882     MOVOU -32(SI)(R8*1), X2
 1883     MOVOU -16(SI)(R8*1), X3
 1884     MOVOU X0, (AX)
 1885     MOVOU X1, 16(AX)
 1886     MOVOU X2, -32(AX)(R8*1)
 1887     MOVOU X3, -16(AX)(R8*1)
 1888 
 1889 memmove_end_copy_match_emit_encodeBlockAsm4MB:
 1890     MOVQ DI, AX
 1891     JMP  emit_literal_done_match_emit_encodeBlockAsm4MB
 1892 
 1893 memmove_long_match_emit_encodeBlockAsm4MB:
 1894     LEAQ (AX)(R8*1), DI
 1895 
 1896     // genMemMoveLong
 1897     MOVOU (SI), X0
 1898     MOVOU 16(SI), X1
 1899     MOVOU -32(SI)(R8*1), X2
 1900     MOVOU -16(SI)(R8*1), X3
 1901     MOVQ  R8, R10
 1902     SHRQ  $0x05, R10
 1903     MOVQ  AX, R9
 1904     ANDL  $0x0000001f, R9
 1905     MOVQ  $0x00000040, R11
 1906     SUBQ  R9, R11
 1907     DECQ  R10
 1908     JA    emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
 1909     LEAQ  -32(SI)(R11*1), R9
 1910     LEAQ  -32(AX)(R11*1), R12
 1911 
 1912 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
 1913     MOVOU (R9), X4
 1914     MOVOU 16(R9), X5
 1915     MOVOA X4, (R12)
 1916     MOVOA X5, 16(R12)
 1917     ADDQ  $0x20, R12
 1918     ADDQ  $0x20, R9
 1919     ADDQ  $0x20, R11
 1920     DECQ  R10
 1921     JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back
 1922 
 1923 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
 1924     MOVOU -32(SI)(R11*1), X4
 1925     MOVOU -16(SI)(R11*1), X5
 1926     MOVOA X4, -32(AX)(R11*1)
 1927     MOVOA X5, -16(AX)(R11*1)
 1928     ADDQ  $0x20, R11
 1929     CMPQ  R8, R11
 1930     JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
 1931     MOVOU X0, (AX)
 1932     MOVOU X1, 16(AX)
 1933     MOVOU X2, -32(AX)(R8*1)
 1934     MOVOU X3, -16(AX)(R8*1)
 1935     MOVQ  DI, AX
 1936 
 1937 emit_literal_done_match_emit_encodeBlockAsm4MB:
 1938 match_nolit_loop_encodeBlockAsm4MB:
 1939     MOVL CX, SI
 1940     SUBL BP, SI
 1941     MOVL SI, 16(SP)
 1942     ADDL $0x04, CX
 1943     ADDL $0x04, BP
 1944     MOVQ src_len+32(FP), SI
 1945     SUBL CX, SI
 1946     LEAQ (DX)(CX*1), DI
 1947     LEAQ (DX)(BP*1), BP
 1948 
 1949     // matchLen
 1950     XORL R9, R9
 1951     CMPL SI, $0x08
 1952     JL   matchlen_single_match_nolit_encodeBlockAsm4MB
 1953 
 1954 matchlen_loopback_match_nolit_encodeBlockAsm4MB:
 1955     MOVQ  (DI)(R9*1), R8
 1956     XORQ  (BP)(R9*1), R8
 1957     TESTQ R8, R8
 1958     JZ    matchlen_loop_match_nolit_encodeBlockAsm4MB
 1959     BSFQ  R8, R8
 1960     SARQ  $0x03, R8
 1961     LEAL  (R9)(R8*1), R9
 1962     JMP   match_nolit_end_encodeBlockAsm4MB
 1963 
 1964 matchlen_loop_match_nolit_encodeBlockAsm4MB:
 1965     LEAL -8(SI), SI
 1966     LEAL 8(R9), R9
 1967     CMPL SI, $0x08
 1968     JGE  matchlen_loopback_match_nolit_encodeBlockAsm4MB
 1969 
 1970 matchlen_single_match_nolit_encodeBlockAsm4MB:
 1971     TESTL SI, SI
 1972     JZ    match_nolit_end_encodeBlockAsm4MB
 1973 
 1974 matchlen_single_loopback_match_nolit_encodeBlockAsm4MB:
 1975     MOVB (DI)(R9*1), R8
 1976     CMPB (BP)(R9*1), R8
 1977     JNE  match_nolit_end_encodeBlockAsm4MB
 1978     LEAL 1(R9), R9
 1979     DECL SI
 1980     JNZ  matchlen_single_loopback_match_nolit_encodeBlockAsm4MB
 1981 
 1982 match_nolit_end_encodeBlockAsm4MB:
 1983     ADDL R9, CX
 1984     MOVL 16(SP), BP
 1985     ADDL $0x04, R9
 1986     MOVL CX, 12(SP)
 1987 
 1988     // emitCopy
 1989     CMPL BP, $0x00010000
 1990     JL   two_byte_offset_match_nolit_encodeBlockAsm4MB
 1991 
 1992 four_bytes_loop_back_match_nolit_encodeBlockAsm4MB:
 1993     CMPL R9, $0x40
 1994     JLE  four_bytes_remain_match_nolit_encodeBlockAsm4MB
 1995     MOVB $0xff, (AX)
 1996     MOVL BP, 1(AX)
 1997     LEAL -64(R9), R9
 1998     ADDQ $0x05, AX
 1999     CMPL R9, $0x04
 2000     JL   four_bytes_remain_match_nolit_encodeBlockAsm4MB
 2001 
 2002     // emitRepeat
 2003     MOVL R9, SI
 2004     LEAL -4(R9), R9
 2005     CMPL SI, $0x08
 2006     JLE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
 2007     CMPL SI, $0x0c
 2008     JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
 2009     CMPL BP, $0x00000800
 2010     JLT  repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
 2011 
 2012 cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
 2013     CMPL R9, $0x00000104
 2014     JLT  repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
 2015     CMPL R9, $0x00010100
 2016     JLT  repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
 2017     LEAL -65536(R9), R9
 2018     MOVL R9, BP
 2019     MOVW $0x001d, (AX)
 2020     MOVW R9, 2(AX)
 2021     SARL $0x10, BP
 2022     MOVB BP, 4(AX)
 2023     ADDQ $0x05, AX
 2024     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2025 
 2026 repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
 2027     LEAL -256(R9), R9
 2028     MOVW $0x0019, (AX)
 2029     MOVW R9, 2(AX)
 2030     ADDQ $0x04, AX
 2031     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2032 
 2033 repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
 2034     LEAL -4(R9), R9
 2035     MOVW $0x0015, (AX)
 2036     MOVB R9, 2(AX)
 2037     ADDQ $0x03, AX
 2038     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2039 
 2040 repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
 2041     SHLL $0x02, R9
 2042     ORL  $0x01, R9
 2043     MOVW R9, (AX)
 2044     ADDQ $0x02, AX
 2045     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2046 
 2047 repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
 2048     XORQ SI, SI
 2049     LEAL 1(SI)(R9*4), R9
 2050     MOVB BP, 1(AX)
 2051     SARL $0x08, BP
 2052     SHLL $0x05, BP
 2053     ORL  BP, R9
 2054     MOVB R9, (AX)
 2055     ADDQ $0x02, AX
 2056     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2057     JMP four_bytes_loop_back_match_nolit_encodeBlockAsm4MB
 2058 
 2059 four_bytes_remain_match_nolit_encodeBlockAsm4MB:
 2060     TESTL R9, R9
 2061     JZ    match_nolit_emitcopy_end_encodeBlockAsm4MB
 2062     MOVB  $0x03, BL
 2063     LEAL  -4(BX)(R9*4), R9
 2064     MOVB  R9, (AX)
 2065     MOVL  BP, 1(AX)
 2066     ADDQ  $0x05, AX
 2067     JMP   match_nolit_emitcopy_end_encodeBlockAsm4MB
 2068 
 2069 two_byte_offset_match_nolit_encodeBlockAsm4MB:
 2070     CMPL R9, $0x40
 2071     JLE  two_byte_offset_short_match_nolit_encodeBlockAsm4MB
 2072     MOVB $0xee, (AX)
 2073     MOVW BP, 1(AX)
 2074     LEAL -60(R9), R9
 2075     ADDQ $0x03, AX
 2076 
 2077     // emitRepeat
 2078     MOVL R9, SI
 2079     LEAL -4(R9), R9
 2080     CMPL SI, $0x08
 2081     JLE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
 2082     CMPL SI, $0x0c
 2083     JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
 2084     CMPL BP, $0x00000800
 2085     JLT  repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
 2086 
 2087 cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
 2088     CMPL R9, $0x00000104
 2089     JLT  repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
 2090     CMPL R9, $0x00010100
 2091     JLT  repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
 2092     LEAL -65536(R9), R9
 2093     MOVL R9, BP
 2094     MOVW $0x001d, (AX)
 2095     MOVW R9, 2(AX)
 2096     SARL $0x10, BP
 2097     MOVB BP, 4(AX)
 2098     ADDQ $0x05, AX
 2099     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2100 
 2101 repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
 2102     LEAL -256(R9), R9
 2103     MOVW $0x0019, (AX)
 2104     MOVW R9, 2(AX)
 2105     ADDQ $0x04, AX
 2106     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2107 
 2108 repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
 2109     LEAL -4(R9), R9
 2110     MOVW $0x0015, (AX)
 2111     MOVB R9, 2(AX)
 2112     ADDQ $0x03, AX
 2113     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2114 
 2115 repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
 2116     SHLL $0x02, R9
 2117     ORL  $0x01, R9
 2118     MOVW R9, (AX)
 2119     ADDQ $0x02, AX
 2120     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2121 
 2122 repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
 2123     XORQ SI, SI
 2124     LEAL 1(SI)(R9*4), R9
 2125     MOVB BP, 1(AX)
 2126     SARL $0x08, BP
 2127     SHLL $0x05, BP
 2128     ORL  BP, R9
 2129     MOVB R9, (AX)
 2130     ADDQ $0x02, AX
 2131     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2132     JMP two_byte_offset_match_nolit_encodeBlockAsm4MB
 2133 
 2134 two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
 2135     CMPL R9, $0x0c
 2136     JGE  emit_copy_three_match_nolit_encodeBlockAsm4MB
 2137     CMPL BP, $0x00000800
 2138     JGE  emit_copy_three_match_nolit_encodeBlockAsm4MB
 2139     MOVB $0x01, BL
 2140     LEAL -16(BX)(R9*4), R9
 2141     MOVB BP, 1(AX)
 2142     SHRL $0x08, BP
 2143     SHLL $0x05, BP
 2144     ORL  BP, R9
 2145     MOVB R9, (AX)
 2146     ADDQ $0x02, AX
 2147     JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 2148 
 2149 emit_copy_three_match_nolit_encodeBlockAsm4MB:
 2150     MOVB $0x02, BL
 2151     LEAL -4(BX)(R9*4), R9
 2152     MOVB R9, (AX)
 2153     MOVW BP, 1(AX)
 2154     ADDQ $0x03, AX
 2155 
 2156 match_nolit_emitcopy_end_encodeBlockAsm4MB:
 2157     CMPL CX, 8(SP)
 2158     JGE  emit_remainder_encodeBlockAsm4MB
 2159     MOVQ -2(DX)(CX*1), SI
 2160     CMPQ AX, (SP)
 2161     JL   match_nolit_dst_ok_encodeBlockAsm4MB
 2162     MOVQ $0x00000000, ret+48(FP)
 2163     RET
 2164 
 2165 match_nolit_dst_ok_encodeBlockAsm4MB:
 2166     MOVQ  $0x0000cf1bbcdcbf9b, R8
 2167     MOVQ  SI, DI
 2168     SHRQ  $0x10, SI
 2169     MOVQ  SI, BP
 2170     SHLQ  $0x10, DI
 2171     IMULQ R8, DI
 2172     SHRQ  $0x32, DI
 2173     SHLQ  $0x10, BP
 2174     IMULQ R8, BP
 2175     SHRQ  $0x32, BP
 2176     LEAL  -2(CX), R8
 2177     LEAQ  24(SP)(BP*4), R9
 2178     MOVL  (R9), BP
 2179     MOVL  R8, 24(SP)(DI*4)
 2180     MOVL  CX, (R9)
 2181     CMPL  (DX)(BP*1), SI
 2182     JEQ   match_nolit_loop_encodeBlockAsm4MB
 2183     INCL  CX
 2184     JMP   search_loop_encodeBlockAsm4MB
 2185 
 2186 emit_remainder_encodeBlockAsm4MB:
 2187     MOVQ src_len+32(FP), CX
 2188     SUBL 12(SP), CX
 2189     LEAQ 4(AX)(CX*1), CX
 2190     CMPQ CX, (SP)
 2191     JL   emit_remainder_ok_encodeBlockAsm4MB
 2192     MOVQ $0x00000000, ret+48(FP)
 2193     RET
 2194 
 2195 emit_remainder_ok_encodeBlockAsm4MB:
 2196     MOVQ src_len+32(FP), CX
 2197     MOVL 12(SP), BX
 2198     CMPL BX, CX
 2199     JEQ  emit_literal_done_emit_remainder_encodeBlockAsm4MB
 2200     MOVL CX, BP
 2201     MOVL CX, 12(SP)
 2202     LEAQ (DX)(BX*1), CX
 2203     SUBL BX, BP
 2204     LEAL -1(BP), DX
 2205     CMPL DX, $0x3c
 2206     JLT  one_byte_emit_remainder_encodeBlockAsm4MB
 2207     CMPL DX, $0x00000100
 2208     JLT  two_bytes_emit_remainder_encodeBlockAsm4MB
 2209     CMPL DX, $0x00010000
 2210     JLT  three_bytes_emit_remainder_encodeBlockAsm4MB
 2211     MOVL DX, BX
 2212     SHRL $0x10, BX
 2213     MOVB $0xf8, (AX)
 2214     MOVW DX, 1(AX)
 2215     MOVB BL, 3(AX)
 2216     ADDQ $0x04, AX
 2217     JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
 2218 
 2219 three_bytes_emit_remainder_encodeBlockAsm4MB:
 2220     MOVB $0xf4, (AX)
 2221     MOVW DX, 1(AX)
 2222     ADDQ $0x03, AX
 2223     JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
 2224 
 2225 two_bytes_emit_remainder_encodeBlockAsm4MB:
 2226     MOVB $0xf0, (AX)
 2227     MOVB DL, 1(AX)
 2228     ADDQ $0x02, AX
 2229     CMPL DX, $0x40
 2230     JL   memmove_emit_remainder_encodeBlockAsm4MB
 2231     JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
 2232 
 2233 one_byte_emit_remainder_encodeBlockAsm4MB:
 2234     SHLB $0x02, DL
 2235     MOVB DL, (AX)
 2236     ADDQ $0x01, AX
 2237 
 2238 memmove_emit_remainder_encodeBlockAsm4MB:
 2239     LEAQ (AX)(BP*1), DX
 2240     MOVL BP, BX
 2241 
 2242     // genMemMoveShort
 2243     CMPQ BX, $0x03
 2244     JB   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2
 2245     JE   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3
 2246     CMPQ BX, $0x08
 2247     JB   emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7
 2248     CMPQ BX, $0x10
 2249     JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16
 2250     CMPQ BX, $0x20
 2251     JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32
 2252     JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64
 2253 
 2254 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2:
 2255     MOVB (CX), BP
 2256     MOVB -1(CX)(BX*1), CL
 2257     MOVB BP, (AX)
 2258     MOVB CL, -1(AX)(BX*1)
 2259     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 2260 
 2261 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3:
 2262     MOVW (CX), BP
 2263     MOVB 2(CX), CL
 2264     MOVW BP, (AX)
 2265     MOVB CL, 2(AX)
 2266     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 2267 
 2268 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7:
 2269     MOVL (CX), BP
 2270     MOVL -4(CX)(BX*1), CX
 2271     MOVL BP, (AX)
 2272     MOVL CX, -4(AX)(BX*1)
 2273     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 2274 
 2275 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
 2276     MOVQ (CX), BP
 2277     MOVQ -8(CX)(BX*1), CX
 2278     MOVQ BP, (AX)
 2279     MOVQ CX, -8(AX)(BX*1)
 2280     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 2281 
 2282 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
 2283     MOVOU (CX), X0
 2284     MOVOU -16(CX)(BX*1), X1
 2285     MOVOU X0, (AX)
 2286     MOVOU X1, -16(AX)(BX*1)
 2287     JMP   memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 2288 
 2289 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
 2290     MOVOU (CX), X0
 2291     MOVOU 16(CX), X1
 2292     MOVOU -32(CX)(BX*1), X2
 2293     MOVOU -16(CX)(BX*1), X3
 2294     MOVOU X0, (AX)
 2295     MOVOU X1, 16(AX)
 2296     MOVOU X2, -32(AX)(BX*1)
 2297     MOVOU X3, -16(AX)(BX*1)
 2298 
 2299 memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
 2300     MOVQ DX, AX
 2301     JMP  emit_literal_done_emit_remainder_encodeBlockAsm4MB
 2302 
 2303 memmove_long_emit_remainder_encodeBlockAsm4MB:
 2304     LEAQ (AX)(BP*1), DX
 2305     MOVL BP, BX
 2306 
 2307     // genMemMoveLong
 2308     MOVOU (CX), X0
 2309     MOVOU 16(CX), X1
 2310     MOVOU -32(CX)(BX*1), X2
 2311     MOVOU -16(CX)(BX*1), X3
 2312     MOVQ  BX, SI
 2313     SHRQ  $0x05, SI
 2314     MOVQ  AX, BP
 2315     ANDL  $0x0000001f, BP
 2316     MOVQ  $0x00000040, DI
 2317     SUBQ  BP, DI
 2318     DECQ  SI
 2319     JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
 2320     LEAQ  -32(CX)(DI*1), BP
 2321     LEAQ  -32(AX)(DI*1), R8
 2322 
 2323 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
 2324     MOVOU (BP), X4
 2325     MOVOU 16(BP), X5
 2326     MOVOA X4, (R8)
 2327     MOVOA X5, 16(R8)
 2328     ADDQ  $0x20, R8
 2329     ADDQ  $0x20, BP
 2330     ADDQ  $0x20, DI
 2331     DECQ  SI
 2332     JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back
 2333 
 2334 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
 2335     MOVOU -32(CX)(DI*1), X4
 2336     MOVOU -16(CX)(DI*1), X5
 2337     MOVOA X4, -32(AX)(DI*1)
 2338     MOVOA X5, -16(AX)(DI*1)
 2339     ADDQ  $0x20, DI
 2340     CMPQ  BX, DI
 2341     JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
 2342     MOVOU X0, (AX)
 2343     MOVOU X1, 16(AX)
 2344     MOVOU X2, -32(AX)(BX*1)
 2345     MOVOU X3, -16(AX)(BX*1)
 2346     MOVQ  DX, AX
 2347 
 2348 emit_literal_done_emit_remainder_encodeBlockAsm4MB:
 2349     MOVQ dst_base+0(FP), CX
 2350     SUBQ CX, AX
 2351     MOVQ AX, ret+48(FP)
 2352     RET
 2353 
 2354 // func encodeBlockAsm12B(dst []byte, src []byte) int
 2355 // Requires: SSE2
 2356 TEXT ·encodeBlockAsm12B(SB), $16408-56
 2357     MOVQ dst_base+0(FP), AX
 2358     MOVQ $0x00000080, CX
 2359     LEAQ 24(SP), DX
 2360     PXOR X0, X0
 2361 
 2362 zero_loop_encodeBlockAsm12B:
 2363     MOVOU X0, (DX)
 2364     MOVOU X0, 16(DX)
 2365     MOVOU X0, 32(DX)
 2366     MOVOU X0, 48(DX)
 2367     MOVOU X0, 64(DX)
 2368     MOVOU X0, 80(DX)
 2369     MOVOU X0, 96(DX)
 2370     MOVOU X0, 112(DX)
 2371     ADDQ  $0x80, DX
 2372     DECQ  CX
 2373     JNZ   zero_loop_encodeBlockAsm12B
 2374     MOVL  $0x00000000, 12(SP)
 2375     MOVQ  src_len+32(FP), CX
 2376     LEAQ  -5(CX), DX
 2377     LEAQ  -8(CX), BP
 2378     MOVL  BP, 8(SP)
 2379     SHRQ  $0x05, CX
 2380     SUBL  CX, DX
 2381     LEAQ  (AX)(DX*1), DX
 2382     MOVQ  DX, (SP)
 2383     MOVL  $0x00000001, CX
 2384     MOVL  CX, 16(SP)
 2385     MOVQ  src_base+24(FP), DX
 2386 
 2387 search_loop_encodeBlockAsm12B:
 2388     MOVQ  (DX)(CX*1), SI
 2389     MOVL  CX, BP
 2390     SUBL  12(SP), BP
 2391     SHRL  $0x05, BP
 2392     LEAL  4(CX)(BP*1), BP
 2393     CMPL  BP, 8(SP)
 2394     JGE   emit_remainder_encodeBlockAsm12B
 2395     MOVL  BP, 20(SP)
 2396     MOVQ  $0x000000cf1bbcdcbb, R8
 2397     MOVQ  SI, R9
 2398     MOVQ  SI, R10
 2399     SHRQ  $0x08, R10
 2400     SHLQ  $0x18, R9
 2401     IMULQ R8, R9
 2402     SHRQ  $0x34, R9
 2403     SHLQ  $0x18, R10
 2404     IMULQ R8, R10
 2405     SHRQ  $0x34, R10
 2406     MOVL  24(SP)(R9*4), BP
 2407     MOVL  24(SP)(R10*4), DI
 2408     MOVL  CX, 24(SP)(R9*4)
 2409     LEAL  1(CX), R9
 2410     MOVL  R9, 24(SP)(R10*4)
 2411     MOVQ  SI, R9
 2412     SHRQ  $0x10, R9
 2413     SHLQ  $0x18, R9
 2414     IMULQ R8, R9
 2415     SHRQ  $0x34, R9
 2416     MOVL  CX, R8
 2417     SUBL  16(SP), R8
 2418     MOVL  1(DX)(R8*1), R10
 2419     MOVQ  SI, R8
 2420     SHRQ  $0x08, R8
 2421     CMPL  R8, R10
 2422     JNE   no_repeat_found_encodeBlockAsm12B
 2423     LEAL  1(CX), SI
 2424     MOVL  12(SP), DI
 2425     MOVL  SI, BP
 2426     SUBL  16(SP), BP
 2427     JZ    repeat_extend_back_end_encodeBlockAsm12B
 2428 
 2429 repeat_extend_back_loop_encodeBlockAsm12B:
 2430     CMPL SI, DI
 2431     JLE  repeat_extend_back_end_encodeBlockAsm12B
 2432     MOVB -1(DX)(BP*1), BL
 2433     MOVB -1(DX)(SI*1), R8
 2434     CMPB BL, R8
 2435     JNE  repeat_extend_back_end_encodeBlockAsm12B
 2436     LEAL -1(SI), SI
 2437     DECL BP
 2438     JNZ  repeat_extend_back_loop_encodeBlockAsm12B
 2439 
 2440 repeat_extend_back_end_encodeBlockAsm12B:
 2441     MOVL 12(SP), BP
 2442     CMPL BP, SI
 2443     JEQ  emit_literal_done_repeat_emit_encodeBlockAsm12B
 2444     MOVL SI, R8
 2445     MOVL SI, 12(SP)
 2446     LEAQ (DX)(BP*1), R9
 2447     SUBL BP, R8
 2448     LEAL -1(R8), BP
 2449     CMPL BP, $0x3c
 2450     JLT  one_byte_repeat_emit_encodeBlockAsm12B
 2451     CMPL BP, $0x00000100
 2452     JLT  two_bytes_repeat_emit_encodeBlockAsm12B
 2453     MOVB $0xf4, (AX)
 2454     MOVW BP, 1(AX)
 2455     ADDQ $0x03, AX
 2456     JMP  memmove_long_repeat_emit_encodeBlockAsm12B
 2457 
 2458 two_bytes_repeat_emit_encodeBlockAsm12B:
 2459     MOVB $0xf0, (AX)
 2460     MOVB BP, 1(AX)
 2461     ADDQ $0x02, AX
 2462     CMPL BP, $0x40
 2463     JL   memmove_repeat_emit_encodeBlockAsm12B
 2464     JMP  memmove_long_repeat_emit_encodeBlockAsm12B
 2465 
 2466 one_byte_repeat_emit_encodeBlockAsm12B:
 2467     SHLB $0x02, BP
 2468     MOVB BP, (AX)
 2469     ADDQ $0x01, AX
 2470 
 2471 memmove_repeat_emit_encodeBlockAsm12B:
 2472     LEAQ (AX)(R8*1), BP
 2473 
 2474     // genMemMoveShort
 2475     CMPQ R8, $0x03
 2476     JB   emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2
 2477     JE   emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3
 2478     CMPQ R8, $0x08
 2479     JB   emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4through7
 2480     CMPQ R8, $0x10
 2481     JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
 2482     CMPQ R8, $0x20
 2483     JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
 2484     JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
 2485 
 2486 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2:
 2487     MOVB (R9), R10
 2488     MOVB -1(R9)(R8*1), R9
 2489     MOVB R10, (AX)
 2490     MOVB R9, -1(AX)(R8*1)
 2491     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
 2492 
 2493 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3:
 2494     MOVW (R9), R10
 2495     MOVB 2(R9), R9
 2496     MOVW R10, (AX)
 2497     MOVB R9, 2(AX)
 2498     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
 2499 
 2500 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4through7:
 2501     MOVL (R9), R10
 2502     MOVL -4(R9)(R8*1), R9
 2503     MOVL R10, (AX)
 2504     MOVL R9, -4(AX)(R8*1)
 2505     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
 2506 
 2507 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
 2508     MOVQ (R9), R10
 2509     MOVQ -8(R9)(R8*1), R9
 2510     MOVQ R10, (AX)
 2511     MOVQ R9, -8(AX)(R8*1)
 2512     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
 2513 
 2514 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
 2515     MOVOU (R9), X0
 2516     MOVOU -16(R9)(R8*1), X1
 2517     MOVOU X0, (AX)
 2518     MOVOU X1, -16(AX)(R8*1)
 2519     JMP   memmove_end_copy_repeat_emit_encodeBlockAsm12B
 2520 
 2521 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
 2522     MOVOU (R9), X0
 2523     MOVOU 16(R9), X1
 2524     MOVOU -32(R9)(R8*1), X2
 2525     MOVOU -16(R9)(R8*1), X3
 2526     MOVOU X0, (AX)
 2527     MOVOU X1, 16(AX)
 2528     MOVOU X2, -32(AX)(R8*1)
 2529     MOVOU X3, -16(AX)(R8*1)
 2530 
 2531 memmove_end_copy_repeat_emit_encodeBlockAsm12B:
 2532     MOVQ BP, AX
 2533     JMP  emit_literal_done_repeat_emit_encodeBlockAsm12B
 2534 
 2535 memmove_long_repeat_emit_encodeBlockAsm12B:
 2536     LEAQ (AX)(R8*1), BP
 2537 
 2538     // genMemMoveLong
 2539     MOVOU (R9), X0
 2540     MOVOU 16(R9), X1
 2541     MOVOU -32(R9)(R8*1), X2
 2542     MOVOU -16(R9)(R8*1), X3
 2543     MOVQ  R8, R11
 2544     SHRQ  $0x05, R11
 2545     MOVQ  AX, R10
 2546     ANDL  $0x0000001f, R10
 2547     MOVQ  $0x00000040, R12
 2548     SUBQ  R10, R12
 2549     DECQ  R11
 2550     JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
 2551     LEAQ  -32(R9)(R12*1), R10
 2552     LEAQ  -32(AX)(R12*1), R13
 2553 
 2554 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back:
 2555     MOVOU (R10), X4
 2556     MOVOU 16(R10), X5
 2557     MOVOA X4, (R13)
 2558     MOVOA X5, 16(R13)
 2559     ADDQ  $0x20, R13
 2560     ADDQ  $0x20, R10
 2561     ADDQ  $0x20, R12
 2562     DECQ  R11
 2563     JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back
 2564 
 2565 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
 2566     MOVOU -32(R9)(R12*1), X4
 2567     MOVOU -16(R9)(R12*1), X5
 2568     MOVOA X4, -32(AX)(R12*1)
 2569     MOVOA X5, -16(AX)(R12*1)
 2570     ADDQ  $0x20, R12
 2571     CMPQ  R8, R12
 2572     JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
 2573     MOVOU X0, (AX)
 2574     MOVOU X1, 16(AX)
 2575     MOVOU X2, -32(AX)(R8*1)
 2576     MOVOU X3, -16(AX)(R8*1)
 2577     MOVQ  BP, AX
 2578 
 2579 emit_literal_done_repeat_emit_encodeBlockAsm12B:
 2580     ADDL $0x05, CX
 2581     MOVL CX, BP
 2582     SUBL 16(SP), BP
 2583     MOVQ src_len+32(FP), R8
 2584     SUBL CX, R8
 2585     LEAQ (DX)(CX*1), R9
 2586     LEAQ (DX)(BP*1), BP
 2587 
 2588     // matchLen
 2589     XORL R11, R11
 2590     CMPL R8, $0x08
 2591     JL   matchlen_single_repeat_extend_encodeBlockAsm12B
 2592 
 2593 matchlen_loopback_repeat_extend_encodeBlockAsm12B:
 2594     MOVQ  (R9)(R11*1), R10
 2595     XORQ  (BP)(R11*1), R10
 2596     TESTQ R10, R10
 2597     JZ    matchlen_loop_repeat_extend_encodeBlockAsm12B
 2598     BSFQ  R10, R10
 2599     SARQ  $0x03, R10
 2600     LEAL  (R11)(R10*1), R11
 2601     JMP   repeat_extend_forward_end_encodeBlockAsm12B
 2602 
 2603 matchlen_loop_repeat_extend_encodeBlockAsm12B:
 2604     LEAL -8(R8), R8
 2605     LEAL 8(R11), R11
 2606     CMPL R8, $0x08
 2607     JGE  matchlen_loopback_repeat_extend_encodeBlockAsm12B
 2608 
 2609 matchlen_single_repeat_extend_encodeBlockAsm12B:
 2610     TESTL R8, R8
 2611     JZ    repeat_extend_forward_end_encodeBlockAsm12B
 2612 
 2613 matchlen_single_loopback_repeat_extend_encodeBlockAsm12B:
 2614     MOVB (R9)(R11*1), R10
 2615     CMPB (BP)(R11*1), R10
 2616     JNE  repeat_extend_forward_end_encodeBlockAsm12B
 2617     LEAL 1(R11), R11
 2618     DECL R8
 2619     JNZ  matchlen_single_loopback_repeat_extend_encodeBlockAsm12B
 2620 
 2621 repeat_extend_forward_end_encodeBlockAsm12B:
 2622     ADDL  R11, CX
 2623     MOVL  CX, BP
 2624     SUBL  SI, BP
 2625     MOVL  16(SP), SI
 2626     TESTL DI, DI
 2627     JZ    repeat_as_copy_encodeBlockAsm12B
 2628 
 2629     // emitRepeat
 2630     MOVL BP, DI
 2631     LEAL -4(BP), BP
 2632     CMPL DI, $0x08
 2633     JLE  repeat_two_match_repeat_encodeBlockAsm12B
 2634     CMPL DI, $0x0c
 2635     JGE  cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
 2636     CMPL SI, $0x00000800
 2637     JLT  repeat_two_offset_match_repeat_encodeBlockAsm12B
 2638 
 2639 cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
 2640     CMPL BP, $0x00000104
 2641     JLT  repeat_three_match_repeat_encodeBlockAsm12B
 2642     LEAL -256(BP), BP
 2643     MOVW $0x0019, (AX)
 2644     MOVW BP, 2(AX)
 2645     ADDQ $0x04, AX
 2646     JMP  repeat_end_emit_encodeBlockAsm12B
 2647 
 2648 repeat_three_match_repeat_encodeBlockAsm12B:
 2649     LEAL -4(BP), BP
 2650     MOVW $0x0015, (AX)
 2651     MOVB BP, 2(AX)
 2652     ADDQ $0x03, AX
 2653     JMP  repeat_end_emit_encodeBlockAsm12B
 2654 
 2655 repeat_two_match_repeat_encodeBlockAsm12B:
 2656     SHLL $0x02, BP
 2657     ORL  $0x01, BP
 2658     MOVW BP, (AX)
 2659     ADDQ $0x02, AX
 2660     JMP  repeat_end_emit_encodeBlockAsm12B
 2661 
 2662 repeat_two_offset_match_repeat_encodeBlockAsm12B:
 2663     XORQ DI, DI
 2664     LEAL 1(DI)(BP*4), BP
 2665     MOVB SI, 1(AX)
 2666     SARL $0x08, SI
 2667     SHLL $0x05, SI
 2668     ORL  SI, BP
 2669     MOVB BP, (AX)
 2670     ADDQ $0x02, AX
 2671     JMP  repeat_end_emit_encodeBlockAsm12B
 2672 
 2673 repeat_as_copy_encodeBlockAsm12B:
 2674     // emitCopy
 2675 two_byte_offset_repeat_as_copy_encodeBlockAsm12B:
 2676     CMPL BP, $0x40
 2677     JLE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
 2678     MOVB $0xee, (AX)
 2679     MOVW SI, 1(AX)
 2680     LEAL -60(BP), BP
 2681     ADDQ $0x03, AX
 2682 
 2683     // emitRepeat
 2684     MOVL BP, DI
 2685     LEAL -4(BP), BP
 2686     CMPL DI, $0x08
 2687     JLE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
 2688     CMPL DI, $0x0c
 2689     JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
 2690     CMPL SI, $0x00000800
 2691     JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
 2692 
 2693 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
 2694     CMPL BP, $0x00000104
 2695     JLT  repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
 2696     LEAL -256(BP), BP
 2697     MOVW $0x0019, (AX)
 2698     MOVW BP, 2(AX)
 2699     ADDQ $0x04, AX
 2700     JMP  repeat_end_emit_encodeBlockAsm12B
 2701 
 2702 repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
 2703     LEAL -4(BP), BP
 2704     MOVW $0x0015, (AX)
 2705     MOVB BP, 2(AX)
 2706     ADDQ $0x03, AX
 2707     JMP  repeat_end_emit_encodeBlockAsm12B
 2708 
 2709 repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
 2710     SHLL $0x02, BP
 2711     ORL  $0x01, BP
 2712     MOVW BP, (AX)
 2713     ADDQ $0x02, AX
 2714     JMP  repeat_end_emit_encodeBlockAsm12B
 2715 
 2716 repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
 2717     XORQ DI, DI
 2718     LEAL 1(DI)(BP*4), BP
 2719     MOVB SI, 1(AX)
 2720     SARL $0x08, SI
 2721     SHLL $0x05, SI
 2722     ORL  SI, BP
 2723     MOVB BP, (AX)
 2724     ADDQ $0x02, AX
 2725     JMP  repeat_end_emit_encodeBlockAsm12B
 2726     JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B
 2727 
 2728 two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
 2729     CMPL BP, $0x0c
 2730     JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
 2731     CMPL SI, $0x00000800
 2732     JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
 2733     MOVB $0x01, BL
 2734     LEAL -16(BX)(BP*4), BP
 2735     MOVB SI, 1(AX)
 2736     SHRL $0x08, SI
 2737     SHLL $0x05, SI
 2738     ORL  SI, BP
 2739     MOVB BP, (AX)
 2740     ADDQ $0x02, AX
 2741     JMP  repeat_end_emit_encodeBlockAsm12B
 2742 
 2743 emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
 2744     MOVB $0x02, BL
 2745     LEAL -4(BX)(BP*4), BP
 2746     MOVB BP, (AX)
 2747     MOVW SI, 1(AX)
 2748     ADDQ $0x03, AX
 2749 
 2750 repeat_end_emit_encodeBlockAsm12B:
 2751     MOVL CX, 12(SP)
 2752     JMP  search_loop_encodeBlockAsm12B
 2753 
 2754 no_repeat_found_encodeBlockAsm12B:
 2755     CMPL (DX)(BP*1), SI
 2756     JEQ  candidate_match_encodeBlockAsm12B
 2757     SHRQ $0x08, SI
 2758     MOVL 24(SP)(R9*4), BP
 2759     LEAL 2(CX), R8
 2760     CMPL (DX)(DI*1), SI
 2761     JEQ  candidate2_match_encodeBlockAsm12B
 2762     MOVL R8, 24(SP)(R9*4)
 2763     SHRQ $0x08, SI
 2764     CMPL (DX)(BP*1), SI
 2765     JEQ  candidate3_match_encodeBlockAsm12B
 2766     MOVL 20(SP), CX
 2767     JMP  search_loop_encodeBlockAsm12B
 2768 
 2769 candidate3_match_encodeBlockAsm12B:
 2770     ADDL $0x02, CX
 2771     JMP  candidate_match_encodeBlockAsm12B
 2772 
 2773 candidate2_match_encodeBlockAsm12B:
 2774     MOVL R8, 24(SP)(R9*4)
 2775     INCL CX
 2776     MOVL DI, BP
 2777 
 2778 candidate_match_encodeBlockAsm12B:
 2779     MOVL  12(SP), SI
 2780     TESTL BP, BP
 2781     JZ    match_extend_back_end_encodeBlockAsm12B
 2782 
 2783 match_extend_back_loop_encodeBlockAsm12B:
 2784     CMPL CX, SI
 2785     JLE  match_extend_back_end_encodeBlockAsm12B
 2786     MOVB -1(DX)(BP*1), BL
 2787     MOVB -1(DX)(CX*1), DI
 2788     CMPB BL, DI
 2789     JNE  match_extend_back_end_encodeBlockAsm12B
 2790     LEAL -1(CX), CX
 2791     DECL BP
 2792     JZ   match_extend_back_end_encodeBlockAsm12B
 2793     JMP  match_extend_back_loop_encodeBlockAsm12B
 2794 
 2795 match_extend_back_end_encodeBlockAsm12B:
 2796     MOVL CX, SI
 2797     SUBL 12(SP), SI
 2798     LEAQ 3(AX)(SI*1), SI
 2799     CMPQ SI, (SP)
 2800     JL   match_dst_size_check_encodeBlockAsm12B
 2801     MOVQ $0x00000000, ret+48(FP)
 2802     RET
 2803 
 2804 match_dst_size_check_encodeBlockAsm12B:
 2805     MOVL CX, SI
 2806     MOVL 12(SP), DI
 2807     CMPL DI, SI
 2808     JEQ  emit_literal_done_match_emit_encodeBlockAsm12B
 2809     MOVL SI, R8
 2810     MOVL SI, 12(SP)
 2811     LEAQ (DX)(DI*1), SI
 2812     SUBL DI, R8
 2813     LEAL -1(R8), DI
 2814     CMPL DI, $0x3c
 2815     JLT  one_byte_match_emit_encodeBlockAsm12B
 2816     CMPL DI, $0x00000100
 2817     JLT  two_bytes_match_emit_encodeBlockAsm12B
 2818     MOVB $0xf4, (AX)
 2819     MOVW DI, 1(AX)
 2820     ADDQ $0x03, AX
 2821     JMP  memmove_long_match_emit_encodeBlockAsm12B
 2822 
 2823 two_bytes_match_emit_encodeBlockAsm12B:
 2824     MOVB $0xf0, (AX)
 2825     MOVB DI, 1(AX)
 2826     ADDQ $0x02, AX
 2827     CMPL DI, $0x40
 2828     JL   memmove_match_emit_encodeBlockAsm12B
 2829     JMP  memmove_long_match_emit_encodeBlockAsm12B
 2830 
 2831 one_byte_match_emit_encodeBlockAsm12B:
 2832     SHLB $0x02, DI
 2833     MOVB DI, (AX)
 2834     ADDQ $0x01, AX
 2835 
 2836 memmove_match_emit_encodeBlockAsm12B:
 2837     LEAQ (AX)(R8*1), DI
 2838 
 2839     // genMemMoveShort
 2840     CMPQ R8, $0x03
 2841     JB   emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2
 2842     JE   emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3
 2843     CMPQ R8, $0x08
 2844     JB   emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4through7
 2845     CMPQ R8, $0x10
 2846     JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
 2847     CMPQ R8, $0x20
 2848     JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
 2849     JMP  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
 2850 
 2851 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2:
 2852     MOVB (SI), R9
 2853     MOVB -1(SI)(R8*1), SI
 2854     MOVB R9, (AX)
 2855     MOVB SI, -1(AX)(R8*1)
 2856     JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
 2857 
 2858 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3:
 2859     MOVW (SI), R9
 2860     MOVB 2(SI), SI
 2861     MOVW R9, (AX)
 2862     MOVB SI, 2(AX)
 2863     JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
 2864 
 2865 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4through7:
 2866     MOVL (SI), R9
 2867     MOVL -4(SI)(R8*1), SI
 2868     MOVL R9, (AX)
 2869     MOVL SI, -4(AX)(R8*1)
 2870     JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
 2871 
 2872 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
 2873     MOVQ (SI), R9
 2874     MOVQ -8(SI)(R8*1), SI
 2875     MOVQ R9, (AX)
 2876     MOVQ SI, -8(AX)(R8*1)
 2877     JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
 2878 
 2879 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
 2880     MOVOU (SI), X0
 2881     MOVOU -16(SI)(R8*1), X1
 2882     MOVOU X0, (AX)
 2883     MOVOU X1, -16(AX)(R8*1)
 2884     JMP   memmove_end_copy_match_emit_encodeBlockAsm12B
 2885 
 2886 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
 2887     MOVOU (SI), X0
 2888     MOVOU 16(SI), X1
 2889     MOVOU -32(SI)(R8*1), X2
 2890     MOVOU -16(SI)(R8*1), X3
 2891     MOVOU X0, (AX)
 2892     MOVOU X1, 16(AX)
 2893     MOVOU X2, -32(AX)(R8*1)
 2894     MOVOU X3, -16(AX)(R8*1)
 2895 
 2896 memmove_end_copy_match_emit_encodeBlockAsm12B:
 2897     MOVQ DI, AX
 2898     JMP  emit_literal_done_match_emit_encodeBlockAsm12B
 2899 
 2900 memmove_long_match_emit_encodeBlockAsm12B:
 2901     LEAQ (AX)(R8*1), DI
 2902 
 2903     // genMemMoveLong
 2904     MOVOU (SI), X0
 2905     MOVOU 16(SI), X1
 2906     MOVOU -32(SI)(R8*1), X2
 2907     MOVOU -16(SI)(R8*1), X3
 2908     MOVQ  R8, R10
 2909     SHRQ  $0x05, R10
 2910     MOVQ  AX, R9
 2911     ANDL  $0x0000001f, R9
 2912     MOVQ  $0x00000040, R11
 2913     SUBQ  R9, R11
 2914     DECQ  R10
 2915     JA    emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
 2916     LEAQ  -32(SI)(R11*1), R9
 2917     LEAQ  -32(AX)(R11*1), R12
 2918 
 2919 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back:
 2920     MOVOU (R9), X4
 2921     MOVOU 16(R9), X5
 2922     MOVOA X4, (R12)
 2923     MOVOA X5, 16(R12)
 2924     ADDQ  $0x20, R12
 2925     ADDQ  $0x20, R9
 2926     ADDQ  $0x20, R11
 2927     DECQ  R10
 2928     JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back
 2929 
 2930 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
 2931     MOVOU -32(SI)(R11*1), X4
 2932     MOVOU -16(SI)(R11*1), X5
 2933     MOVOA X4, -32(AX)(R11*1)
 2934     MOVOA X5, -16(AX)(R11*1)
 2935     ADDQ  $0x20, R11
 2936     CMPQ  R8, R11
 2937     JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
 2938     MOVOU X0, (AX)
 2939     MOVOU X1, 16(AX)
 2940     MOVOU X2, -32(AX)(R8*1)
 2941     MOVOU X3, -16(AX)(R8*1)
 2942     MOVQ  DI, AX
 2943 
 2944 emit_literal_done_match_emit_encodeBlockAsm12B:
 2945 match_nolit_loop_encodeBlockAsm12B:
 2946     MOVL CX, SI
 2947     SUBL BP, SI
 2948     MOVL SI, 16(SP)
 2949     ADDL $0x04, CX
 2950     ADDL $0x04, BP
 2951     MOVQ src_len+32(FP), SI
 2952     SUBL CX, SI
 2953     LEAQ (DX)(CX*1), DI
 2954     LEAQ (DX)(BP*1), BP
 2955 
 2956     // matchLen
 2957     XORL R9, R9
 2958     CMPL SI, $0x08
 2959     JL   matchlen_single_match_nolit_encodeBlockAsm12B
 2960 
 2961 matchlen_loopback_match_nolit_encodeBlockAsm12B:
 2962     MOVQ  (DI)(R9*1), R8
 2963     XORQ  (BP)(R9*1), R8
 2964     TESTQ R8, R8
 2965     JZ    matchlen_loop_match_nolit_encodeBlockAsm12B
 2966     BSFQ  R8, R8
 2967     SARQ  $0x03, R8
 2968     LEAL  (R9)(R8*1), R9
 2969     JMP   match_nolit_end_encodeBlockAsm12B
 2970 
 2971 matchlen_loop_match_nolit_encodeBlockAsm12B:
 2972     LEAL -8(SI), SI
 2973     LEAL 8(R9), R9
 2974     CMPL SI, $0x08
 2975     JGE  matchlen_loopback_match_nolit_encodeBlockAsm12B
 2976 
 2977 matchlen_single_match_nolit_encodeBlockAsm12B:
 2978     TESTL SI, SI
 2979     JZ    match_nolit_end_encodeBlockAsm12B
 2980 
 2981 matchlen_single_loopback_match_nolit_encodeBlockAsm12B:
 2982     MOVB (DI)(R9*1), R8
 2983     CMPB (BP)(R9*1), R8
 2984     JNE  match_nolit_end_encodeBlockAsm12B
 2985     LEAL 1(R9), R9
 2986     DECL SI
 2987     JNZ  matchlen_single_loopback_match_nolit_encodeBlockAsm12B
 2988 
 2989 match_nolit_end_encodeBlockAsm12B:
 2990     ADDL R9, CX
 2991     MOVL 16(SP), BP
 2992     ADDL $0x04, R9
 2993     MOVL CX, 12(SP)
 2994 
 2995     // emitCopy
 2996 two_byte_offset_match_nolit_encodeBlockAsm12B:
 2997     CMPL R9, $0x40
 2998     JLE  two_byte_offset_short_match_nolit_encodeBlockAsm12B
 2999     MOVB $0xee, (AX)
 3000     MOVW BP, 1(AX)
 3001     LEAL -60(R9), R9
 3002     ADDQ $0x03, AX
 3003 
 3004     // emitRepeat
 3005     MOVL R9, SI
 3006     LEAL -4(R9), R9
 3007     CMPL SI, $0x08
 3008     JLE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
 3009     CMPL SI, $0x0c
 3010     JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
 3011     CMPL BP, $0x00000800
 3012     JLT  repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
 3013 
 3014 cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
 3015     CMPL R9, $0x00000104
 3016     JLT  repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
 3017     LEAL -256(R9), R9
 3018     MOVW $0x0019, (AX)
 3019     MOVW R9, 2(AX)
 3020     ADDQ $0x04, AX
 3021     JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 3022 
 3023 repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
 3024     LEAL -4(R9), R9
 3025     MOVW $0x0015, (AX)
 3026     MOVB R9, 2(AX)
 3027     ADDQ $0x03, AX
 3028     JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 3029 
 3030 repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
 3031     SHLL $0x02, R9
 3032     ORL  $0x01, R9
 3033     MOVW R9, (AX)
 3034     ADDQ $0x02, AX
 3035     JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 3036 
 3037 repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
 3038     XORQ SI, SI
 3039     LEAL 1(SI)(R9*4), R9
 3040     MOVB BP, 1(AX)
 3041     SARL $0x08, BP
 3042     SHLL $0x05, BP
 3043     ORL  BP, R9
 3044     MOVB R9, (AX)
 3045     ADDQ $0x02, AX
 3046     JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 3047     JMP two_byte_offset_match_nolit_encodeBlockAsm12B
 3048 
 3049 two_byte_offset_short_match_nolit_encodeBlockAsm12B:
 3050     CMPL R9, $0x0c
 3051     JGE  emit_copy_three_match_nolit_encodeBlockAsm12B
 3052     CMPL BP, $0x00000800
 3053     JGE  emit_copy_three_match_nolit_encodeBlockAsm12B
 3054     MOVB $0x01, BL
 3055     LEAL -16(BX)(R9*4), R9
 3056     MOVB BP, 1(AX)
 3057     SHRL $0x08, BP
 3058     SHLL $0x05, BP
 3059     ORL  BP, R9
 3060     MOVB R9, (AX)
 3061     ADDQ $0x02, AX
 3062     JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 3063 
 3064 emit_copy_three_match_nolit_encodeBlockAsm12B:
 3065     MOVB $0x02, BL
 3066     LEAL -4(BX)(R9*4), R9
 3067     MOVB R9, (AX)
 3068     MOVW BP, 1(AX)
 3069     ADDQ $0x03, AX
 3070 
 3071 match_nolit_emitcopy_end_encodeBlockAsm12B:
 3072     CMPL CX, 8(SP)
 3073     JGE  emit_remainder_encodeBlockAsm12B
 3074     MOVQ -2(DX)(CX*1), SI
 3075     CMPQ AX, (SP)
 3076     JL   match_nolit_dst_ok_encodeBlockAsm12B
 3077     MOVQ $0x00000000, ret+48(FP)
 3078     RET
 3079 
 3080 match_nolit_dst_ok_encodeBlockAsm12B:
 3081     MOVQ  $0x000000cf1bbcdcbb, R8
 3082     MOVQ  SI, DI
 3083     SHRQ  $0x10, SI
 3084     MOVQ  SI, BP
 3085     SHLQ  $0x18, DI
 3086     IMULQ R8, DI
 3087     SHRQ  $0x34, DI
 3088     SHLQ  $0x18, BP
 3089     IMULQ R8, BP
 3090     SHRQ  $0x34, BP
 3091     LEAL  -2(CX), R8
 3092     LEAQ  24(SP)(BP*4), R9
 3093     MOVL  (R9), BP
 3094     MOVL  R8, 24(SP)(DI*4)
 3095     MOVL  CX, (R9)
 3096     CMPL  (DX)(BP*1), SI
 3097     JEQ   match_nolit_loop_encodeBlockAsm12B
 3098     INCL  CX
 3099     JMP   search_loop_encodeBlockAsm12B
 3100 
 3101 emit_remainder_encodeBlockAsm12B:
 3102     MOVQ src_len+32(FP), CX
 3103     SUBL 12(SP), CX
 3104     LEAQ 3(AX)(CX*1), CX
 3105     CMPQ CX, (SP)
 3106     JL   emit_remainder_ok_encodeBlockAsm12B
 3107     MOVQ $0x00000000, ret+48(FP)
 3108     RET
 3109 
 3110 emit_remainder_ok_encodeBlockAsm12B:
 3111     MOVQ src_len+32(FP), CX
 3112     MOVL 12(SP), BX
 3113     CMPL BX, CX
 3114     JEQ  emit_literal_done_emit_remainder_encodeBlockAsm12B
 3115     MOVL CX, BP
 3116     MOVL CX, 12(SP)
 3117     LEAQ (DX)(BX*1), CX
 3118     SUBL BX, BP
 3119     LEAL -1(BP), DX
 3120     CMPL DX, $0x3c
 3121     JLT  one_byte_emit_remainder_encodeBlockAsm12B
 3122     CMPL DX, $0x00000100
 3123     JLT  two_bytes_emit_remainder_encodeBlockAsm12B
 3124     MOVB $0xf4, (AX)
 3125     MOVW DX, 1(AX)
 3126     ADDQ $0x03, AX
 3127     JMP  memmove_long_emit_remainder_encodeBlockAsm12B
 3128 
 3129 two_bytes_emit_remainder_encodeBlockAsm12B:
 3130     MOVB $0xf0, (AX)
 3131     MOVB DL, 1(AX)
 3132     ADDQ $0x02, AX
 3133     CMPL DX, $0x40
 3134     JL   memmove_emit_remainder_encodeBlockAsm12B
 3135     JMP  memmove_long_emit_remainder_encodeBlockAsm12B
 3136 
 3137 one_byte_emit_remainder_encodeBlockAsm12B:
 3138     SHLB $0x02, DL
 3139     MOVB DL, (AX)
 3140     ADDQ $0x01, AX
 3141 
 3142 memmove_emit_remainder_encodeBlockAsm12B:
 3143     LEAQ (AX)(BP*1), DX
 3144     MOVL BP, BX
 3145 
 3146     // genMemMoveShort
 3147     CMPQ BX, $0x03
 3148     JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
 3149     JE   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
 3150     CMPQ BX, $0x08
 3151     JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7
 3152     CMPQ BX, $0x10
 3153     JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16
 3154     CMPQ BX, $0x20
 3155     JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
 3156     JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
 3157 
 3158 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
 3159     MOVB (CX), BP
 3160     MOVB -1(CX)(BX*1), CL
 3161     MOVB BP, (AX)
 3162     MOVB CL, -1(AX)(BX*1)
 3163     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 3164 
 3165 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
 3166     MOVW (CX), BP
 3167     MOVB 2(CX), CL
 3168     MOVW BP, (AX)
 3169     MOVB CL, 2(AX)
 3170     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 3171 
 3172 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7:
 3173     MOVL (CX), BP
 3174     MOVL -4(CX)(BX*1), CX
 3175     MOVL BP, (AX)
 3176     MOVL CX, -4(AX)(BX*1)
 3177     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 3178 
 3179 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
 3180     MOVQ (CX), BP
 3181     MOVQ -8(CX)(BX*1), CX
 3182     MOVQ BP, (AX)
 3183     MOVQ CX, -8(AX)(BX*1)
 3184     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 3185 
 3186 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
 3187     MOVOU (CX), X0
 3188     MOVOU -16(CX)(BX*1), X1
 3189     MOVOU X0, (AX)
 3190     MOVOU X1, -16(AX)(BX*1)
 3191     JMP   memmove_end_copy_emit_remainder_encodeBlockAsm12B
 3192 
 3193 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
 3194     MOVOU (CX), X0
 3195     MOVOU 16(CX), X1
 3196     MOVOU -32(CX)(BX*1), X2
 3197     MOVOU -16(CX)(BX*1), X3
 3198     MOVOU X0, (AX)
 3199     MOVOU X1, 16(AX)
 3200     MOVOU X2, -32(AX)(BX*1)
 3201     MOVOU X3, -16(AX)(BX*1)
 3202 
 3203 memmove_end_copy_emit_remainder_encodeBlockAsm12B:
 3204     MOVQ DX, AX
 3205     JMP  emit_literal_done_emit_remainder_encodeBlockAsm12B
 3206 
 3207 memmove_long_emit_remainder_encodeBlockAsm12B:
 3208     LEAQ (AX)(BP*1), DX
 3209     MOVL BP, BX
 3210 
 3211     // genMemMoveLong
 3212     MOVOU (CX), X0
 3213     MOVOU 16(CX), X1
 3214     MOVOU -32(CX)(BX*1), X2
 3215     MOVOU -16(CX)(BX*1), X3
 3216     MOVQ  BX, SI
 3217     SHRQ  $0x05, SI
 3218     MOVQ  AX, BP
 3219     ANDL  $0x0000001f, BP
 3220     MOVQ  $0x00000040, DI
 3221     SUBQ  BP, DI
 3222     DECQ  SI
 3223     JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
 3224     LEAQ  -32(CX)(DI*1), BP
 3225     LEAQ  -32(AX)(DI*1), R8
 3226 
 3227 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
 3228     MOVOU (BP), X4
 3229     MOVOU 16(BP), X5
 3230     MOVOA X4, (R8)
 3231     MOVOA X5, 16(R8)
 3232     ADDQ  $0x20, R8
 3233     ADDQ  $0x20, BP
 3234     ADDQ  $0x20, DI
 3235     DECQ  SI
 3236     JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back
 3237 
 3238 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
 3239     MOVOU -32(CX)(DI*1), X4
 3240     MOVOU -16(CX)(DI*1), X5
 3241     MOVOA X4, -32(AX)(DI*1)
 3242     MOVOA X5, -16(AX)(DI*1)
 3243     ADDQ  $0x20, DI
 3244     CMPQ  BX, DI
 3245     JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
 3246     MOVOU X0, (AX)
 3247     MOVOU X1, 16(AX)
 3248     MOVOU X2, -32(AX)(BX*1)
 3249     MOVOU X3, -16(AX)(BX*1)
 3250     MOVQ  DX, AX
 3251 
 3252 emit_literal_done_emit_remainder_encodeBlockAsm12B:
 3253     MOVQ dst_base+0(FP), CX
 3254     SUBQ CX, AX
 3255     MOVQ AX, ret+48(FP)
 3256     RET
 3257 
 3258 // func encodeBlockAsm10B(dst []byte, src []byte) int
 3259 // Requires: SSE2
 3260 TEXT ·encodeBlockAsm10B(SB), $4120-56
 3261     MOVQ dst_base+0(FP), AX
 3262     MOVQ $0x00000020, CX
 3263     LEAQ 24(SP), DX
 3264     PXOR X0, X0
 3265 
 3266 zero_loop_encodeBlockAsm10B:
 3267     MOVOU X0, (DX)
 3268     MOVOU X0, 16(DX)
 3269     MOVOU X0, 32(DX)
 3270     MOVOU X0, 48(DX)
 3271     MOVOU X0, 64(DX)
 3272     MOVOU X0, 80(DX)
 3273     MOVOU X0, 96(DX)
 3274     MOVOU X0, 112(DX)
 3275     ADDQ  $0x80, DX
 3276     DECQ  CX
 3277     JNZ   zero_loop_encodeBlockAsm10B
 3278     MOVL  $0x00000000, 12(SP)
 3279     MOVQ  src_len+32(FP), CX
 3280     LEAQ  -5(CX), DX
 3281     LEAQ  -8(CX), BP
 3282     MOVL  BP, 8(SP)
 3283     SHRQ  $0x05, CX
 3284     SUBL  CX, DX
 3285     LEAQ  (AX)(DX*1), DX
 3286     MOVQ  DX, (SP)
 3287     MOVL  $0x00000001, CX
 3288     MOVL  CX, 16(SP)
 3289     MOVQ  src_base+24(FP), DX
 3290 
 3291 search_loop_encodeBlockAsm10B:
 3292     MOVQ  (DX)(CX*1), SI
 3293     MOVL  CX, BP
 3294     SUBL  12(SP), BP
 3295     SHRL  $0x05, BP
 3296     LEAL  4(CX)(BP*1), BP
 3297     CMPL  BP, 8(SP)
 3298     JGE   emit_remainder_encodeBlockAsm10B
 3299     MOVL  BP, 20(SP)
 3300     MOVQ  $0x9e3779b1, R8
 3301     MOVQ  SI, R9
 3302     MOVQ  SI, R10
 3303     SHRQ  $0x08, R10
 3304     SHLQ  $0x20, R9
 3305     IMULQ R8, R9
 3306     SHRQ  $0x36, R9
 3307     SHLQ  $0x20, R10
 3308     IMULQ R8, R10
 3309     SHRQ  $0x36, R10
 3310     MOVL  24(SP)(R9*4), BP
 3311     MOVL  24(SP)(R10*4), DI
 3312     MOVL  CX, 24(SP)(R9*4)
 3313     LEAL  1(CX), R9
 3314     MOVL  R9, 24(SP)(R10*4)
 3315     MOVQ  SI, R9
 3316     SHRQ  $0x10, R9
 3317     SHLQ  $0x20, R9
 3318     IMULQ R8, R9
 3319     SHRQ  $0x36, R9
 3320     MOVL  CX, R8
 3321     SUBL  16(SP), R8
 3322     MOVL  1(DX)(R8*1), R10
 3323     MOVQ  SI, R8
 3324     SHRQ  $0x08, R8
 3325     CMPL  R8, R10
 3326     JNE   no_repeat_found_encodeBlockAsm10B
 3327     LEAL  1(CX), SI
 3328     MOVL  12(SP), DI
 3329     MOVL  SI, BP
 3330     SUBL  16(SP), BP
 3331     JZ    repeat_extend_back_end_encodeBlockAsm10B
 3332 
 3333 repeat_extend_back_loop_encodeBlockAsm10B:
 3334     CMPL SI, DI
 3335     JLE  repeat_extend_back_end_encodeBlockAsm10B
 3336     MOVB -1(DX)(BP*1), BL
 3337     MOVB -1(DX)(SI*1), R8
 3338     CMPB BL, R8
 3339     JNE  repeat_extend_back_end_encodeBlockAsm10B
 3340     LEAL -1(SI), SI
 3341     DECL BP
 3342     JNZ  repeat_extend_back_loop_encodeBlockAsm10B
 3343 
 3344 repeat_extend_back_end_encodeBlockAsm10B:
 3345     MOVL 12(SP), BP
 3346     CMPL BP, SI
 3347     JEQ  emit_literal_done_repeat_emit_encodeBlockAsm10B
 3348     MOVL SI, R8
 3349     MOVL SI, 12(SP)
 3350     LEAQ (DX)(BP*1), R9
 3351     SUBL BP, R8
 3352     LEAL -1(R8), BP
 3353     CMPL BP, $0x3c
 3354     JLT  one_byte_repeat_emit_encodeBlockAsm10B
 3355     CMPL BP, $0x00000100
 3356     JLT  two_bytes_repeat_emit_encodeBlockAsm10B
 3357     MOVB $0xf4, (AX)
 3358     MOVW BP, 1(AX)
 3359     ADDQ $0x03, AX
 3360     JMP  memmove_long_repeat_emit_encodeBlockAsm10B
 3361 
 3362 two_bytes_repeat_emit_encodeBlockAsm10B:
 3363     MOVB $0xf0, (AX)
 3364     MOVB BP, 1(AX)
 3365     ADDQ $0x02, AX
 3366     CMPL BP, $0x40
 3367     JL   memmove_repeat_emit_encodeBlockAsm10B
 3368     JMP  memmove_long_repeat_emit_encodeBlockAsm10B
 3369 
 3370 one_byte_repeat_emit_encodeBlockAsm10B:
 3371     SHLB $0x02, BP
 3372     MOVB BP, (AX)
 3373     ADDQ $0x01, AX
 3374 
 3375 memmove_repeat_emit_encodeBlockAsm10B:
 3376     LEAQ (AX)(R8*1), BP
 3377 
 3378     // genMemMoveShort
 3379     CMPQ R8, $0x03
 3380     JB   emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2
 3381     JE   emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3
 3382     CMPQ R8, $0x08
 3383     JB   emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4through7
 3384     CMPQ R8, $0x10
 3385     JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
 3386     CMPQ R8, $0x20
 3387     JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
 3388     JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
 3389 
 3390 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2:
 3391     MOVB (R9), R10
 3392     MOVB -1(R9)(R8*1), R9
 3393     MOVB R10, (AX)
 3394     MOVB R9, -1(AX)(R8*1)
 3395     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
 3396 
 3397 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3:
 3398     MOVW (R9), R10
 3399     MOVB 2(R9), R9
 3400     MOVW R10, (AX)
 3401     MOVB R9, 2(AX)
 3402     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
 3403 
 3404 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4through7:
 3405     MOVL (R9), R10
 3406     MOVL -4(R9)(R8*1), R9
 3407     MOVL R10, (AX)
 3408     MOVL R9, -4(AX)(R8*1)
 3409     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
 3410 
 3411 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
 3412     MOVQ (R9), R10
 3413     MOVQ -8(R9)(R8*1), R9
 3414     MOVQ R10, (AX)
 3415     MOVQ R9, -8(AX)(R8*1)
 3416     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
 3417 
 3418 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
 3419     MOVOU (R9), X0
 3420     MOVOU -16(R9)(R8*1), X1
 3421     MOVOU X0, (AX)
 3422     MOVOU X1, -16(AX)(R8*1)
 3423     JMP   memmove_end_copy_repeat_emit_encodeBlockAsm10B
 3424 
 3425 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
 3426     MOVOU (R9), X0
 3427     MOVOU 16(R9), X1
 3428     MOVOU -32(R9)(R8*1), X2
 3429     MOVOU -16(R9)(R8*1), X3
 3430     MOVOU X0, (AX)
 3431     MOVOU X1, 16(AX)
 3432     MOVOU X2, -32(AX)(R8*1)
 3433     MOVOU X3, -16(AX)(R8*1)
 3434 
 3435 memmove_end_copy_repeat_emit_encodeBlockAsm10B:
 3436     MOVQ BP, AX
 3437     JMP  emit_literal_done_repeat_emit_encodeBlockAsm10B
 3438 
 3439 memmove_long_repeat_emit_encodeBlockAsm10B:
 3440     LEAQ (AX)(R8*1), BP
 3441 
 3442     // genMemMoveLong
 3443     MOVOU (R9), X0
 3444     MOVOU 16(R9), X1
 3445     MOVOU -32(R9)(R8*1), X2
 3446     MOVOU -16(R9)(R8*1), X3
 3447     MOVQ  R8, R11
 3448     SHRQ  $0x05, R11
 3449     MOVQ  AX, R10
 3450     ANDL  $0x0000001f, R10
 3451     MOVQ  $0x00000040, R12
 3452     SUBQ  R10, R12
 3453     DECQ  R11
 3454     JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
 3455     LEAQ  -32(R9)(R12*1), R10
 3456     LEAQ  -32(AX)(R12*1), R13
 3457 
 3458 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
 3459     MOVOU (R10), X4
 3460     MOVOU 16(R10), X5
 3461     MOVOA X4, (R13)
 3462     MOVOA X5, 16(R13)
 3463     ADDQ  $0x20, R13
 3464     ADDQ  $0x20, R10
 3465     ADDQ  $0x20, R12
 3466     DECQ  R11
 3467     JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back
 3468 
 3469 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
 3470     MOVOU -32(R9)(R12*1), X4
 3471     MOVOU -16(R9)(R12*1), X5
 3472     MOVOA X4, -32(AX)(R12*1)
 3473     MOVOA X5, -16(AX)(R12*1)
 3474     ADDQ  $0x20, R12
 3475     CMPQ  R8, R12
 3476     JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
 3477     MOVOU X0, (AX)
 3478     MOVOU X1, 16(AX)
 3479     MOVOU X2, -32(AX)(R8*1)
 3480     MOVOU X3, -16(AX)(R8*1)
 3481     MOVQ  BP, AX
 3482 
 3483 emit_literal_done_repeat_emit_encodeBlockAsm10B:
 3484     ADDL $0x05, CX
 3485     MOVL CX, BP
 3486     SUBL 16(SP), BP
 3487     MOVQ src_len+32(FP), R8
 3488     SUBL CX, R8
 3489     LEAQ (DX)(CX*1), R9
 3490     LEAQ (DX)(BP*1), BP
 3491 
 3492     // matchLen
 3493     XORL R11, R11
 3494     CMPL R8, $0x08
 3495     JL   matchlen_single_repeat_extend_encodeBlockAsm10B
 3496 
 3497 matchlen_loopback_repeat_extend_encodeBlockAsm10B:
 3498     MOVQ  (R9)(R11*1), R10
 3499     XORQ  (BP)(R11*1), R10
 3500     TESTQ R10, R10
 3501     JZ    matchlen_loop_repeat_extend_encodeBlockAsm10B
 3502     BSFQ  R10, R10
 3503     SARQ  $0x03, R10
 3504     LEAL  (R11)(R10*1), R11
 3505     JMP   repeat_extend_forward_end_encodeBlockAsm10B
 3506 
 3507 matchlen_loop_repeat_extend_encodeBlockAsm10B:
 3508     LEAL -8(R8), R8
 3509     LEAL 8(R11), R11
 3510     CMPL R8, $0x08
 3511     JGE  matchlen_loopback_repeat_extend_encodeBlockAsm10B
 3512 
 3513 matchlen_single_repeat_extend_encodeBlockAsm10B:
 3514     TESTL R8, R8
 3515     JZ    repeat_extend_forward_end_encodeBlockAsm10B
 3516 
 3517 matchlen_single_loopback_repeat_extend_encodeBlockAsm10B:
 3518     MOVB (R9)(R11*1), R10
 3519     CMPB (BP)(R11*1), R10
 3520     JNE  repeat_extend_forward_end_encodeBlockAsm10B
 3521     LEAL 1(R11), R11
 3522     DECL R8
 3523     JNZ  matchlen_single_loopback_repeat_extend_encodeBlockAsm10B
 3524 
 3525 repeat_extend_forward_end_encodeBlockAsm10B:
 3526     ADDL  R11, CX
 3527     MOVL  CX, BP
 3528     SUBL  SI, BP
 3529     MOVL  16(SP), SI
 3530     TESTL DI, DI
 3531     JZ    repeat_as_copy_encodeBlockAsm10B
 3532 
 3533     // emitRepeat
 3534     MOVL BP, DI
 3535     LEAL -4(BP), BP
 3536     CMPL DI, $0x08
 3537     JLE  repeat_two_match_repeat_encodeBlockAsm10B
 3538     CMPL DI, $0x0c
 3539     JGE  cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
 3540     CMPL SI, $0x00000800
 3541     JLT  repeat_two_offset_match_repeat_encodeBlockAsm10B
 3542 
 3543 cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
 3544     CMPL BP, $0x00000104
 3545     JLT  repeat_three_match_repeat_encodeBlockAsm10B
 3546     LEAL -256(BP), BP
 3547     MOVW $0x0019, (AX)
 3548     MOVW BP, 2(AX)
 3549     ADDQ $0x04, AX
 3550     JMP  repeat_end_emit_encodeBlockAsm10B
 3551 
 3552 repeat_three_match_repeat_encodeBlockAsm10B:
 3553     LEAL -4(BP), BP
 3554     MOVW $0x0015, (AX)
 3555     MOVB BP, 2(AX)
 3556     ADDQ $0x03, AX
 3557     JMP  repeat_end_emit_encodeBlockAsm10B
 3558 
 3559 repeat_two_match_repeat_encodeBlockAsm10B:
 3560     SHLL $0x02, BP
 3561     ORL  $0x01, BP
 3562     MOVW BP, (AX)
 3563     ADDQ $0x02, AX
 3564     JMP  repeat_end_emit_encodeBlockAsm10B
 3565 
 3566 repeat_two_offset_match_repeat_encodeBlockAsm10B:
 3567     XORQ DI, DI
 3568     LEAL 1(DI)(BP*4), BP
 3569     MOVB SI, 1(AX)
 3570     SARL $0x08, SI
 3571     SHLL $0x05, SI
 3572     ORL  SI, BP
 3573     MOVB BP, (AX)
 3574     ADDQ $0x02, AX
 3575     JMP  repeat_end_emit_encodeBlockAsm10B
 3576 
 3577 repeat_as_copy_encodeBlockAsm10B:
 3578     // emitCopy
 3579 two_byte_offset_repeat_as_copy_encodeBlockAsm10B:
 3580     CMPL BP, $0x40
 3581     JLE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
 3582     MOVB $0xee, (AX)
 3583     MOVW SI, 1(AX)
 3584     LEAL -60(BP), BP
 3585     ADDQ $0x03, AX
 3586 
 3587     // emitRepeat
 3588     MOVL BP, DI
 3589     LEAL -4(BP), BP
 3590     CMPL DI, $0x08
 3591     JLE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
 3592     CMPL DI, $0x0c
 3593     JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
 3594     CMPL SI, $0x00000800
 3595     JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
 3596 
 3597 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
 3598     CMPL BP, $0x00000104
 3599     JLT  repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
 3600     LEAL -256(BP), BP
 3601     MOVW $0x0019, (AX)
 3602     MOVW BP, 2(AX)
 3603     ADDQ $0x04, AX
 3604     JMP  repeat_end_emit_encodeBlockAsm10B
 3605 
 3606 repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
 3607     LEAL -4(BP), BP
 3608     MOVW $0x0015, (AX)
 3609     MOVB BP, 2(AX)
 3610     ADDQ $0x03, AX
 3611     JMP  repeat_end_emit_encodeBlockAsm10B
 3612 
 3613 repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
 3614     SHLL $0x02, BP
 3615     ORL  $0x01, BP
 3616     MOVW BP, (AX)
 3617     ADDQ $0x02, AX
 3618     JMP  repeat_end_emit_encodeBlockAsm10B
 3619 
 3620 repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
 3621     XORQ DI, DI
 3622     LEAL 1(DI)(BP*4), BP
 3623     MOVB SI, 1(AX)
 3624     SARL $0x08, SI
 3625     SHLL $0x05, SI
 3626     ORL  SI, BP
 3627     MOVB BP, (AX)
 3628     ADDQ $0x02, AX
 3629     JMP  repeat_end_emit_encodeBlockAsm10B
 3630     JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B
 3631 
 3632 two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
 3633     CMPL BP, $0x0c
 3634     JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
 3635     CMPL SI, $0x00000800
 3636     JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
 3637     MOVB $0x01, BL
 3638     LEAL -16(BX)(BP*4), BP
 3639     MOVB SI, 1(AX)
 3640     SHRL $0x08, SI
 3641     SHLL $0x05, SI
 3642     ORL  SI, BP
 3643     MOVB BP, (AX)
 3644     ADDQ $0x02, AX
 3645     JMP  repeat_end_emit_encodeBlockAsm10B
 3646 
 3647 emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
 3648     MOVB $0x02, BL
 3649     LEAL -4(BX)(BP*4), BP
 3650     MOVB BP, (AX)
 3651     MOVW SI, 1(AX)
 3652     ADDQ $0x03, AX
 3653 
 3654 repeat_end_emit_encodeBlockAsm10B:
 3655     MOVL CX, 12(SP)
 3656     JMP  search_loop_encodeBlockAsm10B
 3657 
 3658 no_repeat_found_encodeBlockAsm10B:
 3659     CMPL (DX)(BP*1), SI
 3660     JEQ  candidate_match_encodeBlockAsm10B
 3661     SHRQ $0x08, SI
 3662     MOVL 24(SP)(R9*4), BP
 3663     LEAL 2(CX), R8
 3664     CMPL (DX)(DI*1), SI
 3665     JEQ  candidate2_match_encodeBlockAsm10B
 3666     MOVL R8, 24(SP)(R9*4)
 3667     SHRQ $0x08, SI
 3668     CMPL (DX)(BP*1), SI
 3669     JEQ  candidate3_match_encodeBlockAsm10B
 3670     MOVL 20(SP), CX
 3671     JMP  search_loop_encodeBlockAsm10B
 3672 
 3673 candidate3_match_encodeBlockAsm10B:
 3674     ADDL $0x02, CX
 3675     JMP  candidate_match_encodeBlockAsm10B
 3676 
 3677 candidate2_match_encodeBlockAsm10B:
 3678     MOVL R8, 24(SP)(R9*4)
 3679     INCL CX
 3680     MOVL DI, BP
 3681 
 3682 candidate_match_encodeBlockAsm10B:
 3683     MOVL  12(SP), SI
 3684     TESTL BP, BP
 3685     JZ    match_extend_back_end_encodeBlockAsm10B
 3686 
 3687 match_extend_back_loop_encodeBlockAsm10B:
 3688     CMPL CX, SI
 3689     JLE  match_extend_back_end_encodeBlockAsm10B
 3690     MOVB -1(DX)(BP*1), BL
 3691     MOVB -1(DX)(CX*1), DI
 3692     CMPB BL, DI
 3693     JNE  match_extend_back_end_encodeBlockAsm10B
 3694     LEAL -1(CX), CX
 3695     DECL BP
 3696     JZ   match_extend_back_end_encodeBlockAsm10B
 3697     JMP  match_extend_back_loop_encodeBlockAsm10B
 3698 
 3699 match_extend_back_end_encodeBlockAsm10B:
 3700     MOVL CX, SI
 3701     SUBL 12(SP), SI
 3702     LEAQ 3(AX)(SI*1), SI
 3703     CMPQ SI, (SP)
 3704     JL   match_dst_size_check_encodeBlockAsm10B
 3705     MOVQ $0x00000000, ret+48(FP)
 3706     RET
 3707 
 3708 match_dst_size_check_encodeBlockAsm10B:
 3709     MOVL CX, SI
 3710     MOVL 12(SP), DI
 3711     CMPL DI, SI
 3712     JEQ  emit_literal_done_match_emit_encodeBlockAsm10B
 3713     MOVL SI, R8
 3714     MOVL SI, 12(SP)
 3715     LEAQ (DX)(DI*1), SI
 3716     SUBL DI, R8
 3717     LEAL -1(R8), DI
 3718     CMPL DI, $0x3c
 3719     JLT  one_byte_match_emit_encodeBlockAsm10B
 3720     CMPL DI, $0x00000100
 3721     JLT  two_bytes_match_emit_encodeBlockAsm10B
 3722     MOVB $0xf4, (AX)
 3723     MOVW DI, 1(AX)
 3724     ADDQ $0x03, AX
 3725     JMP  memmove_long_match_emit_encodeBlockAsm10B
 3726 
 3727 two_bytes_match_emit_encodeBlockAsm10B:
 3728     MOVB $0xf0, (AX)
 3729     MOVB DI, 1(AX)
 3730     ADDQ $0x02, AX
 3731     CMPL DI, $0x40
 3732     JL   memmove_match_emit_encodeBlockAsm10B
 3733     JMP  memmove_long_match_emit_encodeBlockAsm10B
 3734 
 3735 one_byte_match_emit_encodeBlockAsm10B:
 3736     SHLB $0x02, DI
 3737     MOVB DI, (AX)
 3738     ADDQ $0x01, AX
 3739 
 3740 memmove_match_emit_encodeBlockAsm10B:
 3741     LEAQ (AX)(R8*1), DI
 3742 
 3743     // genMemMoveShort
 3744     CMPQ R8, $0x03
 3745     JB   emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2
 3746     JE   emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3
 3747     CMPQ R8, $0x08
 3748     JB   emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4through7
 3749     CMPQ R8, $0x10
 3750     JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
 3751     CMPQ R8, $0x20
 3752     JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
 3753     JMP  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
 3754 
 3755 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2:
 3756     MOVB (SI), R9
 3757     MOVB -1(SI)(R8*1), SI
 3758     MOVB R9, (AX)
 3759     MOVB SI, -1(AX)(R8*1)
 3760     JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
 3761 
 3762 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3:
 3763     MOVW (SI), R9
 3764     MOVB 2(SI), SI
 3765     MOVW R9, (AX)
 3766     MOVB SI, 2(AX)
 3767     JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
 3768 
 3769 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4through7:
 3770     MOVL (SI), R9
 3771     MOVL -4(SI)(R8*1), SI
 3772     MOVL R9, (AX)
 3773     MOVL SI, -4(AX)(R8*1)
 3774     JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
 3775 
 3776 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
 3777     MOVQ (SI), R9
 3778     MOVQ -8(SI)(R8*1), SI
 3779     MOVQ R9, (AX)
 3780     MOVQ SI, -8(AX)(R8*1)
 3781     JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
 3782 
 3783 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
 3784     MOVOU (SI), X0
 3785     MOVOU -16(SI)(R8*1), X1
 3786     MOVOU X0, (AX)
 3787     MOVOU X1, -16(AX)(R8*1)
 3788     JMP   memmove_end_copy_match_emit_encodeBlockAsm10B
 3789 
 3790 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
 3791     MOVOU (SI), X0
 3792     MOVOU 16(SI), X1
 3793     MOVOU -32(SI)(R8*1), X2
 3794     MOVOU -16(SI)(R8*1), X3
 3795     MOVOU X0, (AX)
 3796     MOVOU X1, 16(AX)
 3797     MOVOU X2, -32(AX)(R8*1)
 3798     MOVOU X3, -16(AX)(R8*1)
 3799 
 3800 memmove_end_copy_match_emit_encodeBlockAsm10B:
 3801     MOVQ DI, AX
 3802     JMP  emit_literal_done_match_emit_encodeBlockAsm10B
 3803 
 3804 memmove_long_match_emit_encodeBlockAsm10B:
 3805     LEAQ (AX)(R8*1), DI
 3806 
 3807     // genMemMoveLong
 3808     MOVOU (SI), X0
 3809     MOVOU 16(SI), X1
 3810     MOVOU -32(SI)(R8*1), X2
 3811     MOVOU -16(SI)(R8*1), X3
 3812     MOVQ  R8, R10
 3813     SHRQ  $0x05, R10
 3814     MOVQ  AX, R9
 3815     ANDL  $0x0000001f, R9
 3816     MOVQ  $0x00000040, R11
 3817     SUBQ  R9, R11
 3818     DECQ  R10
 3819     JA    emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
 3820     LEAQ  -32(SI)(R11*1), R9
 3821     LEAQ  -32(AX)(R11*1), R12
 3822 
 3823 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
 3824     MOVOU (R9), X4
 3825     MOVOU 16(R9), X5
 3826     MOVOA X4, (R12)
 3827     MOVOA X5, 16(R12)
 3828     ADDQ  $0x20, R12
 3829     ADDQ  $0x20, R9
 3830     ADDQ  $0x20, R11
 3831     DECQ  R10
 3832     JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back
 3833 
 3834 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
 3835     MOVOU -32(SI)(R11*1), X4
 3836     MOVOU -16(SI)(R11*1), X5
 3837     MOVOA X4, -32(AX)(R11*1)
 3838     MOVOA X5, -16(AX)(R11*1)
 3839     ADDQ  $0x20, R11
 3840     CMPQ  R8, R11
 3841     JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
 3842     MOVOU X0, (AX)
 3843     MOVOU X1, 16(AX)
 3844     MOVOU X2, -32(AX)(R8*1)
 3845     MOVOU X3, -16(AX)(R8*1)
 3846     MOVQ  DI, AX
 3847 
 3848 emit_literal_done_match_emit_encodeBlockAsm10B:
 3849 match_nolit_loop_encodeBlockAsm10B:
 3850     MOVL CX, SI
 3851     SUBL BP, SI
 3852     MOVL SI, 16(SP)
 3853     ADDL $0x04, CX
 3854     ADDL $0x04, BP
 3855     MOVQ src_len+32(FP), SI
 3856     SUBL CX, SI
 3857     LEAQ (DX)(CX*1), DI
 3858     LEAQ (DX)(BP*1), BP
 3859 
 3860     // matchLen
 3861     XORL R9, R9
 3862     CMPL SI, $0x08
 3863     JL   matchlen_single_match_nolit_encodeBlockAsm10B
 3864 
 3865 matchlen_loopback_match_nolit_encodeBlockAsm10B:
 3866     MOVQ  (DI)(R9*1), R8
 3867     XORQ  (BP)(R9*1), R8
 3868     TESTQ R8, R8
 3869     JZ    matchlen_loop_match_nolit_encodeBlockAsm10B
 3870     BSFQ  R8, R8
 3871     SARQ  $0x03, R8
 3872     LEAL  (R9)(R8*1), R9
 3873     JMP   match_nolit_end_encodeBlockAsm10B
 3874 
 3875 matchlen_loop_match_nolit_encodeBlockAsm10B:
 3876     LEAL -8(SI), SI
 3877     LEAL 8(R9), R9
 3878     CMPL SI, $0x08
 3879     JGE  matchlen_loopback_match_nolit_encodeBlockAsm10B
 3880 
 3881 matchlen_single_match_nolit_encodeBlockAsm10B:
 3882     TESTL SI, SI
 3883     JZ    match_nolit_end_encodeBlockAsm10B
 3884 
 3885 matchlen_single_loopback_match_nolit_encodeBlockAsm10B:
 3886     MOVB (DI)(R9*1), R8
 3887     CMPB (BP)(R9*1), R8
 3888     JNE  match_nolit_end_encodeBlockAsm10B
 3889     LEAL 1(R9), R9
 3890     DECL SI
 3891     JNZ  matchlen_single_loopback_match_nolit_encodeBlockAsm10B
 3892 
 3893 match_nolit_end_encodeBlockAsm10B:
 3894     ADDL R9, CX
 3895     MOVL 16(SP), BP
 3896     ADDL $0x04, R9
 3897     MOVL CX, 12(SP)
 3898 
 3899     // emitCopy
 3900 two_byte_offset_match_nolit_encodeBlockAsm10B:
 3901     CMPL R9, $0x40
 3902     JLE  two_byte_offset_short_match_nolit_encodeBlockAsm10B
 3903     MOVB $0xee, (AX)
 3904     MOVW BP, 1(AX)
 3905     LEAL -60(R9), R9
 3906     ADDQ $0x03, AX
 3907 
 3908     // emitRepeat
 3909     MOVL R9, SI
 3910     LEAL -4(R9), R9
 3911     CMPL SI, $0x08
 3912     JLE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
 3913     CMPL SI, $0x0c
 3914     JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
 3915     CMPL BP, $0x00000800
 3916     JLT  repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
 3917 
 3918 cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
 3919     CMPL R9, $0x00000104
 3920     JLT  repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
 3921     LEAL -256(R9), R9
 3922     MOVW $0x0019, (AX)
 3923     MOVW R9, 2(AX)
 3924     ADDQ $0x04, AX
 3925     JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 3926 
 3927 repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
 3928     LEAL -4(R9), R9
 3929     MOVW $0x0015, (AX)
 3930     MOVB R9, 2(AX)
 3931     ADDQ $0x03, AX
 3932     JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 3933 
 3934 repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
 3935     SHLL $0x02, R9
 3936     ORL  $0x01, R9
 3937     MOVW R9, (AX)
 3938     ADDQ $0x02, AX
 3939     JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 3940 
 3941 repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
 3942     XORQ SI, SI
 3943     LEAL 1(SI)(R9*4), R9
 3944     MOVB BP, 1(AX)
 3945     SARL $0x08, BP
 3946     SHLL $0x05, BP
 3947     ORL  BP, R9
 3948     MOVB R9, (AX)
 3949     ADDQ $0x02, AX
 3950     JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 3951     JMP two_byte_offset_match_nolit_encodeBlockAsm10B
 3952 
 3953 two_byte_offset_short_match_nolit_encodeBlockAsm10B:
 3954     CMPL R9, $0x0c
 3955     JGE  emit_copy_three_match_nolit_encodeBlockAsm10B
 3956     CMPL BP, $0x00000800
 3957     JGE  emit_copy_three_match_nolit_encodeBlockAsm10B
 3958     MOVB $0x01, BL
 3959     LEAL -16(BX)(R9*4), R9
 3960     MOVB BP, 1(AX)
 3961     SHRL $0x08, BP
 3962     SHLL $0x05, BP
 3963     ORL  BP, R9
 3964     MOVB R9, (AX)
 3965     ADDQ $0x02, AX
 3966     JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 3967 
 3968 emit_copy_three_match_nolit_encodeBlockAsm10B:
 3969     MOVB $0x02, BL
 3970     LEAL -4(BX)(R9*4), R9
 3971     MOVB R9, (AX)
 3972     MOVW BP, 1(AX)
 3973     ADDQ $0x03, AX
 3974 
 3975 match_nolit_emitcopy_end_encodeBlockAsm10B:
 3976     CMPL CX, 8(SP)
 3977     JGE  emit_remainder_encodeBlockAsm10B
 3978     MOVQ -2(DX)(CX*1), SI
 3979     CMPQ AX, (SP)
 3980     JL   match_nolit_dst_ok_encodeBlockAsm10B
 3981     MOVQ $0x00000000, ret+48(FP)
 3982     RET
 3983 
 3984 match_nolit_dst_ok_encodeBlockAsm10B:
 3985     MOVQ  $0x9e3779b1, R8
 3986     MOVQ  SI, DI
 3987     SHRQ  $0x10, SI
 3988     MOVQ  SI, BP
 3989     SHLQ  $0x20, DI
 3990     IMULQ R8, DI
 3991     SHRQ  $0x36, DI
 3992     SHLQ  $0x20, BP
 3993     IMULQ R8, BP
 3994     SHRQ  $0x36, BP
 3995     LEAL  -2(CX), R8
 3996     LEAQ  24(SP)(BP*4), R9
 3997     MOVL  (R9), BP
 3998     MOVL  R8, 24(SP)(DI*4)
 3999     MOVL  CX, (R9)
 4000     CMPL  (DX)(BP*1), SI
 4001     JEQ   match_nolit_loop_encodeBlockAsm10B
 4002     INCL  CX
 4003     JMP   search_loop_encodeBlockAsm10B
 4004 
 4005 emit_remainder_encodeBlockAsm10B:
 4006     MOVQ src_len+32(FP), CX
 4007     SUBL 12(SP), CX
 4008     LEAQ 3(AX)(CX*1), CX
 4009     CMPQ CX, (SP)
 4010     JL   emit_remainder_ok_encodeBlockAsm10B
 4011     MOVQ $0x00000000, ret+48(FP)
 4012     RET
 4013 
 4014 emit_remainder_ok_encodeBlockAsm10B:
 4015     MOVQ src_len+32(FP), CX
 4016     MOVL 12(SP), BX
 4017     CMPL BX, CX
 4018     JEQ  emit_literal_done_emit_remainder_encodeBlockAsm10B
 4019     MOVL CX, BP
 4020     MOVL CX, 12(SP)
 4021     LEAQ (DX)(BX*1), CX
 4022     SUBL BX, BP
 4023     LEAL -1(BP), DX
 4024     CMPL DX, $0x3c
 4025     JLT  one_byte_emit_remainder_encodeBlockAsm10B
 4026     CMPL DX, $0x00000100
 4027     JLT  two_bytes_emit_remainder_encodeBlockAsm10B
 4028     MOVB $0xf4, (AX)
 4029     MOVW DX, 1(AX)
 4030     ADDQ $0x03, AX
 4031     JMP  memmove_long_emit_remainder_encodeBlockAsm10B
 4032 
 4033 two_bytes_emit_remainder_encodeBlockAsm10B:
 4034     MOVB $0xf0, (AX)
 4035     MOVB DL, 1(AX)
 4036     ADDQ $0x02, AX
 4037     CMPL DX, $0x40
 4038     JL   memmove_emit_remainder_encodeBlockAsm10B
 4039     JMP  memmove_long_emit_remainder_encodeBlockAsm10B
 4040 
 4041 one_byte_emit_remainder_encodeBlockAsm10B:
 4042     SHLB $0x02, DL
 4043     MOVB DL, (AX)
 4044     ADDQ $0x01, AX
 4045 
 4046 memmove_emit_remainder_encodeBlockAsm10B:
 4047     LEAQ (AX)(BP*1), DX
 4048     MOVL BP, BX
 4049 
 4050     // genMemMoveShort
 4051     CMPQ BX, $0x03
 4052     JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2
 4053     JE   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3
 4054     CMPQ BX, $0x08
 4055     JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7
 4056     CMPQ BX, $0x10
 4057     JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
 4058     CMPQ BX, $0x20
 4059     JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
 4060     JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
 4061 
 4062 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
 4063     MOVB (CX), BP
 4064     MOVB -1(CX)(BX*1), CL
 4065     MOVB BP, (AX)
 4066     MOVB CL, -1(AX)(BX*1)
 4067     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 4068 
 4069 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
 4070     MOVW (CX), BP
 4071     MOVB 2(CX), CL
 4072     MOVW BP, (AX)
 4073     MOVB CL, 2(AX)
 4074     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 4075 
 4076 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
 4077     MOVL (CX), BP
 4078     MOVL -4(CX)(BX*1), CX
 4079     MOVL BP, (AX)
 4080     MOVL CX, -4(AX)(BX*1)
 4081     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 4082 
 4083 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
 4084     MOVQ (CX), BP
 4085     MOVQ -8(CX)(BX*1), CX
 4086     MOVQ BP, (AX)
 4087     MOVQ CX, -8(AX)(BX*1)
 4088     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 4089 
 4090 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
 4091     MOVOU (CX), X0
 4092     MOVOU -16(CX)(BX*1), X1
 4093     MOVOU X0, (AX)
 4094     MOVOU X1, -16(AX)(BX*1)
 4095     JMP   memmove_end_copy_emit_remainder_encodeBlockAsm10B
 4096 
 4097 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
 4098     MOVOU (CX), X0
 4099     MOVOU 16(CX), X1
 4100     MOVOU -32(CX)(BX*1), X2
 4101     MOVOU -16(CX)(BX*1), X3
 4102     MOVOU X0, (AX)
 4103     MOVOU X1, 16(AX)
 4104     MOVOU X2, -32(AX)(BX*1)
 4105     MOVOU X3, -16(AX)(BX*1)
 4106 
 4107 memmove_end_copy_emit_remainder_encodeBlockAsm10B:
 4108     MOVQ DX, AX
 4109     JMP  emit_literal_done_emit_remainder_encodeBlockAsm10B
 4110 
 4111 memmove_long_emit_remainder_encodeBlockAsm10B:
 4112     LEAQ (AX)(BP*1), DX
 4113     MOVL BP, BX
 4114 
 4115     // genMemMoveLong
 4116     MOVOU (CX), X0
 4117     MOVOU 16(CX), X1
 4118     MOVOU -32(CX)(BX*1), X2
 4119     MOVOU -16(CX)(BX*1), X3
 4120     MOVQ  BX, SI
 4121     SHRQ  $0x05, SI
 4122     MOVQ  AX, BP
 4123     ANDL  $0x0000001f, BP
 4124     MOVQ  $0x00000040, DI
 4125     SUBQ  BP, DI
 4126     DECQ  SI
 4127     JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
 4128     LEAQ  -32(CX)(DI*1), BP
 4129     LEAQ  -32(AX)(DI*1), R8
 4130 
 4131 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
 4132     MOVOU (BP), X4
 4133     MOVOU 16(BP), X5
 4134     MOVOA X4, (R8)
 4135     MOVOA X5, 16(R8)
 4136     ADDQ  $0x20, R8
 4137     ADDQ  $0x20, BP
 4138     ADDQ  $0x20, DI
 4139     DECQ  SI
 4140     JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back
 4141 
 4142 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
 4143     MOVOU -32(CX)(DI*1), X4
 4144     MOVOU -16(CX)(DI*1), X5
 4145     MOVOA X4, -32(AX)(DI*1)
 4146     MOVOA X5, -16(AX)(DI*1)
 4147     ADDQ  $0x20, DI
 4148     CMPQ  BX, DI
 4149     JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
 4150     MOVOU X0, (AX)
 4151     MOVOU X1, 16(AX)
 4152     MOVOU X2, -32(AX)(BX*1)
 4153     MOVOU X3, -16(AX)(BX*1)
 4154     MOVQ  DX, AX
 4155 
 4156 emit_literal_done_emit_remainder_encodeBlockAsm10B:
 4157     MOVQ dst_base+0(FP), CX
 4158     SUBQ CX, AX
 4159     MOVQ AX, ret+48(FP)
 4160     RET
 4161 
 4162 // func encodeBlockAsm8B(dst []byte, src []byte) int
 4163 // Requires: SSE2
 4164 TEXT ·encodeBlockAsm8B(SB), $1048-56
 4165     MOVQ dst_base+0(FP), AX
 4166     MOVQ $0x00000008, CX
 4167     LEAQ 24(SP), DX
 4168     PXOR X0, X0
 4169 
 4170 zero_loop_encodeBlockAsm8B:
 4171     MOVOU X0, (DX)
 4172     MOVOU X0, 16(DX)
 4173     MOVOU X0, 32(DX)
 4174     MOVOU X0, 48(DX)
 4175     MOVOU X0, 64(DX)
 4176     MOVOU X0, 80(DX)
 4177     MOVOU X0, 96(DX)
 4178     MOVOU X0, 112(DX)
 4179     ADDQ  $0x80, DX
 4180     DECQ  CX
 4181     JNZ   zero_loop_encodeBlockAsm8B
 4182     MOVL  $0x00000000, 12(SP)
 4183     MOVQ  src_len+32(FP), CX
 4184     LEAQ  -5(CX), DX
 4185     LEAQ  -8(CX), BP
 4186     MOVL  BP, 8(SP)
 4187     SHRQ  $0x05, CX
 4188     SUBL  CX, DX
 4189     LEAQ  (AX)(DX*1), DX
 4190     MOVQ  DX, (SP)
 4191     MOVL  $0x00000001, CX
 4192     MOVL  CX, 16(SP)
 4193     MOVQ  src_base+24(FP), DX
 4194 
 4195 search_loop_encodeBlockAsm8B:
 4196     MOVQ  (DX)(CX*1), SI
 4197     MOVL  CX, BP
 4198     SUBL  12(SP), BP
 4199     SHRL  $0x04, BP
 4200     LEAL  4(CX)(BP*1), BP
 4201     CMPL  BP, 8(SP)
 4202     JGE   emit_remainder_encodeBlockAsm8B
 4203     MOVL  BP, 20(SP)
 4204     MOVQ  $0x9e3779b1, R8
 4205     MOVQ  SI, R9
 4206     MOVQ  SI, R10
 4207     SHRQ  $0x08, R10
 4208     SHLQ  $0x20, R9
 4209     IMULQ R8, R9
 4210     SHRQ  $0x38, R9
 4211     SHLQ  $0x20, R10
 4212     IMULQ R8, R10
 4213     SHRQ  $0x38, R10
 4214     MOVL  24(SP)(R9*4), BP
 4215     MOVL  24(SP)(R10*4), DI
 4216     MOVL  CX, 24(SP)(R9*4)
 4217     LEAL  1(CX), R9
 4218     MOVL  R9, 24(SP)(R10*4)
 4219     MOVQ  SI, R9
 4220     SHRQ  $0x10, R9
 4221     SHLQ  $0x20, R9
 4222     IMULQ R8, R9
 4223     SHRQ  $0x38, R9
 4224     MOVL  CX, R8
 4225     SUBL  16(SP), R8
 4226     MOVL  1(DX)(R8*1), R10
 4227     MOVQ  SI, R8
 4228     SHRQ  $0x08, R8
 4229     CMPL  R8, R10
 4230     JNE   no_repeat_found_encodeBlockAsm8B
 4231     LEAL  1(CX), SI
 4232     MOVL  12(SP), DI
 4233     MOVL  SI, BP
 4234     SUBL  16(SP), BP
 4235     JZ    repeat_extend_back_end_encodeBlockAsm8B
 4236 
 4237 repeat_extend_back_loop_encodeBlockAsm8B:
 4238     CMPL SI, DI
 4239     JLE  repeat_extend_back_end_encodeBlockAsm8B
 4240     MOVB -1(DX)(BP*1), BL
 4241     MOVB -1(DX)(SI*1), R8
 4242     CMPB BL, R8
 4243     JNE  repeat_extend_back_end_encodeBlockAsm8B
 4244     LEAL -1(SI), SI
 4245     DECL BP
 4246     JNZ  repeat_extend_back_loop_encodeBlockAsm8B
 4247 
 4248 repeat_extend_back_end_encodeBlockAsm8B:
 4249     MOVL 12(SP), BP
 4250     CMPL BP, SI
 4251     JEQ  emit_literal_done_repeat_emit_encodeBlockAsm8B
 4252     MOVL SI, R8
 4253     MOVL SI, 12(SP)
 4254     LEAQ (DX)(BP*1), R9
 4255     SUBL BP, R8
 4256     LEAL -1(R8), BP
 4257     CMPL BP, $0x3c
 4258     JLT  one_byte_repeat_emit_encodeBlockAsm8B
 4259     CMPL BP, $0x00000100
 4260     JLT  two_bytes_repeat_emit_encodeBlockAsm8B
 4261     MOVB $0xf4, (AX)
 4262     MOVW BP, 1(AX)
 4263     ADDQ $0x03, AX
 4264     JMP  memmove_long_repeat_emit_encodeBlockAsm8B
 4265 
 4266 two_bytes_repeat_emit_encodeBlockAsm8B:
 4267     MOVB $0xf0, (AX)
 4268     MOVB BP, 1(AX)
 4269     ADDQ $0x02, AX
 4270     CMPL BP, $0x40
 4271     JL   memmove_repeat_emit_encodeBlockAsm8B
 4272     JMP  memmove_long_repeat_emit_encodeBlockAsm8B
 4273 
 4274 one_byte_repeat_emit_encodeBlockAsm8B:
 4275     SHLB $0x02, BP
 4276     MOVB BP, (AX)
 4277     ADDQ $0x01, AX
 4278 
 4279 memmove_repeat_emit_encodeBlockAsm8B:
 4280     LEAQ (AX)(R8*1), BP
 4281 
 4282     // genMemMoveShort
 4283     CMPQ R8, $0x03
 4284     JB   emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2
 4285     JE   emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3
 4286     CMPQ R8, $0x08
 4287     JB   emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4through7
 4288     CMPQ R8, $0x10
 4289     JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
 4290     CMPQ R8, $0x20
 4291     JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
 4292     JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
 4293 
 4294 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2:
 4295     MOVB (R9), R10
 4296     MOVB -1(R9)(R8*1), R9
 4297     MOVB R10, (AX)
 4298     MOVB R9, -1(AX)(R8*1)
 4299     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
 4300 
 4301 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3:
 4302     MOVW (R9), R10
 4303     MOVB 2(R9), R9
 4304     MOVW R10, (AX)
 4305     MOVB R9, 2(AX)
 4306     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
 4307 
 4308 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4through7:
 4309     MOVL (R9), R10
 4310     MOVL -4(R9)(R8*1), R9
 4311     MOVL R10, (AX)
 4312     MOVL R9, -4(AX)(R8*1)
 4313     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
 4314 
 4315 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
 4316     MOVQ (R9), R10
 4317     MOVQ -8(R9)(R8*1), R9
 4318     MOVQ R10, (AX)
 4319     MOVQ R9, -8(AX)(R8*1)
 4320     JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
 4321 
 4322 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
 4323     MOVOU (R9), X0
 4324     MOVOU -16(R9)(R8*1), X1
 4325     MOVOU X0, (AX)
 4326     MOVOU X1, -16(AX)(R8*1)
 4327     JMP   memmove_end_copy_repeat_emit_encodeBlockAsm8B
 4328 
 4329 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
 4330     MOVOU (R9), X0
 4331     MOVOU 16(R9), X1
 4332     MOVOU -32(R9)(R8*1), X2
 4333     MOVOU -16(R9)(R8*1), X3
 4334     MOVOU X0, (AX)
 4335     MOVOU X1, 16(AX)
 4336     MOVOU X2, -32(AX)(R8*1)
 4337     MOVOU X3, -16(AX)(R8*1)
 4338 
 4339 memmove_end_copy_repeat_emit_encodeBlockAsm8B:
 4340     MOVQ BP, AX
 4341     JMP  emit_literal_done_repeat_emit_encodeBlockAsm8B
 4342 
 4343 memmove_long_repeat_emit_encodeBlockAsm8B:
 4344     LEAQ (AX)(R8*1), BP
 4345 
 4346     // genMemMoveLong
 4347     MOVOU (R9), X0
 4348     MOVOU 16(R9), X1
 4349     MOVOU -32(R9)(R8*1), X2
 4350     MOVOU -16(R9)(R8*1), X3
 4351     MOVQ  R8, R11
 4352     SHRQ  $0x05, R11
 4353     MOVQ  AX, R10
 4354     ANDL  $0x0000001f, R10
 4355     MOVQ  $0x00000040, R12
 4356     SUBQ  R10, R12
 4357     DECQ  R11
 4358     JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
 4359     LEAQ  -32(R9)(R12*1), R10
 4360     LEAQ  -32(AX)(R12*1), R13
 4361 
 4362 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
 4363     MOVOU (R10), X4
 4364     MOVOU 16(R10), X5
 4365     MOVOA X4, (R13)
 4366     MOVOA X5, 16(R13)
 4367     ADDQ  $0x20, R13
 4368     ADDQ  $0x20, R10
 4369     ADDQ  $0x20, R12
 4370     DECQ  R11
 4371     JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back
 4372 
 4373 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
 4374     MOVOU -32(R9)(R12*1), X4
 4375     MOVOU -16(R9)(R12*1), X5
 4376     MOVOA X4, -32(AX)(R12*1)
 4377     MOVOA X5, -16(AX)(R12*1)
 4378     ADDQ  $0x20, R12
 4379     CMPQ  R8, R12
 4380     JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
 4381     MOVOU X0, (AX)
 4382     MOVOU X1, 16(AX)
 4383     MOVOU X2, -32(AX)(R8*1)
 4384     MOVOU X3, -16(AX)(R8*1)
 4385     MOVQ  BP, AX
 4386 
 4387 emit_literal_done_repeat_emit_encodeBlockAsm8B:
 4388     ADDL $0x05, CX
 4389     MOVL CX, BP
 4390     SUBL 16(SP), BP
 4391     MOVQ src_len+32(FP), R8
 4392     SUBL CX, R8
 4393     LEAQ (DX)(CX*1), R9
 4394     LEAQ (DX)(BP*1), BP
 4395 
 4396     // matchLen
 4397     XORL R11, R11
 4398     CMPL R8, $0x08
 4399     JL   matchlen_single_repeat_extend_encodeBlockAsm8B
 4400 
 4401 matchlen_loopback_repeat_extend_encodeBlockAsm8B:
 4402     MOVQ  (R9)(R11*1), R10
 4403     XORQ  (BP)(R11*1), R10
 4404     TESTQ R10, R10
 4405     JZ    matchlen_loop_repeat_extend_encodeBlockAsm8B
 4406     BSFQ  R10, R10
 4407     SARQ  $0x03, R10
 4408     LEAL  (R11)(R10*1), R11
 4409     JMP   repeat_extend_forward_end_encodeBlockAsm8B
 4410 
 4411 matchlen_loop_repeat_extend_encodeBlockAsm8B:
 4412     LEAL -8(R8), R8
 4413     LEAL 8(R11), R11
 4414     CMPL R8, $0x08
 4415     JGE  matchlen_loopback_repeat_extend_encodeBlockAsm8B
 4416 
 4417 matchlen_single_repeat_extend_encodeBlockAsm8B:
 4418     TESTL R8, R8
 4419     JZ    repeat_extend_forward_end_encodeBlockAsm8B
 4420 
 4421 matchlen_single_loopback_repeat_extend_encodeBlockAsm8B:
 4422     MOVB (R9)(R11*1), R10
 4423     CMPB (BP)(R11*1), R10
 4424     JNE  repeat_extend_forward_end_encodeBlockAsm8B
 4425     LEAL 1(R11), R11
 4426     DECL R8
 4427     JNZ  matchlen_single_loopback_repeat_extend_encodeBlockAsm8B
 4428 
 4429 repeat_extend_forward_end_encodeBlockAsm8B:
 4430     ADDL  R11, CX
 4431     MOVL  CX, BP
 4432     SUBL  SI, BP
 4433     MOVL  16(SP), SI
 4434     TESTL DI, DI
 4435     JZ    repeat_as_copy_encodeBlockAsm8B
 4436 
 4437     // emitRepeat
 4438     MOVL BP, SI
 4439     LEAL -4(BP), BP
 4440     CMPL SI, $0x08
 4441     JLE  repeat_two_match_repeat_encodeBlockAsm8B
 4442     CMPL SI, $0x0c
 4443     JGE  cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
 4444 
 4445 cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
 4446     CMPL BP, $0x00000104
 4447     JLT  repeat_three_match_repeat_encodeBlockAsm8B
 4448     LEAL -256(BP), BP
 4449     MOVW $0x0019, (AX)
 4450     MOVW BP, 2(AX)
 4451     ADDQ $0x04, AX
 4452     JMP  repeat_end_emit_encodeBlockAsm8B
 4453 
 4454 repeat_three_match_repeat_encodeBlockAsm8B:
 4455     LEAL -4(BP), BP
 4456     MOVW $0x0015, (AX)
 4457     MOVB BP, 2(AX)
 4458     ADDQ $0x03, AX
 4459     JMP  repeat_end_emit_encodeBlockAsm8B
 4460 
 4461 repeat_two_match_repeat_encodeBlockAsm8B:
 4462     SHLL $0x02, BP
 4463     ORL  $0x01, BP
 4464     MOVW BP, (AX)
 4465     ADDQ $0x02, AX
 4466     JMP  repeat_end_emit_encodeBlockAsm8B
 4467     XORQ DI, DI
 4468     LEAL 1(DI)(BP*4), BP
 4469     MOVB SI, 1(AX)
 4470     SARL $0x08, SI
 4471     SHLL $0x05, SI
 4472     ORL  SI, BP
 4473     MOVB BP, (AX)
 4474     ADDQ $0x02, AX
 4475     JMP  repeat_end_emit_encodeBlockAsm8B
 4476 
 4477 repeat_as_copy_encodeBlockAsm8B:
 4478     // emitCopy
 4479 two_byte_offset_repeat_as_copy_encodeBlockAsm8B:
 4480     CMPL BP, $0x40
 4481     JLE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
 4482     MOVB $0xee, (AX)
 4483     MOVW SI, 1(AX)
 4484     LEAL -60(BP), BP
 4485     ADDQ $0x03, AX
 4486 
 4487     // emitRepeat
 4488     MOVL BP, SI
 4489     LEAL -4(BP), BP
 4490     CMPL SI, $0x08
 4491     JLE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
 4492     CMPL SI, $0x0c
 4493     JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
 4494 
 4495 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
 4496     CMPL BP, $0x00000104
 4497     JLT  repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
 4498     LEAL -256(BP), BP
 4499     MOVW $0x0019, (AX)
 4500     MOVW BP, 2(AX)
 4501     ADDQ $0x04, AX
 4502     JMP  repeat_end_emit_encodeBlockAsm8B
 4503 
 4504 repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
 4505     LEAL -4(BP), BP
 4506     MOVW $0x0015, (AX)
 4507     MOVB BP, 2(AX)
 4508     ADDQ $0x03, AX
 4509     JMP  repeat_end_emit_encodeBlockAsm8B
 4510 
 4511 repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
 4512     SHLL $0x02, BP
 4513     ORL  $0x01, BP
 4514     MOVW BP, (AX)
 4515     ADDQ $0x02, AX
 4516     JMP  repeat_end_emit_encodeBlockAsm8B
 4517     XORQ DI, DI
 4518     LEAL 1(DI)(BP*4), BP
 4519     MOVB SI, 1(AX)
 4520     SARL $0x08, SI
 4521     SHLL $0x05, SI
 4522     ORL  SI, BP
 4523     MOVB BP, (AX)
 4524     ADDQ $0x02, AX
 4525     JMP  repeat_end_emit_encodeBlockAsm8B
 4526     JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B
 4527 
 4528 two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
 4529     CMPL BP, $0x0c
 4530     JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm8B
 4531     MOVB $0x01, BL
 4532     LEAL -16(BX)(BP*4), BP
 4533     MOVB SI, 1(AX)
 4534     SHRL $0x08, SI
 4535     SHLL $0x05, SI
 4536     ORL  SI, BP
 4537     MOVB BP, (AX)
 4538     ADDQ $0x02, AX
 4539     JMP  repeat_end_emit_encodeBlockAsm8B
 4540 
 4541 emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
 4542     MOVB $0x02, BL
 4543     LEAL -4(BX)(BP*4), BP
 4544     MOVB BP, (AX)
 4545     MOVW SI, 1(AX)
 4546     ADDQ $0x03, AX
 4547 
 4548 repeat_end_emit_encodeBlockAsm8B:
 4549     MOVL CX, 12(SP)
 4550     JMP  search_loop_encodeBlockAsm8B
 4551 
 4552 no_repeat_found_encodeBlockAsm8B:
 4553     CMPL (DX)(BP*1), SI
 4554     JEQ  candidate_match_encodeBlockAsm8B
 4555     SHRQ $0x08, SI
 4556     MOVL 24(SP)(R9*4), BP
 4557     LEAL 2(CX), R8
 4558     CMPL (DX)(DI*1), SI
 4559     JEQ  candidate2_match_encodeBlockAsm8B
 4560     MOVL R8, 24(SP)(R9*4)
 4561     SHRQ $0x08, SI
 4562     CMPL (DX)(BP*1), SI
 4563     JEQ  candidate3_match_encodeBlockAsm8B
 4564     MOVL 20(SP), CX
 4565     JMP  search_loop_encodeBlockAsm8B
 4566 
 4567 candidate3_match_encodeBlockAsm8B:
 4568     ADDL $0x02, CX
 4569     JMP  candidate_match_encodeBlockAsm8B
 4570 
 4571 candidate2_match_encodeBlockAsm8B:
 4572     MOVL R8, 24(SP)(R9*4)
 4573     INCL CX
 4574     MOVL DI, BP
 4575 
 4576 candidate_match_encodeBlockAsm8B:
 4577     MOVL  12(SP), SI
 4578     TESTL BP, BP
 4579     JZ    match_extend_back_end_encodeBlockAsm8B
 4580 
 4581 match_extend_back_loop_encodeBlockAsm8B:
 4582     CMPL CX, SI
 4583     JLE  match_extend_back_end_encodeBlockAsm8B
 4584     MOVB -1(DX)(BP*1), BL
 4585     MOVB -1(DX)(CX*1), DI
 4586     CMPB BL, DI
 4587     JNE  match_extend_back_end_encodeBlockAsm8B
 4588     LEAL -1(CX), CX
 4589     DECL BP
 4590     JZ   match_extend_back_end_encodeBlockAsm8B
 4591     JMP  match_extend_back_loop_encodeBlockAsm8B
 4592 
 4593 match_extend_back_end_encodeBlockAsm8B:
 4594     MOVL CX, SI
 4595     SUBL 12(SP), SI
 4596     LEAQ 3(AX)(SI*1), SI
 4597     CMPQ SI, (SP)
 4598     JL   match_dst_size_check_encodeBlockAsm8B
 4599     MOVQ $0x00000000, ret+48(FP)
 4600     RET
 4601 
 4602 match_dst_size_check_encodeBlockAsm8B:
 4603     MOVL CX, SI
 4604     MOVL 12(SP), DI
 4605     CMPL DI, SI
 4606     JEQ  emit_literal_done_match_emit_encodeBlockAsm8B
 4607     MOVL SI, R8
 4608     MOVL SI, 12(SP)
 4609     LEAQ (DX)(DI*1), SI
 4610     SUBL DI, R8
 4611     LEAL -1(R8), DI
 4612     CMPL DI, $0x3c
 4613     JLT  one_byte_match_emit_encodeBlockAsm8B
 4614     CMPL DI, $0x00000100
 4615     JLT  two_bytes_match_emit_encodeBlockAsm8B
 4616     MOVB $0xf4, (AX)
 4617     MOVW DI, 1(AX)
 4618     ADDQ $0x03, AX
 4619     JMP  memmove_long_match_emit_encodeBlockAsm8B
 4620 
 4621 two_bytes_match_emit_encodeBlockAsm8B:
 4622     MOVB $0xf0, (AX)
 4623     MOVB DI, 1(AX)
 4624     ADDQ $0x02, AX
 4625     CMPL DI, $0x40
 4626     JL   memmove_match_emit_encodeBlockAsm8B
 4627     JMP  memmove_long_match_emit_encodeBlockAsm8B
 4628 
 4629 one_byte_match_emit_encodeBlockAsm8B:
 4630     SHLB $0x02, DI
 4631     MOVB DI, (AX)
 4632     ADDQ $0x01, AX
 4633 
 4634 memmove_match_emit_encodeBlockAsm8B:
 4635     LEAQ (AX)(R8*1), DI
 4636 
 4637     // genMemMoveShort
 4638     CMPQ R8, $0x03
 4639     JB   emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2
 4640     JE   emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3
 4641     CMPQ R8, $0x08
 4642     JB   emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4through7
 4643     CMPQ R8, $0x10
 4644     JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
 4645     CMPQ R8, $0x20
 4646     JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
 4647     JMP  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
 4648 
 4649 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2:
 4650     MOVB (SI), R9
 4651     MOVB -1(SI)(R8*1), SI
 4652     MOVB R9, (AX)
 4653     MOVB SI, -1(AX)(R8*1)
 4654     JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
 4655 
 4656 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3:
 4657     MOVW (SI), R9
 4658     MOVB 2(SI), SI
 4659     MOVW R9, (AX)
 4660     MOVB SI, 2(AX)
 4661     JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
 4662 
 4663 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4through7:
 4664     MOVL (SI), R9
 4665     MOVL -4(SI)(R8*1), SI
 4666     MOVL R9, (AX)
 4667     MOVL SI, -4(AX)(R8*1)
 4668     JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
 4669 
 4670 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
 4671     MOVQ (SI), R9
 4672     MOVQ -8(SI)(R8*1), SI
 4673     MOVQ R9, (AX)
 4674     MOVQ SI, -8(AX)(R8*1)
 4675     JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
 4676 
 4677 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
 4678     MOVOU (SI), X0
 4679     MOVOU -16(SI)(R8*1), X1
 4680     MOVOU X0, (AX)
 4681     MOVOU X1, -16(AX)(R8*1)
 4682     JMP   memmove_end_copy_match_emit_encodeBlockAsm8B
 4683 
 4684 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
 4685     MOVOU (SI), X0
 4686     MOVOU 16(SI), X1
 4687     MOVOU -32(SI)(R8*1), X2
 4688     MOVOU -16(SI)(R8*1), X3
 4689     MOVOU X0, (AX)
 4690     MOVOU X1, 16(AX)
 4691     MOVOU X2, -32(AX)(R8*1)
 4692     MOVOU X3, -16(AX)(R8*1)
 4693 
 4694 memmove_end_copy_match_emit_encodeBlockAsm8B:
 4695     MOVQ DI, AX
 4696     JMP  emit_literal_done_match_emit_encodeBlockAsm8B
 4697 
 4698 memmove_long_match_emit_encodeBlockAsm8B:
 4699     LEAQ (AX)(R8*1), DI
 4700 
 4701     // genMemMoveLong
 4702     MOVOU (SI), X0
 4703     MOVOU 16(SI), X1
 4704     MOVOU -32(SI)(R8*1), X2
 4705     MOVOU -16(SI)(R8*1), X3
 4706     MOVQ  R8, R10
 4707     SHRQ  $0x05, R10
 4708     MOVQ  AX, R9
 4709     ANDL  $0x0000001f, R9
 4710     MOVQ  $0x00000040, R11
 4711     SUBQ  R9, R11
 4712     DECQ  R10
 4713     JA    emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
 4714     LEAQ  -32(SI)(R11*1), R9
 4715     LEAQ  -32(AX)(R11*1), R12
 4716 
 4717 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
 4718     MOVOU (R9), X4
 4719     MOVOU 16(R9), X5
 4720     MOVOA X4, (R12)
 4721     MOVOA X5, 16(R12)
 4722     ADDQ  $0x20, R12
 4723     ADDQ  $0x20, R9
 4724     ADDQ  $0x20, R11
 4725     DECQ  R10
 4726     JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back
 4727 
 4728 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
 4729     MOVOU -32(SI)(R11*1), X4
 4730     MOVOU -16(SI)(R11*1), X5
 4731     MOVOA X4, -32(AX)(R11*1)
 4732     MOVOA X5, -16(AX)(R11*1)
 4733     ADDQ  $0x20, R11
 4734     CMPQ  R8, R11
 4735     JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
 4736     MOVOU X0, (AX)
 4737     MOVOU X1, 16(AX)
 4738     MOVOU X2, -32(AX)(R8*1)
 4739     MOVOU X3, -16(AX)(R8*1)
 4740     MOVQ  DI, AX
 4741 
 4742 emit_literal_done_match_emit_encodeBlockAsm8B:
 4743 match_nolit_loop_encodeBlockAsm8B:
 4744     MOVL CX, SI
 4745     SUBL BP, SI
 4746     MOVL SI, 16(SP)
 4747     ADDL $0x04, CX
 4748     ADDL $0x04, BP
 4749     MOVQ src_len+32(FP), SI
 4750     SUBL CX, SI
 4751     LEAQ (DX)(CX*1), DI
 4752     LEAQ (DX)(BP*1), BP
 4753 
 4754     // matchLen
 4755     XORL R9, R9
 4756     CMPL SI, $0x08
 4757     JL   matchlen_single_match_nolit_encodeBlockAsm8B
 4758 
 4759 matchlen_loopback_match_nolit_encodeBlockAsm8B:
 4760     MOVQ  (DI)(R9*1), R8
 4761     XORQ  (BP)(R9*1), R8
 4762     TESTQ R8, R8
 4763     JZ    matchlen_loop_match_nolit_encodeBlockAsm8B
 4764     BSFQ  R8, R8
 4765     SARQ  $0x03, R8
 4766     LEAL  (R9)(R8*1), R9
 4767     JMP   match_nolit_end_encodeBlockAsm8B
 4768 
 4769 matchlen_loop_match_nolit_encodeBlockAsm8B:
 4770     LEAL -8(SI), SI
 4771     LEAL 8(R9), R9
 4772     CMPL SI, $0x08
 4773     JGE  matchlen_loopback_match_nolit_encodeBlockAsm8B
 4774 
 4775 matchlen_single_match_nolit_encodeBlockAsm8B:
 4776     TESTL SI, SI
 4777     JZ    match_nolit_end_encodeBlockAsm8B
 4778 
 4779 matchlen_single_loopback_match_nolit_encodeBlockAsm8B:
 4780     MOVB (DI)(R9*1), R8
 4781     CMPB (BP)(R9*1), R8
 4782     JNE  match_nolit_end_encodeBlockAsm8B
 4783     LEAL 1(R9), R9
 4784     DECL SI
 4785     JNZ  matchlen_single_loopback_match_nolit_encodeBlockAsm8B
 4786 
 4787 match_nolit_end_encodeBlockAsm8B:
 4788     ADDL R9, CX
 4789     MOVL 16(SP), BP
 4790     ADDL $0x04, R9
 4791     MOVL CX, 12(SP)
 4792 
 4793     // emitCopy
 4794 two_byte_offset_match_nolit_encodeBlockAsm8B:
 4795     CMPL R9, $0x40
 4796     JLE  two_byte_offset_short_match_nolit_encodeBlockAsm8B
 4797     MOVB $0xee, (AX)
 4798     MOVW BP, 1(AX)
 4799     LEAL -60(R9), R9
 4800     ADDQ $0x03, AX
 4801 
 4802     // emitRepeat
 4803     MOVL R9, BP
 4804     LEAL -4(R9), R9
 4805     CMPL BP, $0x08
 4806     JLE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
 4807     CMPL BP, $0x0c
 4808     JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
 4809 
 4810 cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
 4811     CMPL R9, $0x00000104
 4812     JLT  repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
 4813     LEAL -256(R9), R9
 4814     MOVW $0x0019, (AX)
 4815     MOVW R9, 2(AX)
 4816     ADDQ $0x04, AX
 4817     JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 4818 
 4819 repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
 4820     LEAL -4(R9), R9
 4821     MOVW $0x0015, (AX)
 4822     MOVB R9, 2(AX)
 4823     ADDQ $0x03, AX
 4824     JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 4825 
 4826 repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
 4827     SHLL $0x02, R9
 4828     ORL  $0x01, R9
 4829     MOVW R9, (AX)
 4830     ADDQ $0x02, AX
 4831     JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 4832     XORQ SI, SI
 4833     LEAL 1(SI)(R9*4), R9
 4834     MOVB BP, 1(AX)
 4835     SARL $0x08, BP
 4836     SHLL $0x05, BP
 4837     ORL  BP, R9
 4838     MOVB R9, (AX)
 4839     ADDQ $0x02, AX
 4840     JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 4841     JMP two_byte_offset_match_nolit_encodeBlockAsm8B
 4842 
 4843 two_byte_offset_short_match_nolit_encodeBlockAsm8B:
 4844     CMPL R9, $0x0c
 4845     JGE  emit_copy_three_match_nolit_encodeBlockAsm8B
 4846     MOVB $0x01, BL
 4847     LEAL -16(BX)(R9*4), R9
 4848     MOVB BP, 1(AX)
 4849     SHRL $0x08, BP
 4850     SHLL $0x05, BP
 4851     ORL  BP, R9
 4852     MOVB R9, (AX)
 4853     ADDQ $0x02, AX
 4854     JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 4855 
 4856 emit_copy_three_match_nolit_encodeBlockAsm8B:
 4857     MOVB $0x02, BL
 4858     LEAL -4(BX)(R9*4), R9
 4859     MOVB R9, (AX)
 4860     MOVW BP, 1(AX)
 4861     ADDQ $0x03, AX
 4862 
 4863 match_nolit_emitcopy_end_encodeBlockAsm8B:
 4864     CMPL CX, 8(SP)
 4865     JGE  emit_remainder_encodeBlockAsm8B
 4866     MOVQ -2(DX)(CX*1), SI
 4867     CMPQ AX, (SP)
 4868     JL   match_nolit_dst_ok_encodeBlockAsm8B
 4869     MOVQ $0x00000000, ret+48(FP)
 4870     RET
 4871 
 4872 match_nolit_dst_ok_encodeBlockAsm8B:
 4873     MOVQ  $0x9e3779b1, R8
 4874     MOVQ  SI, DI
 4875     SHRQ  $0x10, SI
 4876     MOVQ  SI, BP
 4877     SHLQ  $0x20, DI
 4878     IMULQ R8, DI
 4879     SHRQ  $0x38, DI
 4880     SHLQ  $0x20, BP
 4881     IMULQ R8, BP
 4882     SHRQ  $0x38, BP
 4883     LEAL  -2(CX), R8
 4884     LEAQ  24(SP)(BP*4), R9
 4885     MOVL  (R9), BP
 4886     MOVL  R8, 24(SP)(DI*4)
 4887     MOVL  CX, (R9)
 4888     CMPL  (DX)(BP*1), SI
 4889     JEQ   match_nolit_loop_encodeBlockAsm8B
 4890     INCL  CX
 4891     JMP   search_loop_encodeBlockAsm8B
 4892 
 4893 emit_remainder_encodeBlockAsm8B:
 4894     MOVQ src_len+32(FP), CX
 4895     SUBL 12(SP), CX
 4896     LEAQ 3(AX)(CX*1), CX
 4897     CMPQ CX, (SP)
 4898     JL   emit_remainder_ok_encodeBlockAsm8B
 4899     MOVQ $0x00000000, ret+48(FP)
 4900     RET
 4901 
 4902 emit_remainder_ok_encodeBlockAsm8B:
 4903     MOVQ src_len+32(FP), CX
 4904     MOVL 12(SP), BX
 4905     CMPL BX, CX
 4906     JEQ  emit_literal_done_emit_remainder_encodeBlockAsm8B
 4907     MOVL CX, BP
 4908     MOVL CX, 12(SP)
 4909     LEAQ (DX)(BX*1), CX
 4910     SUBL BX, BP
 4911     LEAL -1(BP), DX
 4912     CMPL DX, $0x3c
 4913     JLT  one_byte_emit_remainder_encodeBlockAsm8B
 4914     CMPL DX, $0x00000100
 4915     JLT  two_bytes_emit_remainder_encodeBlockAsm8B
 4916     MOVB $0xf4, (AX)
 4917     MOVW DX, 1(AX)
 4918     ADDQ $0x03, AX
 4919     JMP  memmove_long_emit_remainder_encodeBlockAsm8B
 4920 
 4921 two_bytes_emit_remainder_encodeBlockAsm8B:
 4922     MOVB $0xf0, (AX)
 4923     MOVB DL, 1(AX)
 4924     ADDQ $0x02, AX
 4925     CMPL DX, $0x40
 4926     JL   memmove_emit_remainder_encodeBlockAsm8B
 4927     JMP  memmove_long_emit_remainder_encodeBlockAsm8B
 4928 
 4929 one_byte_emit_remainder_encodeBlockAsm8B:
 4930     SHLB $0x02, DL
 4931     MOVB DL, (AX)
 4932     ADDQ $0x01, AX
 4933 
 4934 memmove_emit_remainder_encodeBlockAsm8B:
 4935     LEAQ (AX)(BP*1), DX
 4936     MOVL BP, BX
 4937 
 4938     // genMemMoveShort
 4939     CMPQ BX, $0x03
 4940     JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
 4941     JE   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
 4942     CMPQ BX, $0x08
 4943     JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7
 4944     CMPQ BX, $0x10
 4945     JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
 4946     CMPQ BX, $0x20
 4947     JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
 4948     JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
 4949 
 4950 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
 4951     MOVB (CX), BP
 4952     MOVB -1(CX)(BX*1), CL
 4953     MOVB BP, (AX)
 4954     MOVB CL, -1(AX)(BX*1)
 4955     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 4956 
 4957 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
 4958     MOVW (CX), BP
 4959     MOVB 2(CX), CL
 4960     MOVW BP, (AX)
 4961     MOVB CL, 2(AX)
 4962     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 4963 
 4964 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
 4965     MOVL (CX), BP
 4966     MOVL -4(CX)(BX*1), CX
 4967     MOVL BP, (AX)
 4968     MOVL CX, -4(AX)(BX*1)
 4969     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 4970 
 4971 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
 4972     MOVQ (CX), BP
 4973     MOVQ -8(CX)(BX*1), CX
 4974     MOVQ BP, (AX)
 4975     MOVQ CX, -8(AX)(BX*1)
 4976     JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 4977 
 4978 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
 4979     MOVOU (CX), X0
 4980     MOVOU -16(CX)(BX*1), X1
 4981     MOVOU X0, (AX)
 4982     MOVOU X1, -16(AX)(BX*1)
 4983     JMP   memmove_end_copy_emit_remainder_encodeBlockAsm8B
 4984 
 4985 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
 4986     MOVOU (CX), X0
 4987     MOVOU 16(CX), X1
 4988     MOVOU -32(CX)(BX*1), X2
 4989     MOVOU -16(CX)(BX*1), X3
 4990     MOVOU X0, (AX)
 4991     MOVOU X1, 16(AX)
 4992     MOVOU X2, -32(AX)(BX*1)
 4993     MOVOU X3, -16(AX)(BX*1)
 4994 
 4995 memmove_end_copy_emit_remainder_encodeBlockAsm8B:
 4996     MOVQ DX, AX
 4997     JMP  emit_literal_done_emit_remainder_encodeBlockAsm8B
 4998 
 4999 memmove_long_emit_remainder_encodeBlockAsm8B:
 5000     LEAQ (AX)(BP*1), DX
 5001     MOVL BP, BX
 5002 
 5003     // genMemMoveLong
 5004     MOVOU (CX), X0
 5005     MOVOU 16(CX), X1
 5006     MOVOU -32(CX)(BX*1), X2
 5007     MOVOU -16(CX)(BX*1), X3
 5008     MOVQ  BX, SI
 5009     SHRQ  $0x05, SI
 5010     MOVQ  AX, BP
 5011     ANDL  $0x0000001f, BP
 5012     MOVQ  $0x00000040, DI
 5013     SUBQ  BP, DI
 5014     DECQ  SI
 5015     JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
 5016     LEAQ  -32(CX)(DI*1), BP
 5017     LEAQ  -32(AX)(DI*1), R8
 5018 
 5019 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
 5020     MOVOU (BP), X4
 5021     MOVOU 16(BP), X5
 5022     MOVOA X4, (R8)
 5023     MOVOA X5, 16(R8)
 5024     ADDQ  $0x20, R8
 5025     ADDQ  $0x20, BP
 5026     ADDQ  $0x20, DI
 5027     DECQ  SI
 5028     JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back
 5029 
 5030 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
 5031     MOVOU -32(CX)(DI*1), X4
 5032     MOVOU -16(CX)(DI*1), X5
 5033     MOVOA X4, -32(AX)(DI*1)
 5034     MOVOA X5, -16(AX)(DI*1)
 5035     ADDQ  $0x20, DI
 5036     CMPQ  BX, DI
 5037     JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
 5038     MOVOU X0, (AX)
 5039     MOVOU X1, 16(AX)
 5040     MOVOU X2, -32(AX)(BX*1)
 5041     MOVOU X3, -16(AX)(BX*1)
 5042     MOVQ  DX, AX
 5043 
 5044 emit_literal_done_emit_remainder_encodeBlockAsm8B:
 5045     MOVQ dst_base+0(FP), CX
 5046     SUBQ CX, AX
 5047     MOVQ AX, ret+48(FP)
 5048     RET
 5049 
 5050 // func encodeBetterBlockAsm(dst []byte, src []byte) int
 5051 // Requires: SSE2
 5052 TEXT ·encodeBetterBlockAsm(SB), $327704-56
 5053     MOVQ dst_base+0(FP), AX
 5054     MOVQ $0x00000a00, CX
 5055     LEAQ 24(SP), DX
 5056     PXOR X0, X0
 5057 
 5058 zero_loop_encodeBetterBlockAsm:
 5059     MOVOU X0, (DX)
 5060     MOVOU X0, 16(DX)
 5061     MOVOU X0, 32(DX)
 5062     MOVOU X0, 48(DX)
 5063     MOVOU X0, 64(DX)
 5064     MOVOU X0, 80(DX)
 5065     MOVOU X0, 96(DX)
 5066     MOVOU X0, 112(DX)
 5067     ADDQ  $0x80, DX
 5068     DECQ  CX
 5069     JNZ   zero_loop_encodeBetterBlockAsm
 5070     MOVL  $0x00000000, 12(SP)
 5071     MOVQ  src_len+32(FP), CX
 5072     LEAQ  -6(CX), DX
 5073     LEAQ  -8(CX), BP
 5074     MOVL  BP, 8(SP)
 5075     SHRQ  $0x05, CX
 5076     SUBL  CX, DX
 5077     LEAQ  (AX)(DX*1), DX
 5078     MOVQ  DX, (SP)
 5079     MOVL  $0x00000001, CX
 5080     MOVL  $0x00000000, 16(SP)
 5081     MOVQ  src_base+24(FP), DX
 5082 
 5083 search_loop_encodeBetterBlockAsm:
 5084     MOVQ  (DX)(CX*1), SI
 5085     MOVL  CX, BP
 5086     SUBL  12(SP), BP
 5087     SHRL  $0x07, BP
 5088     LEAL  1(CX)(BP*1), BP
 5089     CMPL  BP, 8(SP)
 5090     JGE   emit_remainder_encodeBetterBlockAsm
 5091     MOVL  BP, 20(SP)
 5092     MOVQ  $0x00cf1bbcdcbfa563, R8
 5093     MOVQ  $0x9e3779b1, BP
 5094     MOVQ  SI, R9
 5095     MOVQ  SI, R10
 5096     SHLQ  $0x08, R9
 5097     IMULQ R8, R9
 5098     SHRQ  $0x30, R9
 5099     SHLQ  $0x20, R10
 5100     IMULQ BP, R10
 5101     SHRQ  $0x32, R10
 5102     MOVL  24(SP)(R9*4), BP
 5103     MOVL  262168(SP)(R10*4), DI
 5104     MOVL  CX, 24(SP)(R9*4)
 5105     MOVL  CX, 262168(SP)(R10*4)
 5106     CMPL  (DX)(BP*1), SI
 5107     JEQ   candidate_match_encodeBetterBlockAsm
 5108     CMPL  (DX)(DI*1), SI
 5109     JEQ   candidateS_match_encodeBetterBlockAsm
 5110     MOVL  20(SP), CX
 5111     JMP   search_loop_encodeBetterBlockAsm
 5112 
 5113 candidateS_match_encodeBetterBlockAsm:
 5114     SHRQ  $0x08, SI
 5115     MOVQ  SI, R9
 5116     SHLQ  $0x08, R9
 5117     IMULQ R8, R9
 5118     SHRQ  $0x30, R9
 5119     MOVL  24(SP)(R9*4), BP
 5120     INCL  CX
 5121     MOVL  CX, 24(SP)(R9*4)
 5122     CMPL  (DX)(BP*1), SI
 5123     JEQ   candidate_match_encodeBetterBlockAsm
 5124     DECL  CX
 5125     MOVL  DI, BP
 5126 
 5127 candidate_match_encodeBetterBlockAsm:
 5128     MOVL  12(SP), SI
 5129     TESTL BP, BP
 5130     JZ    match_extend_back_end_encodeBetterBlockAsm
 5131 
 5132 match_extend_back_loop_encodeBetterBlockAsm:
 5133     CMPL CX, SI
 5134     JLE  match_extend_back_end_encodeBetterBlockAsm
 5135     MOVB -1(DX)(BP*1), BL
 5136     MOVB -1(DX)(CX*1), DI
 5137     CMPB BL, DI
 5138     JNE  match_extend_back_end_encodeBetterBlockAsm
 5139     LEAL -1(CX), CX
 5140     DECL BP
 5141     JZ   match_extend_back_end_encodeBetterBlockAsm
 5142     JMP  match_extend_back_loop_encodeBetterBlockAsm
 5143 
 5144 match_extend_back_end_encodeBetterBlockAsm:
 5145     MOVL CX, SI
 5146     SUBL 12(SP), SI
 5147     LEAQ 5(AX)(SI*1), SI
 5148     CMPQ SI, (SP)
 5149     JL   match_dst_size_check_encodeBetterBlockAsm
 5150     MOVQ $0x00000000, ret+48(FP)
 5151     RET
 5152 
 5153 match_dst_size_check_encodeBetterBlockAsm:
 5154     MOVL CX, SI
 5155     ADDL $0x04, CX
 5156     ADDL $0x04, BP
 5157     MOVQ src_len+32(FP), DI
 5158     SUBL CX, DI
 5159     LEAQ (DX)(CX*1), R8
 5160     LEAQ (DX)(BP*1), R9
 5161 
 5162     // matchLen
 5163     XORL R11, R11
 5164     CMPL DI, $0x08
 5165     JL   matchlen_single_match_nolit_encodeBetterBlockAsm
 5166 
 5167 matchlen_loopback_match_nolit_encodeBetterBlockAsm:
 5168     MOVQ  (R8)(R11*1), R10
 5169     XORQ  (R9)(R11*1), R10
 5170     TESTQ R10, R10
 5171     JZ    matchlen_loop_match_nolit_encodeBetterBlockAsm
 5172     BSFQ  R10, R10
 5173     SARQ  $0x03, R10
 5174     LEAL  (R11)(R10*1), R11
 5175     JMP   match_nolit_end_encodeBetterBlockAsm
 5176 
 5177 matchlen_loop_match_nolit_encodeBetterBlockAsm:
 5178     LEAL -8(DI), DI
 5179     LEAL 8(R11), R11
 5180     CMPL DI, $0x08
 5181     JGE  matchlen_loopback_match_nolit_encodeBetterBlockAsm
 5182 
 5183 matchlen_single_match_nolit_encodeBetterBlockAsm:
 5184     TESTL DI, DI
 5185     JZ    match_nolit_end_encodeBetterBlockAsm
 5186 
 5187 matchlen_single_loopback_match_nolit_encodeBetterBlockAsm:
 5188     MOVB (R8)(R11*1), R10
 5189     CMPB (R9)(R11*1), R10
 5190     JNE  match_nolit_end_encodeBetterBlockAsm
 5191     LEAL 1(R11), R11
 5192     DECL DI
 5193     JNZ  matchlen_single_loopback_match_nolit_encodeBetterBlockAsm
 5194 
 5195 match_nolit_end_encodeBetterBlockAsm:
 5196     MOVL CX, DI
 5197     SUBL BP, DI
 5198 
 5199     // Check if repeat
 5200     CMPL 16(SP), DI
 5201     JEQ  match_is_repeat_encodeBetterBlockAsm
 5202     CMPL R11, $0x01
 5203     JG   match_length_ok_encodeBetterBlockAsm
 5204     CMPL DI, $0x0000ffff
 5205     JLE  match_length_ok_encodeBetterBlockAsm
 5206     MOVL 20(SP), CX
 5207     INCL CX
 5208     JMP  search_loop_encodeBetterBlockAsm
 5209 
 5210 match_length_ok_encodeBetterBlockAsm:
 5211     MOVL DI, 16(SP)
 5212     MOVL 12(SP), BP
 5213     CMPL BP, SI
 5214     JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm
 5215     MOVL SI, R8
 5216     MOVL SI, 12(SP)
 5217     LEAQ (DX)(BP*1), R9
 5218     SUBL BP, R8
 5219     LEAL -1(R8), BP
 5220     CMPL BP, $0x3c
 5221     JLT  one_byte_match_emit_encodeBetterBlockAsm
 5222     CMPL BP, $0x00000100
 5223     JLT  two_bytes_match_emit_encodeBetterBlockAsm
 5224     CMPL BP, $0x00010000
 5225     JLT  three_bytes_match_emit_encodeBetterBlockAsm
 5226     CMPL BP, $0x01000000
 5227     JLT  four_bytes_match_emit_encodeBetterBlockAsm
 5228     MOVB $0xfc, (AX)
 5229     MOVL BP, 1(AX)
 5230     ADDQ $0x05, AX
 5231     JMP  memmove_long_match_emit_encodeBetterBlockAsm
 5232 
 5233 four_bytes_match_emit_encodeBetterBlockAsm:
 5234     MOVL BP, R10
 5235     SHRL $0x10, R10
 5236     MOVB $0xf8, (AX)
 5237     MOVW BP, 1(AX)
 5238     MOVB R10, 3(AX)
 5239     ADDQ $0x04, AX
 5240     JMP  memmove_long_match_emit_encodeBetterBlockAsm
 5241 
 5242 three_bytes_match_emit_encodeBetterBlockAsm:
 5243     MOVB $0xf4, (AX)
 5244     MOVW BP, 1(AX)
 5245     ADDQ $0x03, AX
 5246     JMP  memmove_long_match_emit_encodeBetterBlockAsm
 5247 
 5248 two_bytes_match_emit_encodeBetterBlockAsm:
 5249     MOVB $0xf0, (AX)
 5250     MOVB BP, 1(AX)
 5251     ADDQ $0x02, AX
 5252     CMPL BP, $0x40
 5253     JL   memmove_match_emit_encodeBetterBlockAsm
 5254     JMP  memmove_long_match_emit_encodeBetterBlockAsm
 5255 
 5256 one_byte_match_emit_encodeBetterBlockAsm:
 5257     SHLB $0x02, BP
 5258     MOVB BP, (AX)
 5259     ADDQ $0x01, AX
 5260 
 5261 memmove_match_emit_encodeBetterBlockAsm:
 5262     LEAQ (AX)(R8*1), BP
 5263 
 5264     // genMemMoveShort
 5265     CMPQ R8, $0x03
 5266     JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_1or2
 5267     JE   emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_3
 5268     CMPQ R8, $0x08
 5269     JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7
 5270     CMPQ R8, $0x10
 5271     JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
 5272     CMPQ R8, $0x20
 5273     JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
 5274     JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
 5275 
 5276 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_1or2:
 5277     MOVB (R9), R10
 5278     MOVB -1(R9)(R8*1), R9
 5279     MOVB R10, (AX)
 5280     MOVB R9, -1(AX)(R8*1)
 5281     JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
 5282 
 5283 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_3:
 5284     MOVW (R9), R10
 5285     MOVB 2(R9), R9
 5286     MOVW R10, (AX)
 5287     MOVB R9, 2(AX)
 5288     JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
 5289 
 5290 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7:
 5291     MOVL (R9), R10
 5292     MOVL -4(R9)(R8*1), R9
 5293     MOVL R10, (AX)
 5294     MOVL R9, -4(AX)(R8*1)
 5295     JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
 5296 
 5297 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
 5298     MOVQ (R9), R10
 5299     MOVQ -8(R9)(R8*1), R9
 5300     MOVQ R10, (AX)
 5301     MOVQ R9, -8(AX)(R8*1)
 5302     JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
 5303 
 5304 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
 5305     MOVOU (R9), X0
 5306     MOVOU -16(R9)(R8*1), X1
 5307     MOVOU X0, (AX)
 5308     MOVOU X1, -16(AX)(R8*1)
 5309     JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm
 5310 
 5311 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
 5312     MOVOU (R9), X0
 5313     MOVOU 16(R9), X1
 5314     MOVOU -32(R9)(R8*1), X2
 5315     MOVOU -16(R9)(R8*1), X3
 5316     MOVOU X0, (AX)
 5317     MOVOU X1, 16(AX)
 5318     MOVOU X2, -32(AX)(R8*1)
 5319     MOVOU X3, -16(AX)(R8*1)
 5320 
 5321 memmove_end_copy_match_emit_encodeBetterBlockAsm:
 5322     MOVQ BP, AX
 5323     JMP  emit_literal_done_match_emit_encodeBetterBlockAsm
 5324 
 5325 memmove_long_match_emit_encodeBetterBlockAsm:
 5326     LEAQ (AX)(R8*1), BP
 5327 
 5328     // genMemMoveLong
 5329     MOVOU (R9), X0
 5330     MOVOU 16(R9), X1
 5331     MOVOU -32(R9)(R8*1), X2
 5332     MOVOU -16(R9)(R8*1), X3
 5333     MOVQ  R8, R12
 5334     SHRQ  $0x05, R12
 5335     MOVQ  AX, R10
 5336     ANDL  $0x0000001f, R10
 5337     MOVQ  $0x00000040, R13
 5338     SUBQ  R10, R13
 5339     DECQ  R12
 5340     JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
 5341     LEAQ  -32(R9)(R13*1), R10
 5342     LEAQ  -32(AX)(R13*1), R14
 5343 
 5344 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
 5345     MOVOU (R10), X4
 5346     MOVOU 16(R10), X5
 5347     MOVOA X4, (R14)
 5348     MOVOA X5, 16(R14)
 5349     ADDQ  $0x20, R14
 5350     ADDQ  $0x20, R10
 5351     ADDQ  $0x20, R13
 5352     DECQ  R12
 5353     JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
 5354 
 5355 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
 5356     MOVOU -32(R9)(R13*1), X4
 5357     MOVOU -16(R9)(R13*1), X5
 5358     MOVOA X4, -32(AX)(R13*1)
 5359     MOVOA X5, -16(AX)(R13*1)
 5360     ADDQ  $0x20, R13
 5361     CMPQ  R8, R13
 5362     JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
 5363     MOVOU X0, (AX)
 5364     MOVOU X1, 16(AX)
 5365     MOVOU X2, -32(AX)(R8*1)
 5366     MOVOU X3, -16(AX)(R8*1)
 5367     MOVQ  BP, AX
 5368 
 5369 emit_literal_done_match_emit_encodeBetterBlockAsm:
 5370     ADDL R11, CX
 5371     ADDL $0x04, R11
 5372     MOVL CX, 12(SP)
 5373 
 5374     // emitCopy
 5375     CMPL DI, $0x00010000
 5376     JL   two_byte_offset_match_nolit_encodeBetterBlockAsm
 5377 
 5378 four_bytes_loop_back_match_nolit_encodeBetterBlockAsm:
 5379     CMPL R11, $0x40
 5380     JLE  four_bytes_remain_match_nolit_encodeBetterBlockAsm
 5381     MOVB $0xff, (AX)
 5382     MOVL DI, 1(AX)
 5383     LEAL -64(R11), R11
 5384     ADDQ $0x05, AX
 5385     CMPL R11, $0x04
 5386     JL   four_bytes_remain_match_nolit_encodeBetterBlockAsm
 5387 
 5388     // emitRepeat
 5389 emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy:
 5390     MOVL R11, BP
 5391     LEAL -4(R11), R11
 5392     CMPL BP, $0x08
 5393     JLE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy
 5394     CMPL BP, $0x0c
 5395     JGE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
 5396     CMPL DI, $0x00000800
 5397     JLT  repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
 5398 
 5399 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
 5400     CMPL R11, $0x00000104
 5401     JLT  repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy
 5402     CMPL R11, $0x00010100
 5403     JLT  repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy
 5404     CMPL R11, $0x0100ffff
 5405     JLT  repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy
 5406     LEAL -16842747(R11), R11
 5407     MOVW $0x001d, (AX)
 5408     MOVW $0xfffb, 2(AX)
 5409     MOVB $0xff, 4(AX)
 5410     ADDQ $0x05, AX
 5411     JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy
 5412 
 5413 repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy:
 5414     LEAL -65536(R11), R11
 5415     MOVL R11, DI
 5416     MOVW $0x001d, (AX)
 5417     MOVW R11, 2(AX)
 5418     SARL $0x10, DI
 5419     MOVB DI, 4(AX)
 5420     ADDQ $0x05, AX
 5421     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5422 
 5423 repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy:
 5424     LEAL -256(R11), R11
 5425     MOVW $0x0019, (AX)
 5426     MOVW R11, 2(AX)
 5427     ADDQ $0x04, AX
 5428     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5429 
 5430 repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy:
 5431     LEAL -4(R11), R11
 5432     MOVW $0x0015, (AX)
 5433     MOVB R11, 2(AX)
 5434     ADDQ $0x03, AX
 5435     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5436 
 5437 repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy:
 5438     SHLL $0x02, R11
 5439     ORL  $0x01, R11
 5440     MOVW R11, (AX)
 5441     ADDQ $0x02, AX
 5442     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5443 
 5444 repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
 5445     XORQ BP, BP
 5446     LEAL 1(BP)(R11*4), R11
 5447     MOVB DI, 1(AX)
 5448     SARL $0x08, DI
 5449     SHLL $0x05, DI
 5450     ORL  DI, R11
 5451     MOVB R11, (AX)
 5452     ADDQ $0x02, AX
 5453     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5454     JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm
 5455 
 5456 four_bytes_remain_match_nolit_encodeBetterBlockAsm:
 5457     TESTL R11, R11
 5458     JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm
 5459     MOVB  $0x03, BL
 5460     LEAL  -4(BX)(R11*4), R11
 5461     MOVB  R11, (AX)
 5462     MOVL  DI, 1(AX)
 5463     ADDQ  $0x05, AX
 5464     JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm
 5465 
 5466 two_byte_offset_match_nolit_encodeBetterBlockAsm:
 5467     CMPL R11, $0x40
 5468     JLE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm
 5469     MOVB $0xee, (AX)
 5470     MOVW DI, 1(AX)
 5471     LEAL -60(R11), R11
 5472     ADDQ $0x03, AX
 5473 
 5474     // emitRepeat
 5475 emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 5476     MOVL R11, BP
 5477     LEAL -4(R11), R11
 5478     CMPL BP, $0x08
 5479     JLE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short
 5480     CMPL BP, $0x0c
 5481     JGE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
 5482     CMPL DI, $0x00000800
 5483     JLT  repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
 5484 
 5485 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 5486     CMPL R11, $0x00000104
 5487     JLT  repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short
 5488     CMPL R11, $0x00010100
 5489     JLT  repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short
 5490     CMPL R11, $0x0100ffff
 5491     JLT  repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short
 5492     LEAL -16842747(R11), R11
 5493     MOVW $0x001d, (AX)
 5494     MOVW $0xfffb, 2(AX)
 5495     MOVB $0xff, 4(AX)
 5496     ADDQ $0x05, AX
 5497     JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short
 5498 
 5499 repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 5500     LEAL -65536(R11), R11
 5501     MOVL R11, DI
 5502     MOVW $0x001d, (AX)
 5503     MOVW R11, 2(AX)
 5504     SARL $0x10, DI
 5505     MOVB DI, 4(AX)
 5506     ADDQ $0x05, AX
 5507     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5508 
 5509 repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 5510     LEAL -256(R11), R11
 5511     MOVW $0x0019, (AX)
 5512     MOVW R11, 2(AX)
 5513     ADDQ $0x04, AX
 5514     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5515 
 5516 repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 5517     LEAL -4(R11), R11
 5518     MOVW $0x0015, (AX)
 5519     MOVB R11, 2(AX)
 5520     ADDQ $0x03, AX
 5521     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5522 
 5523 repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 5524     SHLL $0x02, R11
 5525     ORL  $0x01, R11
 5526     MOVW R11, (AX)
 5527     ADDQ $0x02, AX
 5528     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5529 
 5530 repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
 5531     XORQ BP, BP
 5532     LEAL 1(BP)(R11*4), R11
 5533     MOVB DI, 1(AX)
 5534     SARL $0x08, DI
 5535     SHLL $0x05, DI
 5536     ORL  DI, R11
 5537     MOVB R11, (AX)
 5538     ADDQ $0x02, AX
 5539     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5540     JMP two_byte_offset_match_nolit_encodeBetterBlockAsm
 5541 
 5542 two_byte_offset_short_match_nolit_encodeBetterBlockAsm:
 5543     CMPL R11, $0x0c
 5544     JGE  emit_copy_three_match_nolit_encodeBetterBlockAsm
 5545     CMPL DI, $0x00000800
 5546     JGE  emit_copy_three_match_nolit_encodeBetterBlockAsm
 5547     MOVB $0x01, BL
 5548     LEAL -16(BX)(R11*4), R11
 5549     MOVB DI, 1(AX)
 5550     SHRL $0x08, DI
 5551     SHLL $0x05, DI
 5552     ORL  DI, R11
 5553     MOVB R11, (AX)
 5554     ADDQ $0x02, AX
 5555     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5556 
 5557 emit_copy_three_match_nolit_encodeBetterBlockAsm:
 5558     MOVB $0x02, BL
 5559     LEAL -4(BX)(R11*4), R11
 5560     MOVB R11, (AX)
 5561     MOVW DI, 1(AX)
 5562     ADDQ $0x03, AX
 5563     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5564 
 5565 match_is_repeat_encodeBetterBlockAsm:
 5566     MOVL 12(SP), BP
 5567     CMPL BP, SI
 5568     JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
 5569     MOVL SI, R8
 5570     MOVL SI, 12(SP)
 5571     LEAQ (DX)(BP*1), R9
 5572     SUBL BP, R8
 5573     LEAL -1(R8), BP
 5574     CMPL BP, $0x3c
 5575     JLT  one_byte_match_emit_repeat_encodeBetterBlockAsm
 5576     CMPL BP, $0x00000100
 5577     JLT  two_bytes_match_emit_repeat_encodeBetterBlockAsm
 5578     CMPL BP, $0x00010000
 5579     JLT  three_bytes_match_emit_repeat_encodeBetterBlockAsm
 5580     CMPL BP, $0x01000000
 5581     JLT  four_bytes_match_emit_repeat_encodeBetterBlockAsm
 5582     MOVB $0xfc, (AX)
 5583     MOVL BP, 1(AX)
 5584     ADDQ $0x05, AX
 5585     JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 5586 
 5587 four_bytes_match_emit_repeat_encodeBetterBlockAsm:
 5588     MOVL BP, R10
 5589     SHRL $0x10, R10
 5590     MOVB $0xf8, (AX)
 5591     MOVW BP, 1(AX)
 5592     MOVB R10, 3(AX)
 5593     ADDQ $0x04, AX
 5594     JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 5595 
 5596 three_bytes_match_emit_repeat_encodeBetterBlockAsm:
 5597     MOVB $0xf4, (AX)
 5598     MOVW BP, 1(AX)
 5599     ADDQ $0x03, AX
 5600     JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 5601 
 5602 two_bytes_match_emit_repeat_encodeBetterBlockAsm:
 5603     MOVB $0xf0, (AX)
 5604     MOVB BP, 1(AX)
 5605     ADDQ $0x02, AX
 5606     CMPL BP, $0x40
 5607     JL   memmove_match_emit_repeat_encodeBetterBlockAsm
 5608     JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 5609 
 5610 one_byte_match_emit_repeat_encodeBetterBlockAsm:
 5611     SHLB $0x02, BP
 5612     MOVB BP, (AX)
 5613     ADDQ $0x01, AX
 5614 
 5615 memmove_match_emit_repeat_encodeBetterBlockAsm:
 5616     LEAQ (AX)(R8*1), BP
 5617 
 5618     // genMemMoveShort
 5619     CMPQ R8, $0x03
 5620     JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_1or2
 5621     JE   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_3
 5622     CMPQ R8, $0x08
 5623     JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7
 5624     CMPQ R8, $0x10
 5625     JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
 5626     CMPQ R8, $0x20
 5627     JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
 5628     JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
 5629 
 5630 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_1or2:
 5631     MOVB (R9), R10
 5632     MOVB -1(R9)(R8*1), R9
 5633     MOVB R10, (AX)
 5634     MOVB R9, -1(AX)(R8*1)
 5635     JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 5636 
 5637 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_3:
 5638     MOVW (R9), R10
 5639     MOVB 2(R9), R9
 5640     MOVW R10, (AX)
 5641     MOVB R9, 2(AX)
 5642     JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 5643 
 5644 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7:
 5645     MOVL (R9), R10
 5646     MOVL -4(R9)(R8*1), R9
 5647     MOVL R10, (AX)
 5648     MOVL R9, -4(AX)(R8*1)
 5649     JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 5650 
 5651 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
 5652     MOVQ (R9), R10
 5653     MOVQ -8(R9)(R8*1), R9
 5654     MOVQ R10, (AX)
 5655     MOVQ R9, -8(AX)(R8*1)
 5656     JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 5657 
 5658 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
 5659     MOVOU (R9), X0
 5660     MOVOU -16(R9)(R8*1), X1
 5661     MOVOU X0, (AX)
 5662     MOVOU X1, -16(AX)(R8*1)
 5663     JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 5664 
 5665 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
 5666     MOVOU (R9), X0
 5667     MOVOU 16(R9), X1
 5668     MOVOU -32(R9)(R8*1), X2
 5669     MOVOU -16(R9)(R8*1), X3
 5670     MOVOU X0, (AX)
 5671     MOVOU X1, 16(AX)
 5672     MOVOU X2, -32(AX)(R8*1)
 5673     MOVOU X3, -16(AX)(R8*1)
 5674 
 5675 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
 5676     MOVQ BP, AX
 5677     JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
 5678 
 5679 memmove_long_match_emit_repeat_encodeBetterBlockAsm:
 5680     LEAQ (AX)(R8*1), BP
 5681 
 5682     // genMemMoveLong
 5683     MOVOU (R9), X0
 5684     MOVOU 16(R9), X1
 5685     MOVOU -32(R9)(R8*1), X2
 5686     MOVOU -16(R9)(R8*1), X3
 5687     MOVQ  R8, R12
 5688     SHRQ  $0x05, R12
 5689     MOVQ  AX, R10
 5690     ANDL  $0x0000001f, R10
 5691     MOVQ  $0x00000040, R13
 5692     SUBQ  R10, R13
 5693     DECQ  R12
 5694     JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
 5695     LEAQ  -32(R9)(R13*1), R10
 5696     LEAQ  -32(AX)(R13*1), R14
 5697 
 5698 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
 5699     MOVOU (R10), X4
 5700     MOVOU 16(R10), X5
 5701     MOVOA X4, (R14)
 5702     MOVOA X5, 16(R14)
 5703     ADDQ  $0x20, R14
 5704     ADDQ  $0x20, R10
 5705     ADDQ  $0x20, R13
 5706     DECQ  R12
 5707     JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
 5708 
 5709 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
 5710     MOVOU -32(R9)(R13*1), X4
 5711     MOVOU -16(R9)(R13*1), X5
 5712     MOVOA X4, -32(AX)(R13*1)
 5713     MOVOA X5, -16(AX)(R13*1)
 5714     ADDQ  $0x20, R13
 5715     CMPQ  R8, R13
 5716     JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
 5717     MOVOU X0, (AX)
 5718     MOVOU X1, 16(AX)
 5719     MOVOU X2, -32(AX)(R8*1)
 5720     MOVOU X3, -16(AX)(R8*1)
 5721     MOVQ  BP, AX
 5722 
 5723 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
 5724     ADDL R11, CX
 5725     ADDL $0x04, R11
 5726     MOVL CX, 12(SP)
 5727 
 5728     // emitRepeat
 5729 emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm:
 5730     MOVL R11, BP
 5731     LEAL -4(R11), R11
 5732     CMPL BP, $0x08
 5733     JLE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm
 5734     CMPL BP, $0x0c
 5735     JGE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
 5736     CMPL DI, $0x00000800
 5737     JLT  repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
 5738 
 5739 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
 5740     CMPL R11, $0x00000104
 5741     JLT  repeat_three_match_nolit_repeat_encodeBetterBlockAsm
 5742     CMPL R11, $0x00010100
 5743     JLT  repeat_four_match_nolit_repeat_encodeBetterBlockAsm
 5744     CMPL R11, $0x0100ffff
 5745     JLT  repeat_five_match_nolit_repeat_encodeBetterBlockAsm
 5746     LEAL -16842747(R11), R11
 5747     MOVW $0x001d, (AX)
 5748     MOVW $0xfffb, 2(AX)
 5749     MOVB $0xff, 4(AX)
 5750     ADDQ $0x05, AX
 5751     JMP  emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm
 5752 
 5753 repeat_five_match_nolit_repeat_encodeBetterBlockAsm:
 5754     LEAL -65536(R11), R11
 5755     MOVL R11, DI
 5756     MOVW $0x001d, (AX)
 5757     MOVW R11, 2(AX)
 5758     SARL $0x10, DI
 5759     MOVB DI, 4(AX)
 5760     ADDQ $0x05, AX
 5761     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5762 
 5763 repeat_four_match_nolit_repeat_encodeBetterBlockAsm:
 5764     LEAL -256(R11), R11
 5765     MOVW $0x0019, (AX)
 5766     MOVW R11, 2(AX)
 5767     ADDQ $0x04, AX
 5768     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5769 
 5770 repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
 5771     LEAL -4(R11), R11
 5772     MOVW $0x0015, (AX)
 5773     MOVB R11, 2(AX)
 5774     ADDQ $0x03, AX
 5775     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5776 
 5777 repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
 5778     SHLL $0x02, R11
 5779     ORL  $0x01, R11
 5780     MOVW R11, (AX)
 5781     ADDQ $0x02, AX
 5782     JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 5783 
 5784 repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
 5785     XORQ BP, BP
 5786     LEAL 1(BP)(R11*4), R11
 5787     MOVB DI, 1(AX)
 5788     SARL $0x08, DI
 5789     SHLL $0x05, DI
 5790     ORL  DI, R11
 5791     MOVB R11, (AX)
 5792     ADDQ $0x02, AX
 5793 
 5794 match_nolit_emitcopy_end_encodeBetterBlockAsm:
 5795     CMPL CX, 8(SP)
 5796     JGE  emit_remainder_encodeBetterBlockAsm
 5797     CMPQ AX, (SP)
 5798     JL   match_nolit_dst_ok_encodeBetterBlockAsm
 5799     MOVQ $0x00000000, ret+48(FP)
 5800     RET
 5801 
 5802 match_nolit_dst_ok_encodeBetterBlockAsm:
 5803     MOVQ  $0x00cf1bbcdcbfa563, BP
 5804     MOVQ  $0x9e3779b1, DI
 5805     INCL  SI
 5806     MOVQ  (DX)(SI*1), R8
 5807     MOVQ  R8, R9
 5808     MOVQ  R8, R10
 5809     SHRQ  $0x08, R10
 5810     LEAL  1(SI), R11
 5811     MOVQ  -2(DX)(CX*1), R8
 5812     SHLQ  $0x08, R9
 5813     IMULQ BP, R9
 5814     SHRQ  $0x30, R9
 5815     SHLQ  $0x20, R10
 5816     IMULQ DI, R10
 5817     SHRQ  $0x32, R10
 5818     MOVL  SI, 24(SP)(R9*4)
 5819     MOVL  R11, 262168(SP)(R10*4)
 5820     MOVQ  R8, R9
 5821     MOVQ  R8, R10
 5822     SHRQ  $0x08, R10
 5823     LEAL  -2(CX), R8
 5824     LEAL  -1(CX), SI
 5825     SHLQ  $0x08, R9
 5826     IMULQ BP, R9
 5827     SHRQ  $0x30, R9
 5828     SHLQ  $0x20, R10
 5829     IMULQ DI, R10
 5830     SHRQ  $0x32, R10
 5831     MOVL  R8, 24(SP)(R9*4)
 5832     MOVL  SI, 262168(SP)(R10*4)
 5833     JMP   search_loop_encodeBetterBlockAsm
 5834 
 5835 emit_remainder_encodeBetterBlockAsm:
 5836     MOVQ src_len+32(FP), CX
 5837     SUBL 12(SP), CX
 5838     LEAQ 5(AX)(CX*1), CX
 5839     CMPQ CX, (SP)
 5840     JL   emit_remainder_ok_encodeBetterBlockAsm
 5841     MOVQ $0x00000000, ret+48(FP)
 5842     RET
 5843 
 5844 emit_remainder_ok_encodeBetterBlockAsm:
 5845     MOVQ src_len+32(FP), CX
 5846     MOVL 12(SP), BX
 5847     CMPL BX, CX
 5848     JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm
 5849     MOVL CX, BP
 5850     MOVL CX, 12(SP)
 5851     LEAQ (DX)(BX*1), CX
 5852     SUBL BX, BP
 5853     LEAL -1(BP), DX
 5854     CMPL DX, $0x3c
 5855     JLT  one_byte_emit_remainder_encodeBetterBlockAsm
 5856     CMPL DX, $0x00000100
 5857     JLT  two_bytes_emit_remainder_encodeBetterBlockAsm
 5858     CMPL DX, $0x00010000
 5859     JLT  three_bytes_emit_remainder_encodeBetterBlockAsm
 5860     CMPL DX, $0x01000000
 5861     JLT  four_bytes_emit_remainder_encodeBetterBlockAsm
 5862     MOVB $0xfc, (AX)
 5863     MOVL DX, 1(AX)
 5864     ADDQ $0x05, AX
 5865     JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 5866 
 5867 four_bytes_emit_remainder_encodeBetterBlockAsm:
 5868     MOVL DX, BX
 5869     SHRL $0x10, BX
 5870     MOVB $0xf8, (AX)
 5871     MOVW DX, 1(AX)
 5872     MOVB BL, 3(AX)
 5873     ADDQ $0x04, AX
 5874     JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 5875 
 5876 three_bytes_emit_remainder_encodeBetterBlockAsm:
 5877     MOVB $0xf4, (AX)
 5878     MOVW DX, 1(AX)
 5879     ADDQ $0x03, AX
 5880     JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 5881 
 5882 two_bytes_emit_remainder_encodeBetterBlockAsm:
 5883     MOVB $0xf0, (AX)
 5884     MOVB DL, 1(AX)
 5885     ADDQ $0x02, AX
 5886     CMPL DX, $0x40
 5887     JL   memmove_emit_remainder_encodeBetterBlockAsm
 5888     JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 5889 
 5890 one_byte_emit_remainder_encodeBetterBlockAsm:
 5891     SHLB $0x02, DL
 5892     MOVB DL, (AX)
 5893     ADDQ $0x01, AX
 5894 
 5895 memmove_emit_remainder_encodeBetterBlockAsm:
 5896     LEAQ (AX)(BP*1), DX
 5897     MOVL BP, BX
 5898 
 5899     // genMemMoveShort
 5900     CMPQ BX, $0x03
 5901     JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2
 5902     JE   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3
 5903     CMPQ BX, $0x08
 5904     JB   emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7
 5905     CMPQ BX, $0x10
 5906     JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16
 5907     CMPQ BX, $0x20
 5908     JBE  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32
 5909     JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
 5910 
 5911 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2:
 5912     MOVB (CX), BP
 5913     MOVB -1(CX)(BX*1), CL
 5914     MOVB BP, (AX)
 5915     MOVB CL, -1(AX)(BX*1)
 5916     JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 5917 
 5918 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3:
 5919     MOVW (CX), BP
 5920     MOVB 2(CX), CL
 5921     MOVW BP, (AX)
 5922     MOVB CL, 2(AX)
 5923     JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 5924 
 5925 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7:
 5926     MOVL (CX), BP
 5927     MOVL -4(CX)(BX*1), CX
 5928     MOVL BP, (AX)
 5929     MOVL CX, -4(AX)(BX*1)
 5930     JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 5931 
 5932 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
 5933     MOVQ (CX), BP
 5934     MOVQ -8(CX)(BX*1), CX
 5935     MOVQ BP, (AX)
 5936     MOVQ CX, -8(AX)(BX*1)
 5937     JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 5938 
 5939 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
 5940     MOVOU (CX), X0
 5941     MOVOU -16(CX)(BX*1), X1
 5942     MOVOU X0, (AX)
 5943     MOVOU X1, -16(AX)(BX*1)
 5944     JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 5945 
 5946 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
 5947     MOVOU (CX), X0
 5948     MOVOU 16(CX), X1
 5949     MOVOU -32(CX)(BX*1), X2
 5950     MOVOU -16(CX)(BX*1), X3
 5951     MOVOU X0, (AX)
 5952     MOVOU X1, 16(AX)
 5953     MOVOU X2, -32(AX)(BX*1)
 5954     MOVOU X3, -16(AX)(BX*1)
 5955 
 5956 memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
 5957     MOVQ DX, AX
 5958     JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm
 5959 
 5960 memmove_long_emit_remainder_encodeBetterBlockAsm:
 5961     LEAQ (AX)(BP*1), DX
 5962     MOVL BP, BX
 5963 
 5964     // genMemMoveLong
 5965     MOVOU (CX), X0
 5966     MOVOU 16(CX), X1
 5967     MOVOU -32(CX)(BX*1), X2
 5968     MOVOU -16(CX)(BX*1), X3
 5969     MOVQ  BX, SI
 5970     SHRQ  $0x05, SI
 5971     MOVQ  AX, BP
 5972     ANDL  $0x0000001f, BP
 5973     MOVQ  $0x00000040, DI
 5974     SUBQ  BP, DI
 5975     DECQ  SI
 5976     JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
 5977     LEAQ  -32(CX)(DI*1), BP
 5978     LEAQ  -32(AX)(DI*1), R8
 5979 
 5980 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
 5981     MOVOU (BP), X4
 5982     MOVOU 16(BP), X5
 5983     MOVOA X4, (R8)
 5984     MOVOA X5, 16(R8)
 5985     ADDQ  $0x20, R8
 5986     ADDQ  $0x20, BP
 5987     ADDQ  $0x20, DI
 5988     DECQ  SI
 5989     JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
 5990 
 5991 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
 5992     MOVOU -32(CX)(DI*1), X4
 5993     MOVOU -16(CX)(DI*1), X5
 5994     MOVOA X4, -32(AX)(DI*1)
 5995     MOVOA X5, -16(AX)(DI*1)
 5996     ADDQ  $0x20, DI
 5997     CMPQ  BX, DI
 5998     JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
 5999     MOVOU X0, (AX)
 6000     MOVOU X1, 16(AX)
 6001     MOVOU X2, -32(AX)(BX*1)
 6002     MOVOU X3, -16(AX)(BX*1)
 6003     MOVQ  DX, AX
 6004 
 6005 emit_literal_done_emit_remainder_encodeBetterBlockAsm:
 6006     MOVQ dst_base+0(FP), CX
 6007     SUBQ CX, AX
 6008     MOVQ AX, ret+48(FP)
 6009     RET
 6010 
 6011 // func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
 6012 // Requires: SSE2
 6013 TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56
 6014     MOVQ dst_base+0(FP), AX
 6015     MOVQ $0x00000a00, CX
 6016     LEAQ 24(SP), DX
 6017     PXOR X0, X0
 6018 
 6019 zero_loop_encodeBetterBlockAsm4MB:
 6020     MOVOU X0, (DX)
 6021     MOVOU X0, 16(DX)
 6022     MOVOU X0, 32(DX)
 6023     MOVOU X0, 48(DX)
 6024     MOVOU X0, 64(DX)
 6025     MOVOU X0, 80(DX)
 6026     MOVOU X0, 96(DX)
 6027     MOVOU X0, 112(DX)
 6028     ADDQ  $0x80, DX
 6029     DECQ  CX
 6030     JNZ   zero_loop_encodeBetterBlockAsm4MB
 6031     MOVL  $0x00000000, 12(SP)
 6032     MOVQ  src_len+32(FP), CX
 6033     LEAQ  -6(CX), DX
 6034     LEAQ  -8(CX), BP
 6035     MOVL  BP, 8(SP)
 6036     SHRQ  $0x05, CX
 6037     SUBL  CX, DX
 6038     LEAQ  (AX)(DX*1), DX
 6039     MOVQ  DX, (SP)
 6040     MOVL  $0x00000001, CX
 6041     MOVL  $0x00000000, 16(SP)
 6042     MOVQ  src_base+24(FP), DX
 6043 
 6044 search_loop_encodeBetterBlockAsm4MB:
 6045     MOVQ  (DX)(CX*1), SI
 6046     MOVL  CX, BP
 6047     SUBL  12(SP), BP
 6048     SHRL  $0x07, BP
 6049     LEAL  1(CX)(BP*1), BP
 6050     CMPL  BP, 8(SP)
 6051     JGE   emit_remainder_encodeBetterBlockAsm4MB
 6052     MOVL  BP, 20(SP)
 6053     MOVQ  $0x00cf1bbcdcbfa563, R8
 6054     MOVQ  $0x9e3779b1, BP
 6055     MOVQ  SI, R9
 6056     MOVQ  SI, R10
 6057     SHLQ  $0x08, R9
 6058     IMULQ R8, R9
 6059     SHRQ  $0x30, R9
 6060     SHLQ  $0x20, R10
 6061     IMULQ BP, R10
 6062     SHRQ  $0x32, R10
 6063     MOVL  24(SP)(R9*4), BP
 6064     MOVL  262168(SP)(R10*4), DI
 6065     MOVL  CX, 24(SP)(R9*4)
 6066     MOVL  CX, 262168(SP)(R10*4)
 6067     CMPL  (DX)(BP*1), SI
 6068     JEQ   candidate_match_encodeBetterBlockAsm4MB
 6069     CMPL  (DX)(DI*1), SI
 6070     JEQ   candidateS_match_encodeBetterBlockAsm4MB
 6071     MOVL  20(SP), CX
 6072     JMP   search_loop_encodeBetterBlockAsm4MB
 6073 
 6074 candidateS_match_encodeBetterBlockAsm4MB:
 6075     SHRQ  $0x08, SI
 6076     MOVQ  SI, R9
 6077     SHLQ  $0x08, R9
 6078     IMULQ R8, R9
 6079     SHRQ  $0x30, R9
 6080     MOVL  24(SP)(R9*4), BP
 6081     INCL  CX
 6082     MOVL  CX, 24(SP)(R9*4)
 6083     CMPL  (DX)(BP*1), SI
 6084     JEQ   candidate_match_encodeBetterBlockAsm4MB
 6085     DECL  CX
 6086     MOVL  DI, BP
 6087 
 6088 candidate_match_encodeBetterBlockAsm4MB:
 6089     MOVL  12(SP), SI
 6090     TESTL BP, BP
 6091     JZ    match_extend_back_end_encodeBetterBlockAsm4MB
 6092 
 6093 match_extend_back_loop_encodeBetterBlockAsm4MB:
 6094     CMPL CX, SI
 6095     JLE  match_extend_back_end_encodeBetterBlockAsm4MB
 6096     MOVB -1(DX)(BP*1), BL
 6097     MOVB -1(DX)(CX*1), DI
 6098     CMPB BL, DI
 6099     JNE  match_extend_back_end_encodeBetterBlockAsm4MB
 6100     LEAL -1(CX), CX
 6101     DECL BP
 6102     JZ   match_extend_back_end_encodeBetterBlockAsm4MB
 6103     JMP  match_extend_back_loop_encodeBetterBlockAsm4MB
 6104 
 6105 match_extend_back_end_encodeBetterBlockAsm4MB:
 6106     MOVL CX, SI
 6107     SUBL 12(SP), SI
 6108     LEAQ 4(AX)(SI*1), SI
 6109     CMPQ SI, (SP)
 6110     JL   match_dst_size_check_encodeBetterBlockAsm4MB
 6111     MOVQ $0x00000000, ret+48(FP)
 6112     RET
 6113 
 6114 match_dst_size_check_encodeBetterBlockAsm4MB:
 6115     MOVL CX, SI
 6116     ADDL $0x04, CX
 6117     ADDL $0x04, BP
 6118     MOVQ src_len+32(FP), DI
 6119     SUBL CX, DI
 6120     LEAQ (DX)(CX*1), R8
 6121     LEAQ (DX)(BP*1), R9
 6122 
 6123     // matchLen
 6124     XORL R11, R11
 6125     CMPL DI, $0x08
 6126     JL   matchlen_single_match_nolit_encodeBetterBlockAsm4MB
 6127 
 6128 matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB:
 6129     MOVQ  (R8)(R11*1), R10
 6130     XORQ  (R9)(R11*1), R10
 6131     TESTQ R10, R10
 6132     JZ    matchlen_loop_match_nolit_encodeBetterBlockAsm4MB
 6133     BSFQ  R10, R10
 6134     SARQ  $0x03, R10
 6135     LEAL  (R11)(R10*1), R11
 6136     JMP   match_nolit_end_encodeBetterBlockAsm4MB
 6137 
 6138 matchlen_loop_match_nolit_encodeBetterBlockAsm4MB:
 6139     LEAL -8(DI), DI
 6140     LEAL 8(R11), R11
 6141     CMPL DI, $0x08
 6142     JGE  matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB
 6143 
 6144 matchlen_single_match_nolit_encodeBetterBlockAsm4MB:
 6145     TESTL DI, DI
 6146     JZ    match_nolit_end_encodeBetterBlockAsm4MB
 6147 
 6148 matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB:
 6149     MOVB (R8)(R11*1), R10
 6150     CMPB (R9)(R11*1), R10
 6151     JNE  match_nolit_end_encodeBetterBlockAsm4MB
 6152     LEAL 1(R11), R11
 6153     DECL DI
 6154     JNZ  matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB
 6155 
 6156 match_nolit_end_encodeBetterBlockAsm4MB:
 6157     MOVL CX, DI
 6158     SUBL BP, DI
 6159 
 6160     // Check if repeat
 6161     CMPL 16(SP), DI
 6162     JEQ  match_is_repeat_encodeBetterBlockAsm4MB
 6163     CMPL R11, $0x01
 6164     JG   match_length_ok_encodeBetterBlockAsm4MB
 6165     CMPL DI, $0x0000ffff
 6166     JLE  match_length_ok_encodeBetterBlockAsm4MB
 6167     MOVL 20(SP), CX
 6168     INCL CX
 6169     JMP  search_loop_encodeBetterBlockAsm4MB
 6170 
 6171 match_length_ok_encodeBetterBlockAsm4MB:
 6172     MOVL DI, 16(SP)
 6173     MOVL 12(SP), BP
 6174     CMPL BP, SI