Browse Source

Merge commit '1e6c13a0e470b42ddcbab8a0dd10d789b3839795' as 'vendor/github.com/dchest/siphash'

Yawning Angel 3 years ago
parent
commit
d185255157

+ 9 - 0
vendor/github.com/dchest/siphash/.travis.yml

@@ -0,0 +1,9 @@
+language: go
+
+go:
+  - 1.1
+  - 1.2
+  - 1.3
+  - 1.4
+  - 1.5
+  - tip

+ 72 - 0
vendor/github.com/dchest/siphash/README.md

@@ -0,0 +1,72 @@
+SipHash (Go)
+============
+
+[![Build Status](https://travis-ci.org/dchest/siphash.svg)](https://travis-ci.org/dchest/siphash)
+
+Go implementation of SipHash-2-4, a fast short-input PRF created by
+Jean-Philippe Aumasson and Daniel J. Bernstein (http://131002.net/siphash/).
+
+
+## Installation
+
+    $ go get github.com/dchest/siphash
+
+
+Supports Go 1.1 and later.
+
+## Usage
+
+    import "github.com/dchest/siphash"
+
+There are two ways to use this package.
+The slower one is to use the standard hash.Hash64 interface:
+
+    h := siphash.New(key)
+    h.Write([]byte("Hello"))
+    sum := h.Sum(nil) // returns 8-byte []byte
+
+or
+
+    sum64 := h.Sum64() // returns uint64
+
+The faster one is to use the Hash() function, which takes the two uint64 halves
+of a 16-byte key and a byte slice, and returns a uint64 hash:
+
+    sum64 := siphash.Hash(key0, key1, []byte("Hello"))
+
+The keys and output are little-endian.
+
+
+## Functions
+
+### func Hash(k0, k1 uint64, p []byte) uint64
+
+Hash returns the 64-bit SipHash-2-4 of the given byte slice with two
+64-bit parts of 128-bit key: k0 and k1.
+
+### func Hash128(k0, k1 uint64, p []byte) (uint64, uint64)
+
+Hash128 returns the 128-bit SipHash-2-4 of the given byte slice with two
+64-bit parts of 128-bit key: k0 and k1.
+
+Note that 128-bit SipHash is considered experimental by SipHash authors at this time.
+
+### func New(key []byte) hash.Hash64
+
+New returns a new hash.Hash64 computing SipHash-2-4 with 16-byte key.
+
+### func New128(key []byte) hash.Hash
+
+New128 returns a new hash.Hash computing SipHash-2-4 with 16-byte key and 16-byte output.
+
+Note that 16-byte output is considered experimental by SipHash authors at this time.
+
+
+## Public domain dedication
+
+Written by Dmitry Chestnykh and Damian Gryski.
+
+To the extent possible under law, the authors have dedicated all copyright
+and related and neighboring rights to this software to the public domain
+worldwide. This software is distributed without any warranty.
+http://creativecommons.org/publicdomain/zero/1.0/

+ 148 - 0
vendor/github.com/dchest/siphash/blocks.go

@@ -0,0 +1,148 @@
+// +build !arm,!amd64 appengine gccgo
+
+package siphash
+
+// once feeds the 8-byte buffer d.x through blocks as a single block,
+// updating the state words held in d.
+func once(d *digest) {
+	blocks(d, d.x[:])
+}
+
+// finalize returns the 64-bit hash value for the current state of d.
+//
+// It operates on a copy of d, so the caller's digest is not modified: the
+// copy has its buffered block d.x compressed via once, 0xff is XORed into
+// v2, four rounds are applied, and the four state words are XORed together.
+//
+// NOTE(review): this assumes the caller has already placed the final padded
+// block (tail bytes plus length byte) in d.x -- confirm against the callers.
+func finalize(d *digest) uint64 {
+	// Copy the digest so that compressing the last block leaves *d untouched.
+	d0 := *d
+	once(&d0)
+
+	v0, v1, v2, v3 := d0.v0, d0.v1, d0.v2, d0.v3
+	// Finalization constant for the 64-bit output.
+	v2 ^= 0xff
+
+	// The four finalization rounds are unrolled by hand; each round is the
+	// same add/rotate/xor sequence.
+
+	// Round 1.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 2.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 3.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 4.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Fold the four state words into the 64-bit result.
+	return v0 ^ v1 ^ v2 ^ v3
+}
+
+// blocks compresses every complete BlockSize-byte block of p into the state
+// of d.
+//
+// Each block is decoded as a little-endian uint64 m, XORed into v3, mixed
+// with two rounds, and then XORed into v0. Trailing bytes of p that do not
+// fill a whole block are ignored; the updated state is written back to d
+// once the loop finishes.
+func blocks(d *digest, p []uint8) {
+	// Work on local copies of the state words and store them back at the end.
+	v0, v1, v2, v3 := d.v0, d.v1, d.v2, d.v3
+
+	for len(p) >= BlockSize {
+		// Little-endian load of the next 8 bytes.
+		m := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 |
+			uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
+
+		v3 ^= m
+
+		// Round 1.
+		v0 += v1
+		v1 = v1<<13 | v1>>(64-13)
+		v1 ^= v0
+		v0 = v0<<32 | v0>>(64-32)
+
+		v2 += v3
+		v3 = v3<<16 | v3>>(64-16)
+		v3 ^= v2
+
+		v0 += v3
+		v3 = v3<<21 | v3>>(64-21)
+		v3 ^= v0
+
+		v2 += v1
+		v1 = v1<<17 | v1>>(64-17)
+		v1 ^= v2
+		v2 = v2<<32 | v2>>(64-32)
+
+		// Round 2.
+		v0 += v1
+		v1 = v1<<13 | v1>>(64-13)
+		v1 ^= v0
+		v0 = v0<<32 | v0>>(64-32)
+
+		v2 += v3
+		v3 = v3<<16 | v3>>(64-16)
+		v3 ^= v2
+
+		v0 += v3
+		v3 = v3<<21 | v3>>(64-21)
+		v3 ^= v0
+
+		v2 += v1
+		v1 = v1<<17 | v1>>(64-17)
+		v1 ^= v2
+		v2 = v2<<32 | v2>>(64-32)
+
+		v0 ^= m
+
+		// Advance to the next block.
+		p = p[BlockSize:]
+	}
+
+	d.v0, d.v1, d.v2, d.v3 = v0, v1, v2, v3
+}

+ 87 - 0
vendor/github.com/dchest/siphash/blocks_amd64.s

@@ -0,0 +1,87 @@
+// +build amd64,!appengine,!gccgo
+	
+// ROUND applies one add/rotate/xor round to the four 64-bit state words held
+// in the registers passed as v0..v3. Rotations are expressed as right
+// rotations: RORQ by 51, 48, 43 and 47 equal left rotations by 13, 16, 21
+// and 17, and RORQ $32 swaps the two 32-bit halves.
+#define ROUND(v0, v1, v2, v3) \
+	ADDQ v1, v0; \
+	RORQ $51, v1; \
+	ADDQ v3, v2; \
+	XORQ v0, v1; \
+	RORQ $48, v3; \
+	RORQ $32, v0; \
+	XORQ v2, v3; \
+	ADDQ v1, v2; \
+	ADDQ v3, v0; \
+	RORQ $43, v3; \
+	RORQ $47, v1; \
+	XORQ v0, v3; \
+	XORQ v2, v1; \
+	RORQ $32, v2
+
+// blocks(d *digest, data []uint8)
+//
+// Compresses len(data)/8 consecutive 8-byte words of data into the state
+// words stored in the first 32 bytes of *d. Each word is XORed into v3,
+// mixed with two ROUNDs, then XORed into v0. Bytes beyond the last whole
+// word are not read. The TEXT flag value 4 is NOSPLIT in Go's textflag.h.
+TEXT ·blocks(SB),4,$0-32
+	MOVQ d+0(FP), BX
+	MOVQ 0(BX), R9		// R9 = v0
+	MOVQ 8(BX), R10		// R10 = v1
+	MOVQ 16(BX), R11	// R11 = v2
+	MOVQ 24(BX), R12	// R12 = v3
+	MOVQ data+8(FP), DI	// DI = pointer to the first byte of data
+	MOVQ data_len+16(FP), SI// SI = len(data) in bytes
+	XORL DX, DX		// DX = block index (0)
+	SHRQ $3, SI 		// SI = number of whole 8-byte blocks
+body:
+	CMPQ DX, SI
+	JGE  end		// stop once every whole block has been consumed
+	MOVQ 0(DI)(DX*8), CX	// CX = m
+	XORQ CX, R12
+	ROUND(R9, R10, R11, R12)
+	ROUND(R9, R10, R11, R12)
+	XORQ CX, R9
+	ADDQ $1, DX
+	JMP  body
+end:
+	// Store the updated state back into *d.
+	MOVQ R9, 0(BX)
+	MOVQ R10, 8(BX)
+	MOVQ R11, 16(BX)
+	MOVQ R12, 24(BX)
+	RET
+
+// once(d *digest)
+//
+// Compresses the single 8-byte word stored at offset 48 of *d (the buffer
+// d.x, which follows the four state words and the two key words) into the
+// state and writes the state back to *d.
+TEXT ·once(SB),4,$0-8
+	MOVQ d+0(FP), BX
+	MOVQ 0(BX), R9		// R9 = v0
+	MOVQ 8(BX), R10		// R10 = v1
+	MOVQ 16(BX), R11	// R11 = v2
+	MOVQ 24(BX), R12	// R12 = v3
+	MOVQ 48(BX), CX		// CX = the 8 bytes of d.x as one word
+	XORQ CX, R12
+	ROUND(R9, R10, R11, R12)
+	ROUND(R9, R10, R11, R12)
+	XORQ CX, R9
+	MOVQ R9, 0(BX)
+	MOVQ R10, 8(BX)
+	MOVQ R11, 16(BX)
+	MOVQ R12, 24(BX)
+	RET
+
+// finalize(d *digest) uint64
+//
+// Computes the 64-bit result from the state in *d without storing anything
+// back into *d: the buffered word d.x is compressed with two ROUNDs, the low
+// byte of v2 is inverted (equivalent to v2 ^= 0xff), four more ROUNDs are
+// applied, and v0 ^ v1 ^ v2 ^ v3 is returned.
+TEXT ·finalize(SB),4,$0-16
+	MOVQ d+0(FP), BX
+	MOVQ 0(BX), R9		// R9 = v0
+	MOVQ 8(BX), R10		// R10 = v1
+	MOVQ 16(BX), R11	// R11 = v2
+	MOVQ 24(BX), R12	// R12 = v3
+	MOVQ 48(BX), CX		// CX = the 8 bytes of d.x as one word
+	XORQ CX, R12
+	ROUND(R9, R10, R11, R12)
+	ROUND(R9, R10, R11, R12)
+	XORQ CX, R9
+	NOTB R11		// v2 ^= 0xff
+	ROUND(R9, R10, R11, R12)
+	ROUND(R9, R10, R11, R12)
+	ROUND(R9, R10, R11, R12)
+	ROUND(R9, R10, R11, R12)
+	// Fold the state: R9 = v0 ^ v1 ^ v2 ^ v3.
+	XORQ R12, R11
+	XORQ R10, R9
+	XORQ R11, R9
+	MOVQ R9, ret+8(FP)
+	RET
+

+ 144 - 0
vendor/github.com/dchest/siphash/blocks_arm.s

@@ -0,0 +1,144 @@
+#include "textflag.h"
+#define R10 g
+#define ROUND()\
+	ADD.S	R2,R0,R0;\
+	ADC	R3,R1,R1;\
+	EOR	R2<<13,R0,R8;\
+	EOR	R3>>19,R8,R8;\
+	EOR	R2>>19,R1,R11;\
+	EOR	R3<<13,R11,R11;\
+	ADD.S	R6,R4,R4;\
+	ADC	R7,R5,R5;\
+	EOR	R6<<16,R4,R2;\
+	EOR	R7>>16,R2,R2;\
+	EOR	R6>>16,R5,R3;\
+	EOR	R7<<16,R3,R3;\
+	ADD.S	R2,R1,R1;\
+	ADC	R3,R0,R0;\
+	EOR	R2<<21,R1,R6;\
+	EOR	R3>>11,R6,R6;\
+	EOR	R2>>11,R0,R7;\
+	EOR	R3<<21,R7,R7;\
+	ADD.S	R8,R4,R4;\
+	ADC	R11,R5,R5;\
+	EOR	R8<<17,R4,R2;\
+	EOR	R11>>15,R2,R2;\
+	EOR	R8>>15,R5,R3;\
+	EOR	R11<<17,R3,R3;\
+	ADD.S	R2,R1,R1;\
+	ADC	R3,R0,R0;\
+	EOR	R2<<13,R1,R8;\
+	EOR	R3>>19,R8,R8;\
+	EOR	R2>>19,R0,R11;\
+	EOR	R3<<13,R11,R11;\
+	ADD.S	R6,R5,R5;\
+	ADC	R7,R4,R4;\
+	EOR	R6<<16,R5,R2;\
+	EOR	R7>>16,R2,R2;\
+	EOR	R6>>16,R4,R3;\
+	EOR	R7<<16,R3,R3;\
+	ADD.S	R2,R0,R0;\
+	ADC	R3,R1,R1;\
+	EOR	R2<<21,R0,R6;\
+	EOR	R3>>11,R6,R6;\
+	EOR	R2>>11,R1,R7;\
+	EOR	R3<<21,R7,R7;\
+	ADD.S	R8,R5,R5;\
+	ADC	R11,R4,R4;\
+	EOR	R8<<17,R5,R2;\
+	EOR	R11>>15,R2,R2;\
+	EOR	R8>>15,R4,R3;\
+	EOR	R11<<17,R3,R3;\
+
+// once(d *digest)
+//
+// Compresses the 8-byte buffer at offset 48 of *d into the state. The four
+// 64-bit state words are held as register pairs R0:R1 (v0), R2:R3 (v1),
+// R4:R5 (v2) and R6:R7 (v3); the ROUND() macro in this file contains two
+// rounds and uses R8 and R11 as scratch.
+TEXT ·once(SB),NOSPLIT,$4-4
+	MOVW	d+0(FP),R8
+	MOVM.IA	(R8),[R0,R1,R2,R3,R4,R5,R6,R7]
+	MOVW	48(R8),R12	// R12:R14 = the two words of d.x
+	MOVW	52(R8),R14
+	EOR	R12,R6,R6	// v3 ^= m
+	EOR	R14,R7,R7
+	ROUND()
+	EOR	R12,R0,R0	// v0 ^= m
+	EOR	R14,R1,R1
+	MOVW	d+0(FP),R8	// reload d: ROUND() overwrote R8
+	MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
+	RET
+
+// finalize(d *digest) uint64
+//
+// Computes the 64-bit result from the state in *d without writing the state
+// back: the buffered block at offset 48 is compressed with one ROUND() (two
+// rounds), 255 is XORed into the low word of v2, two more ROUND()s (four
+// rounds) are applied, and v0 ^ v1 ^ v2 ^ v3 is returned.
+TEXT ·finalize(SB),NOSPLIT,$4-12
+	MOVW	d+0(FP),R8
+	MOVM.IA	(R8),[R0,R1,R2,R3,R4,R5,R6,R7]
+	MOVW	48(R8),R12	// R12:R14 = the two words of d.x
+	MOVW	52(R8),R14
+	EOR	R12,R6,R6	// v3 ^= m
+	EOR	R14,R7,R7
+	ROUND()
+	EOR	R12,R0,R0	// v0 ^= m
+	EOR	R14,R1,R1
+	EOR	$255,R4		// v2 ^= 0xff
+	ROUND()
+	ROUND()
+	// Fold the four state words into R0:R1.
+	EOR	R2,R0,R0
+	EOR	R3,R1,R1
+	EOR	R6,R4,R4
+	EOR	R7,R5,R5
+	EOR	R4,R0,R0
+	EOR	R5,R1,R1
+	MOVW	R0,ret_lo+4(FP)
+	MOVW	R1,ret_hi+8(FP)
+	RET
+
+// blocks(d *digest, data []uint8)
+//
+// Compresses 8-byte blocks of the slice into the state stored in *d. R10 is
+// #defined to g at the top of this file, so g is saved on entry and restored
+// before each RET while R10 serves as the data pointer. Word-aligned input
+// is read with a two-register load; any other alignment takes the
+// byte-by-byte path at blocksunaligned.
+//
+// NOTE(review): both loops process a block before comparing against the end
+// pointer, so this appears to assume the slice holds at least one block and
+// a whole number of blocks -- confirm against the callers.
+TEXT ·blocks(SB),NOSPLIT,$8-16
+	MOVW	R10,sav-8(SP)	// save g
+	MOVW	d+0(FP),R8
+	MOVM.IA	(R8),[R0,R1,R2,R3,R4,R5,R6,R7]
+	MOVW	p+4(FP),R10
+	MOVW	p_len+8(FP),R11
+	ADD	R10,R11,R11
+	MOVW	R11,endp-4(SP)	// endp = pointer one past the last byte
+	AND.S	$3,R10,R8	// low two address bits set => not word aligned
+	BNE	blocksunaligned
+blocksloop:
+	MOVM.IA.W (R10),[R12,R14]	// load m and advance R10 by 8
+	EOR	R12,R6,R6
+	EOR	R14,R7,R7
+	ROUND()
+	EOR	R12,R0,R0
+	EOR	R14,R1,R1
+	MOVW	endp-4(SP),R11	// reload: ROUND() uses R11 as scratch
+	CMP	R11,R10
+	BLO	blocksloop
+	MOVW	d+0(FP),R8
+	MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
+	MOVW	sav-8(SP),R10	// restore g
+	RET
+blocksunaligned:
+	// Assemble the little-endian words R12 (low) and R14 (high) one byte
+	// at a time.
+	MOVB	(R10),R12
+	MOVB	1(R10),R11
+	ORR	R11<<8,R12,R12
+	MOVB	2(R10),R11
+	ORR	R11<<16,R12,R12
+	MOVB	3(R10),R11
+	ORR	R11<<24,R12,R12
+	MOVB	4(R10),R14
+	MOVB	5(R10),R11
+	ORR	R11<<8,R14,R14
+	MOVB	6(R10),R11
+	ORR	R11<<16,R14,R14
+	MOVB	7(R10),R11
+	ORR	R11<<24,R14,R14
+	ADD	$8,R10,R10
+	EOR	R12,R6,R6
+	EOR	R14,R7,R7
+	ROUND()
+	EOR	R12,R0,R0
+	EOR	R14,R1,R1
+	MOVW	endp-4(SP),R11
+	CMP	R11,R10
+	BLO	blocksunaligned
+	MOVW	d+0(FP),R8
+	MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
+	MOVW	sav-8(SP),R10	// restore g
+	RET

+ 216 - 0
vendor/github.com/dchest/siphash/hash.go

@@ -0,0 +1,216 @@
+// +build !arm,!amd64 appengine gccgo
+
+// Written in 2012 by Dmitry Chestnykh.
+//
+// To the extent possible under law, the author have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+// http://creativecommons.org/publicdomain/zero/1.0/
+
+package siphash
+
+// Hash returns the 64-bit SipHash-2-4 of the given byte slice with two 64-bit
+// parts of 128-bit key: k0 and k1.
+func Hash(k0, k1 uint64, p []byte) uint64 {
+	// Initialization.
+	v0 := k0 ^ 0x736f6d6570736575
+	v1 := k1 ^ 0x646f72616e646f6d
+	v2 := k0 ^ 0x6c7967656e657261
+	v3 := k1 ^ 0x7465646279746573
+	t := uint64(len(p)) << 56
+
+	// Compression.
+	for len(p) >= BlockSize {
+		m := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 |
+			uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
+		v3 ^= m
+
+		// Round 1.
+		v0 += v1
+		v1 = v1<<13 | v1>>(64-13)
+		v1 ^= v0
+		v0 = v0<<32 | v0>>(64-32)
+
+		v2 += v3
+		v3 = v3<<16 | v3>>(64-16)
+		v3 ^= v2
+
+		v0 += v3
+		v3 = v3<<21 | v3>>(64-21)
+		v3 ^= v0
+
+		v2 += v1
+		v1 = v1<<17 | v1>>(64-17)
+		v1 ^= v2
+		v2 = v2<<32 | v2>>(64-32)
+
+		// Round 2.
+		v0 += v1
+		v1 = v1<<13 | v1>>(64-13)
+		v1 ^= v0
+		v0 = v0<<32 | v0>>(64-32)
+
+		v2 += v3
+		v3 = v3<<16 | v3>>(64-16)
+		v3 ^= v2
+
+		v0 += v3
+		v3 = v3<<21 | v3>>(64-21)
+		v3 ^= v0
+
+		v2 += v1
+		v1 = v1<<17 | v1>>(64-17)
+		v1 ^= v2
+		v2 = v2<<32 | v2>>(64-32)
+
+		v0 ^= m
+		p = p[BlockSize:]
+	}
+
+	// Compress last block.
+	switch len(p) {
+	case 7:
+		t |= uint64(p[6]) << 48
+		fallthrough
+	case 6:
+		t |= uint64(p[5]) << 40
+		fallthrough
+	case 5:
+		t |= uint64(p[4]) << 32
+		fallthrough
+	case 4:
+		t |= uint64(p[3]) << 24
+		fallthrough
+	case 3:
+		t |= uint64(p[2]) << 16
+		fallthrough
+	case 2:
+		t |= uint64(p[1]) << 8
+		fallthrough
+	case 1:
+		t |= uint64(p[0])
+	}
+
+	v3 ^= t
+
+	// Round 1.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 2.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	v0 ^= t
+
+	// Finalization.
+	v2 ^= 0xff
+
+	// Round 1.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 2.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 3.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 4.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	return v0 ^ v1 ^ v2 ^ v3
+}

+ 302 - 0
vendor/github.com/dchest/siphash/hash128.go

@@ -0,0 +1,302 @@
+// +build !arm,!amd64 appengine gccgo
+// Written in 2012 by Dmitry Chestnykh.
+// Modifications 2014 for 128-bit hash function by Damian Gryski.
+//
+// To the extent possible under law, the authors have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+// http://creativecommons.org/publicdomain/zero/1.0/
+
+package siphash
+
+// Hash128 returns the 128-bit SipHash-2-4 of the given byte slice with two 64-bit
+// parts of 128-bit key: k0 and k1.
+//
+// Note that 128-bit SipHash is considered experimental by SipHash authors at this time.
+func Hash128(k0, k1 uint64, p []byte) (uint64, uint64) {
+	// Initialization.
+	v0 := k0 ^ 0x736f6d6570736575
+	v1 := k1 ^ 0x646f72616e646f6d
+	v2 := k0 ^ 0x6c7967656e657261
+	v3 := k1 ^ 0x7465646279746573
+	t := uint64(len(p)) << 56
+
+	v1 ^= 0xee
+
+	// Compression.
+	for len(p) >= BlockSize {
+		m := uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 |
+			uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
+		v3 ^= m
+
+		// Round 1.
+		v0 += v1
+		v1 = v1<<13 | v1>>(64-13)
+		v1 ^= v0
+		v0 = v0<<32 | v0>>(64-32)
+
+		v2 += v3
+		v3 = v3<<16 | v3>>(64-16)
+		v3 ^= v2
+
+		v0 += v3
+		v3 = v3<<21 | v3>>(64-21)
+		v3 ^= v0
+
+		v2 += v1
+		v1 = v1<<17 | v1>>(64-17)
+		v1 ^= v2
+		v2 = v2<<32 | v2>>(64-32)
+
+		// Round 2.
+		v0 += v1
+		v1 = v1<<13 | v1>>(64-13)
+		v1 ^= v0
+		v0 = v0<<32 | v0>>(64-32)
+
+		v2 += v3
+		v3 = v3<<16 | v3>>(64-16)
+		v3 ^= v2
+
+		v0 += v3
+		v3 = v3<<21 | v3>>(64-21)
+		v3 ^= v0
+
+		v2 += v1
+		v1 = v1<<17 | v1>>(64-17)
+		v1 ^= v2
+		v2 = v2<<32 | v2>>(64-32)
+
+		v0 ^= m
+		p = p[BlockSize:]
+	}
+
+	// Compress last block.
+	switch len(p) {
+	case 7:
+		t |= uint64(p[6]) << 48
+		fallthrough
+	case 6:
+		t |= uint64(p[5]) << 40
+		fallthrough
+	case 5:
+		t |= uint64(p[4]) << 32
+		fallthrough
+	case 4:
+		t |= uint64(p[3]) << 24
+		fallthrough
+	case 3:
+		t |= uint64(p[2]) << 16
+		fallthrough
+	case 2:
+		t |= uint64(p[1]) << 8
+		fallthrough
+	case 1:
+		t |= uint64(p[0])
+	}
+
+	v3 ^= t
+
+	// Round 1.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 2.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	v0 ^= t
+
+	// Finalization.
+	v2 ^= 0xee
+
+	// Round 1.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 2.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 3.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 4.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	r0 := v0 ^ v1 ^ v2 ^ v3
+
+	v1 ^= 0xdd
+
+	// Round 1.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 2.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 3.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	// Round 4.
+	v0 += v1
+	v1 = v1<<13 | v1>>(64-13)
+	v1 ^= v0
+	v0 = v0<<32 | v0>>(64-32)
+
+	v2 += v3
+	v3 = v3<<16 | v3>>(64-16)
+	v3 ^= v2
+
+	v0 += v3
+	v3 = v3<<21 | v3>>(64-21)
+	v3 ^= v0
+
+	v2 += v1
+	v1 = v1<<17 | v1>>(64-17)
+	v1 ^= v2
+	v2 = v2<<32 | v2>>(64-32)
+
+	r1 := v0 ^ v1 ^ v2 ^ v3
+
+	return r0, r1
+}

+ 292 - 0
vendor/github.com/dchest/siphash/hash128_amd64.s

@@ -0,0 +1,292 @@
+// +build amd64,!appengine,!gccgo
+
+// This is a translation of the gcc output of FloodyBerry's pure-C public
+// domain siphash implementation at https://github.com/floodyberry/siphash
+
+// This assembly code has been modified from the 64-bit output to the experimental 128-bit output.
+
+// SI = v0
+// AX = v1
+// CX = v2
+// DX = v3
+
+// func Hash128(k0, k1 uint64, b []byte) (r0 uint64, r1 uint64)
+TEXT	·Hash128(SB),4,$0-56
+	MOVQ	k0+0(FP),CX
+	MOVQ	$0x736F6D6570736575,R9
+	MOVQ	k1+8(FP),DI
+	MOVQ	$0x6C7967656E657261,BX
+	MOVQ	$0x646F72616E646F6D,AX
+	MOVQ	b_len+24(FP),DX
+	XORQ	$0xEE,AX
+	MOVQ	DX,R11
+	MOVQ	DX,R10
+	XORQ	CX,R9
+	XORQ	CX,BX
+	MOVQ	$0x7465646279746573,CX
+	XORQ	DI,AX
+	XORQ	DI,CX
+	SHLQ	$0x38,R11
+	XORQ	DI,DI
+	MOVQ	b_base+16(FP),SI
+	ANDQ	$0xFFFFFFFFFFFFFFF8,R10
+	JE	afterLoop
+	XCHGQ	AX,AX
+loopBody:
+	MOVQ	0(SI)(DI*1),R8
+	ADDQ	AX,R9
+	RORQ	$0x33,AX
+	XORQ	R9,AX
+	RORQ	$0x20,R9
+	ADDQ	$0x8,DI
+	XORQ	R8,CX
+	ADDQ	CX,BX
+	RORQ	$0x30,CX
+	XORQ	BX,CX
+	ADDQ	AX,BX
+	RORQ	$0x2F,AX
+	ADDQ	CX,R9
+	RORQ	$0x2B,CX
+	XORQ	BX,AX
+	XORQ	R9,CX
+	RORQ	$0x20,BX
+	ADDQ	AX,R9
+	ADDQ	CX,BX
+	RORQ	$0x33,AX
+	RORQ	$0x30,CX
+	XORQ	R9,AX
+	XORQ	BX,CX
+	RORQ	$0x20,R9
+	ADDQ	AX,BX
+	ADDQ	CX,R9
+	RORQ	$0x2F,AX
+	RORQ	$0x2B,CX
+	XORQ	BX,AX
+	RORQ	$0x20,BX
+	XORQ	R9,CX
+	XORQ	R8,R9
+	CMPQ	R10,DI
+	JA	loopBody
+afterLoop:
+	SUBQ	R10,DX
+
+	CMPQ	DX,$0x7
+	JA	afterSwitch
+
+	// no support for jump tables
+
+	CMPQ	DX,$0x7
+	JE	sw7
+
+	CMPQ	DX,$0x6
+	JE	sw6
+
+	CMPQ	DX,$0x5
+	JE	sw5
+
+	CMPQ	DX,$0x4
+	JE	sw4
+
+	CMPQ	DX,$0x3
+	JE	sw3
+
+	CMPQ	DX,$0x2
+	JE	sw2
+
+	CMPQ	DX,$0x1
+	JE	sw1
+
+	JMP	afterSwitch
+
+sw7:	MOVBQZX	6(SI)(DI*1),DX
+	SHLQ	$0x30,DX
+	ORQ	DX,R11
+sw6:	MOVBQZX	0x5(SI)(DI*1),DX
+	SHLQ	$0x28,DX
+	ORQ	DX,R11
+sw5:	MOVBQZX	0x4(SI)(DI*1),DX
+	SHLQ	$0x20,DX
+	ORQ	DX,R11
+sw4:	MOVBQZX	0x3(SI)(DI*1),DX
+	SHLQ	$0x18,DX
+	ORQ	DX,R11
+sw3:	MOVBQZX	0x2(SI)(DI*1),DX
+	SHLQ	$0x10,DX
+	ORQ	DX,R11
+sw2:	MOVBQZX	0x1(SI)(DI*1),DX
+	SHLQ	$0x8,DX
+	ORQ	DX,R11
+sw1:	MOVBQZX	0(SI)(DI*1),DX
+	ORQ	DX,R11
+afterSwitch:
+	LEAQ	(AX)(R9*1),SI
+	XORQ	R11,CX
+	RORQ	$0x33,AX
+	ADDQ	CX,BX
+	MOVQ	CX,DX
+	XORQ	SI,AX
+	RORQ	$0x30,DX
+	RORQ	$0x20,SI
+	LEAQ	0(BX)(AX*1),CX
+	XORQ	BX,DX
+	RORQ	$0x2F,AX
+	ADDQ	DX,SI
+	RORQ	$0x2B,DX
+	XORQ	CX,AX
+	XORQ	SI,DX
+	RORQ	$0x20,CX
+	ADDQ	AX,SI
+	RORQ	$0x33,AX
+	ADDQ	DX,CX
+	XORQ	SI,AX
+	RORQ	$0x30,DX
+	RORQ	$0x20,SI
+	XORQ	CX,DX
+	ADDQ	AX,CX
+	RORQ	$0x2F,AX
+	ADDQ	DX,SI
+	XORQ	CX,AX
+	RORQ	$0x2B,DX
+	RORQ	$0x20,CX
+	XORQ	SI,DX
+	XORQ	R11,SI
+	XORB	$0xEE,CL
+	ADDQ	AX,SI
+	RORQ	$0x33,AX
+	ADDQ	DX,CX
+	RORQ	$0x30,DX
+	XORQ	SI,AX
+	XORQ	CX,DX
+	RORQ	$0x20,SI
+	ADDQ	AX,CX
+	ADDQ	DX,SI
+	RORQ	$0x2F,AX
+	RORQ	$0x2B,DX
+	XORQ	CX,AX
+	XORQ	SI,DX
+	RORQ	$0x20,CX
+	ADDQ	AX,SI
+	ADDQ	DX,CX
+	RORQ	$0x33,AX
+	RORQ	$0x30,DX
+	XORQ	SI,AX
+	RORQ	$0x20,SI
+	XORQ	CX,DX
+	ADDQ	AX,CX
+	RORQ	$0x2F,AX
+	ADDQ	DX,SI
+	RORQ	$0x2B,DX
+	XORQ	CX,AX
+	XORQ	SI,DX
+	RORQ	$0x20,CX
+	ADDQ	AX,SI
+	ADDQ	DX,CX
+	RORQ	$0x33,AX
+	RORQ	$0x30,DX
+	XORQ	CX,DX
+	XORQ	SI,AX
+	RORQ	$0x20,SI
+	ADDQ	DX,SI
+	ADDQ	AX,CX
+	RORQ	$0x2F,AX
+	XORQ	CX,AX
+	RORQ	$0x2B,DX
+	RORQ	$0x20,CX
+	XORQ	SI,DX
+
+	// gcc optimized the tail end of this function differently.  However,
+	// we need to preserve our registers to carry out the second stage of
+	// the finalization.  This is a duplicate of an earlier finalization
+	// round.
+
+	ADDQ	AX,SI
+	RORQ	$0x33,AX
+	ADDQ	DX,CX
+	RORQ	$0x30,DX
+	XORQ	SI,AX
+	XORQ	CX,DX
+	RORQ	$0x20,SI
+	ADDQ	AX,CX
+	ADDQ	DX,SI
+	RORQ	$0x2F,AX
+	RORQ	$0x2B,DX
+	XORQ	CX,AX
+	XORQ	SI,DX
+	RORQ	$0x20,CX
+
+	// Stuff the result into BX instead of AX as gcc had done
+
+	MOVQ	SI,BX
+	XORQ	AX,BX
+	XORQ	DX,BX
+	XORQ	CX,BX
+	MOVQ	BX,r0+40(FP)
+
+	// Start the second finalization round
+
+	XORB	$0xDD,AL
+	ADDQ	AX,SI
+	RORQ	$0x33,AX
+	ADDQ	DX,CX
+	RORQ	$0x30,DX
+	XORQ	SI,AX
+	XORQ	CX,DX
+	RORQ	$0x20,SI
+	ADDQ	AX,CX
+	ADDQ	DX,SI
+	RORQ	$0x2F,AX
+	RORQ	$0x2B,DX
+	XORQ	CX,AX
+	XORQ	SI,DX
+	RORQ	$0x20,CX
+	ADDQ	AX,SI
+	ADDQ	DX,CX
+	RORQ	$0x33,AX
+	RORQ	$0x30,DX
+	XORQ	SI,AX
+	RORQ	$0x20,SI
+	XORQ	CX,DX
+	ADDQ	AX,CX
+	RORQ	$0x2F,AX
+	ADDQ	DX,SI
+	RORQ	$0x2B,DX
+	XORQ	CX,AX
+	XORQ	SI,DX
+	RORQ	$0x20,CX
+	ADDQ	AX,SI
+	ADDQ	DX,CX
+	RORQ	$0x33,AX
+	RORQ	$0x30,DX
+	XORQ	CX,DX
+	XORQ	SI,AX
+	RORQ	$0x20,SI
+	ADDQ	DX,SI
+	ADDQ	AX,CX
+	RORQ	$0x2F,AX
+	XORQ	CX,AX
+	RORQ	$0x2B,DX
+	RORQ	$0x20,CX
+	XORQ	SI,DX
+
+	ADDQ	AX,SI
+	RORQ	$0x33,AX
+	ADDQ	DX,CX
+	RORQ	$0x30,DX
+	XORQ	SI,AX
+	XORQ	CX,DX
+	RORQ	$0x20,SI
+	ADDQ	AX,CX
+	ADDQ	DX,SI
+	RORQ	$0x2F,AX
+	RORQ	$0x2B,DX
+	XORQ	CX,AX
+	XORQ	SI,DX
+	RORQ	$0x20,CX
+
+	MOVQ	SI,BX
+	XORQ	AX,BX
+	XORQ	DX,BX
+	XORQ	CX,BX
+	MOVQ	BX,r1+48(FP)
+
+	RET

+ 169 - 0
vendor/github.com/dchest/siphash/hash128_arm.s

@@ -0,0 +1,169 @@
+#include "textflag.h"
+#define R10 g
+#define ROUND()\
+	ADD.S	R2,R0,R0;\
+	ADC	R3,R1,R1;\
+	EOR	R2<<13,R0,R8;\
+	EOR	R3>>19,R8,R8;\
+	EOR	R2>>19,R1,R11;\
+	EOR	R3<<13,R11,R11;\
+	ADD.S	R6,R4,R4;\
+	ADC	R7,R5,R5;\
+	EOR	R6<<16,R4,R2;\
+	EOR	R7>>16,R2,R2;\
+	EOR	R6>>16,R5,R3;\
+	EOR	R7<<16,R3,R3;\
+	ADD.S	R2,R1,R1;\
+	ADC	R3,R0,R0;\
+	EOR	R2<<21,R1,R6;\
+	EOR	R3>>11,R6,R6;\
+	EOR	R2>>11,R0,R7;\
+	EOR	R3<<21,R7,R7;\
+	ADD.S	R8,R4,R4;\
+	ADC	R11,R5,R5;\
+	EOR	R8<<17,R4,R2;\
+	EOR	R11>>15,R2,R2;\
+	EOR	R8>>15,R5,R3;\
+	EOR	R11<<17,R3,R3;\
+	ADD.S	R2,R1,R1;\
+	ADC	R3,R0,R0;\
+	EOR	R2<<13,R1,R8;\
+	EOR	R3>>19,R8,R8;\
+	EOR	R2>>19,R0,R11;\
+	EOR	R3<<13,R11,R11;\
+	ADD.S	R6,R5,R5;\
+	ADC	R7,R4,R4;\
+	EOR	R6<<16,R5,R2;\
+	EOR	R7>>16,R2,R2;\
+	EOR	R6>>16,R4,R3;\
+	EOR	R7<<16,R3,R3;\
+	ADD.S	R2,R0,R0;\
+	ADC	R3,R1,R1;\
+	EOR	R2<<21,R0,R6;\
+	EOR	R3>>11,R6,R6;\
+	EOR	R2>>11,R1,R7;\
+	EOR	R3<<21,R7,R7;\
+	ADD.S	R8,R5,R5;\
+	ADC	R11,R4,R4;\
+	EOR	R8<<17,R5,R2;\
+	EOR	R11>>15,R2,R2;\
+	EOR	R8>>15,R4,R3;\
+	EOR	R11<<17,R3,R3;\
+
+// Hash128(k0, k1 uint64, b []byte) (uint64, uint64)
+//
+// Computes the 128-bit hash of b under the key halves k0 and k1 and returns
+// it as two 64-bit words. The state words are held as register pairs R0:R1
+// (v0), R2:R3 (v1), R4:R5 (v2) and R6:R7 (v3); the ROUND() macro in this
+// file contains two rounds and uses R8 and R11 as scratch. R10 is #defined
+// to g at the top of this file, so g is saved on entry and restored before
+// RET while R10 serves as the data pointer.
+//
+// Word-aligned input is consumed with two-register loads in hashloop128;
+// input at any other alignment is consumed byte by byte in
+// hashloop128unaligned.
+TEXT ·Hash128(SB),NOSPLIT,$8-44
+	MOVW	R10,sav-8(SP)	// save g
+	MOVW	k0_lo+0(FP),R12
+	MOVW	k0_hi+4(FP),R14
+	MOVW	$0x70736575,R0
+	MOVW	$0x736f6d65,R1
+	MOVW	$0x6e657261,R4
+	MOVW	$0x6c796765,R5
+	EOR	R12,R0,R0
+	EOR	R14,R1,R1
+	EOR	R12,R4,R4
+	EOR	R14,R5,R5
+	MOVW	k1_lo+8(FP),R12
+	MOVW	k1_hi+12(FP),R14
+	// Low word of the v1 constant with the 128-bit marker already folded
+	// in: 0x6e646f6d ^ 0xee = 0x6e646f83.
+	MOVW	$0x6e646f83,R2
+	MOVW	$0x646f7261,R3
+	MOVW	$0x79746573,R6
+	MOVW	$0x74656462,R7
+	EOR	R12,R2,R2
+	EOR	R14,R3,R3
+	EOR	R12,R6,R6
+	EOR	R14,R7,R7
+	MOVW	b+16(FP),R10
+	MOVW	b_len+20(FP),R11
+	ADD	R10,R11,R11
+	MOVW	R11,endb-4(SP)	// endb = pointer one past the last byte
+	// The multi-register load in hashloop128 needs a word-aligned address,
+	// so route every other alignment to the byte-wise loop. This is the
+	// same dispatch Hash performs in hash_arm.s; without it the unaligned
+	// loop below is unreachable.
+	AND.S	$3,R10,R8
+	BNE	hashloop128unaligned
+hashloop128:
+	MOVW	endb-4(SP),R11
+	SUB	R10,R11,R11
+	SUB.S	$8,R11		// R11 = bytes remaining - 8
+	BLO	hashend128	// fewer than 8 bytes left
+	MOVM.IA.W (R10),[R12,R14]	// load m and advance R10 by 8
+	EOR	R12,R6,R6
+	EOR	R14,R7,R7
+	ROUND()
+	EOR	R12,R0,R0
+	EOR	R14,R1,R1
+	B	hashloop128
+hashloop128unaligned:
+	MOVW	endb-4(SP),R11
+	SUB	R10,R11,R11
+	SUB.S	$8,R11		// R11 = bytes remaining - 8
+	BLO	hashend128	// fewer than 8 bytes left
+	// Assemble the little-endian words R12 (low) and R14 (high) one byte
+	// at a time.
+	MOVB	(R10),R12
+	MOVB	1(R10),R11
+	ORR	R11<<8,R12,R12
+	MOVB	2(R10),R11
+	ORR	R11<<16,R12,R12
+	MOVB	3(R10),R11
+	ORR	R11<<24,R12,R12
+	MOVB	4(R10),R14
+	MOVB	5(R10),R11
+	ORR	R11<<8,R14,R14
+	MOVB	6(R10),R11
+	ORR	R11<<16,R14,R14
+	MOVB	7(R10),R11
+	ORR	R11<<24,R14,R14
+	ADD	$8,R10,R10
+	EOR	R12,R6,R6
+	EOR	R14,R7,R7
+	ROUND()
+	EOR	R12,R0,R0
+	EOR	R14,R1,R1
+	B	hashloop128unaligned
+hashend128:
+	// Build the final block in R12:R14 from the 0..7 trailing bytes.
+	MOVW	$0x0,R12
+	MOVW	$0x0,R14
+	RSB	$0,R11,R11
+	AND.S	$7,R11		// R11 = (8 - remaining) & 7
+	BEQ	hashlast128	// no trailing bytes
+	MOVW	(R10),R12
+	SLL	$3,R11		// number of high bits to clear
+	AND	$63,R11
+	SUB.S	$32,R11,R11
+	BEQ	hashlast128	// exactly four trailing bytes
+	BLO	hashhi128	// more than four trailing bytes
+	MOVW	R12<<R11,R12	// fewer than four: clear the unused high bytes
+	MOVW	R12>>R11,R12
+	B	hashlast128
+hashhi128:
+	ADD	$32,R11
+	MOVW	4(R10),R14
+	MOVW	R14<<R11,R14	// clear the unused high bytes of the upper word
+	MOVW	R14>>R11,R14
+hashlast128:
+	MOVW	b_len+20(FP),R11
+	ORR	R11<<24,R14,R14	// length byte goes into the top byte of the block
+	EOR	R12,R6,R6
+	EOR	R14,R7,R7
+	ROUND()
+	EOR	R12,R0,R0
+	EOR	R14,R1,R1
+	EOR	$238,R4		// v2 ^= 0xee
+	ROUND()
+	ROUND()
+	// First result word: v0 ^ v1 ^ v2 ^ v3.
+	EOR	R0,R2,R12
+	EOR	R1,R3,R14
+	EOR	R4,R12,R12
+	EOR	R5,R14,R14
+	EOR	R6,R12,R12
+	EOR	R7,R14,R14
+	MOVW	R12,ret_lo+28(FP)
+	MOVW	R14,ret_hi+32(FP)
+	EOR	$221,R2		// v1 ^= 0xdd
+	ROUND()
+	ROUND()
+	// Second result word: v0 ^ v1 ^ v2 ^ v3 after four more rounds.
+	EOR	R0,R2,R12
+	EOR	R1,R3,R14
+	EOR	R4,R12,R12
+	EOR	R5,R14,R14
+	EOR	R6,R12,R12
+	EOR	R7,R14,R14
+	MOVW	R12,unnamed_lo+36(FP)
+	MOVW	R14,unnamed_hi+40(FP)
+	MOVW	sav-8(SP),R10	// restore g
+	RET

+ 201 - 0
vendor/github.com/dchest/siphash/hash_amd64.s

@@ -0,0 +1,201 @@
+// +build amd64,!appengine,!gccgo
+
+// This is a translation of the gcc output of FloodyBerry's pure-C public
+// domain siphash implementation at https://github.com/floodyberry/siphash
+// func Hash(k0, k1 uint64, b []byte) uint64
+TEXT	·Hash(SB),4,$0-48
+	MOVQ	k0+0(FP),CX
+	MOVQ	$0x736F6D6570736575,R9
+	MOVQ	k1+8(FP),DI
+	MOVQ	$0x6C7967656E657261,BX
+	MOVQ	$0x646F72616E646F6D,AX
+	MOVQ	b_len+24(FP),DX
+	MOVQ	DX,R11
+	MOVQ	DX,R10
+	XORQ	CX,R9
+	XORQ	CX,BX
+	MOVQ	$0x7465646279746573,CX
+	XORQ	DI,AX
+	XORQ	DI,CX
+	SHLQ	$0x38,R11
+	XORQ	DI,DI
+	MOVQ	b_base+16(FP),SI
+	ANDQ	$0xFFFFFFFFFFFFFFF8,R10
+	JE	afterLoop
+	XCHGQ	AX,AX
+loopBody:
+	MOVQ	0(SI)(DI*1),R8
+	ADDQ	AX,R9
+	RORQ	$0x33,AX
+	XORQ	R9,AX
+	RORQ	$0x20,R9
+	ADDQ	$0x8,DI
+	XORQ	R8,CX
+	ADDQ	CX,BX
+	RORQ	$0x30,CX
+	XORQ	BX,CX
+	ADDQ	AX,BX
+	RORQ	$0x2F,AX
+	ADDQ	CX,R9
+	RORQ	$0x2B,CX
+	XORQ	BX,AX
+	XORQ	R9,CX
+	RORQ	$0x20,BX
+	ADDQ	AX,R9
+	ADDQ	CX,BX
+	RORQ	$0x33,AX
+	RORQ	$0x30,CX
+	XORQ	R9,AX
+	XORQ	BX,CX
+	RORQ	$0x20,R9
+	ADDQ	AX,BX
+	ADDQ	CX,R9
+	RORQ	$0x2F,AX
+	RORQ	$0x2B,CX
+	XORQ	BX,AX
+	RORQ	$0x20,BX
+	XORQ	R9,CX
+	XORQ	R8,R9
+	CMPQ	R10,DI
+	JA	loopBody
+afterLoop:
+	SUBQ	R10,DX
+
+	CMPQ	DX,$0x7
+	JA	afterSwitch
+
+	// no support for jump tables
+
+	CMPQ	DX,$0x7
+	JE	sw7
+
+	CMPQ	DX,$0x6
+	JE	sw6
+
+	CMPQ	DX,$0x5
+	JE	sw5
+
+	CMPQ	DX,$0x4
+	JE	sw4
+
+	CMPQ	DX,$0x3
+	JE	sw3
+
+	CMPQ	DX,$0x2
+	JE	sw2
+
+	CMPQ	DX,$0x1
+	JE	sw1
+
+	JMP	afterSwitch
+
+sw7:	MOVBQZX	6(SI)(DI*1),DX
+	SHLQ	$0x30,DX
+	ORQ	DX,R11
+sw6:	MOVBQZX	0x5(SI)(DI*1),DX
+	SHLQ	$0x28,DX
+	ORQ	DX,R11
+sw5:	MOVBQZX	0x4(SI)(DI*1),DX
+	SHLQ	$0x20,DX
+	ORQ	DX,R11
+sw4:	MOVBQZX	0x3(SI)(DI*1),DX
+	SHLQ	$0x18,DX
+	ORQ	DX,R11
+sw3:	MOVBQZX	0x2(SI)(DI*1),DX
+	SHLQ	$0x10,DX
+	ORQ	DX,R11
+sw2:	MOVBQZX	0x1(SI)(DI*1),DX
+	SHLQ	$0x8,DX
+	ORQ	DX,R11
+sw1:	MOVBQZX	0(SI)(DI*1),DX
+	ORQ	DX,R11
+afterSwitch:
+	LEAQ	(AX)(R9*1),SI
+	XORQ	R11,CX
+	RORQ	$0x33,AX
+	ADDQ	CX,BX
+	MOVQ	CX,DX
+	XORQ	SI,AX
+	RORQ	$0x30,DX
+	RORQ	$0x20,SI
+	LEAQ	0(BX)(AX*1),CX
+	XORQ	BX,DX
+	RORQ	$0x2F,AX
+	ADDQ	DX,SI
+	RORQ	$0x2B,DX
+	XORQ	CX,AX
+	XORQ	SI,DX
+	RORQ	$0x20,CX
+	ADDQ	AX,SI
+	RORQ	$0x33,AX
+	ADDQ	DX,CX
+	XORQ	SI,AX
+	RORQ	$0x30,DX
+	RORQ	$0x20,SI
+	XORQ	CX,DX
+	ADDQ	AX,CX
+	RORQ	$0x2F,AX
+	ADDQ	DX,SI
+	XORQ	CX,AX
+	RORQ	$0x2B,DX
+	RORQ	$0x20,CX
+	XORQ	SI,DX
+	XORQ	R11,SI
+	XORB	$0xFF,CL
+	ADDQ	AX,SI
+	RORQ	$0x33,AX
+	ADDQ	DX,CX
+	RORQ	$0x30,DX
+	XORQ	SI,AX
+	XORQ	CX,DX
+	RORQ	$0x20,SI
+	ADDQ	AX,CX
+	ADDQ	DX,SI
+	RORQ	$0x2F,AX
+	RORQ	$0x2B,DX
+	XORQ	CX,AX
+	XORQ	SI,DX
+	RORQ	$0x20,CX
+	ADDQ	AX,SI
+	ADDQ	DX,CX
+	RORQ	$0x33,AX
+	RORQ	$0x30,DX
+	XORQ	SI,AX
+	RORQ	$0x20,SI
+	XORQ	CX,DX
+	ADDQ	AX,CX
+	RORQ	$0x2F,AX
+	ADDQ	DX,SI
+	RORQ	$0x2B,DX
+	XORQ	CX,AX
+	XORQ	SI,DX
+	RORQ	$0x20,CX
+	ADDQ	AX,SI
+	ADDQ	DX,CX
+	RORQ	$0x33,AX
+	RORQ	$0x30,DX
+	XORQ	CX,DX
+	XORQ	SI,AX
+	RORQ	$0x20,SI
+	ADDQ	DX,SI
+	ADDQ	AX,CX
+	RORQ	$0x2F,AX
+	XORQ	CX,AX
+	RORQ	$0x2B,DX
+	RORQ	$0x20,CX
+	XORQ	SI,DX
+	ADDQ	AX,SI
+	RORQ	$0x33,AX
+	ADDQ	DX,CX
+	XORQ	SI,AX
+	RORQ	$0x30,DX
+	XORQ	CX,DX
+	ADDQ	AX,CX
+	RORQ	$0x2F,AX
+	XORQ	CX,AX
+	RORQ	$0x2B,DX
+	RORQ	$0x20,CX
+	XORQ	DX,AX
+	XORQ	CX,AX
+	MOVQ	AX,ret+40(FP)
+	RET

+ 160 - 0
vendor/github.com/dchest/siphash/hash_arm.s

@@ -0,0 +1,160 @@
+#include "textflag.h"
+#define R10 g
+#define ROUND()\
+	ADD.S	R2,R0,R0;\
+	ADC	R3,R1,R1;\
+	EOR	R2<<13,R0,R8;\
+	EOR	R3>>19,R8,R8;\
+	EOR	R2>>19,R1,R11;\
+	EOR	R3<<13,R11,R11;\
+	ADD.S	R6,R4,R4;\
+	ADC	R7,R5,R5;\
+	EOR	R6<<16,R4,R2;\
+	EOR	R7>>16,R2,R2;\
+	EOR	R6>>16,R5,R3;\
+	EOR	R7<<16,R3,R3;\
+	ADD.S	R2,R1,R1;\
+	ADC	R3,R0,R0;\
+	EOR	R2<<21,R1,R6;\
+	EOR	R3>>11,R6,R6;\
+	EOR	R2>>11,R0,R7;\
+	EOR	R3<<21,R7,R7;\
+	ADD.S	R8,R4,R4;\
+	ADC	R11,R5,R5;\
+	EOR	R8<<17,R4,R2;\
+	EOR	R11>>15,R2,R2;\
+	EOR	R8>>15,R5,R3;\
+	EOR	R11<<17,R3,R3;\
+	ADD.S	R2,R1,R1;\
+	ADC	R3,R0,R0;\
+	EOR	R2<<13,R1,R8;\
+	EOR	R3>>19,R8,R8;\
+	EOR	R2>>19,R0,R11;\
+	EOR	R3<<13,R11,R11;\
+	ADD.S	R6,R5,R5;\
+	ADC	R7,R4,R4;\
+	EOR	R6<<16,R5,R2;\
+	EOR	R7>>16,R2,R2;\
+	EOR	R6>>16,R4,R3;\
+	EOR	R7<<16,R3,R3;\
+	ADD.S	R2,R0,R0;\
+	ADC	R3,R1,R1;\
+	EOR	R2<<21,R0,R6;\
+	EOR	R3>>11,R6,R6;\
+	EOR	R2>>11,R1,R7;\
+	EOR	R3<<21,R7,R7;\
+	ADD.S	R8,R5,R5;\
+	ADC	R11,R4,R4;\
+	EOR	R8<<17,R5,R2;\
+	EOR	R11>>15,R2,R2;\
+	EOR	R8>>15,R4,R3;\
+	EOR	R11<<17,R3,R3;\
+
+// Hash(k0, k1 uint64, b []byte) uint64
+TEXT ·Hash(SB),NOSPLIT,$8-36
+	MOVW	R10,sav-8(SP)
+	MOVW	k0_lo+0(FP),R12
+	MOVW	k0_hi+4(FP),R14
+	MOVW	$0x70736575,R0
+	MOVW	$0x736f6d65,R1
+	MOVW	$0x6e657261,R4
+	MOVW	$0x6c796765,R5
+	EOR	R12,R0,R0
+	EOR	R14,R1,R1
+	EOR	R12,R4,R4
+	EOR	R14,R5,R5
+	MOVW	k1_lo+8(FP),R12
+	MOVW	k1_hi+12(FP),R14
+	MOVW	$0x6e646f6d,R2
+	MOVW	$0x646f7261,R3
+	MOVW	$0x79746573,R6
+	MOVW	$0x74656462,R7
+	EOR	R12,R2,R2
+	EOR	R14,R3,R3
+	EOR	R12,R6,R6
+	EOR	R14,R7,R7
+	MOVW	b+16(FP),R10
+	MOVW	b_len+20(FP),R11
+	ADD	R10,R11,R11
+	MOVW	R11,endb-4(SP)
+	AND.S	$3,R10,R8
+	BNE	hashloopunaligned
+hashloop:
+	MOVW	endb-4(SP),R11
+	SUB	R10,R11,R11
+	SUB.S	$8,R11
+	BLO	hashend
+	MOVM.IA.W (R10),[R12,R14]
+	EOR	R12,R6,R6
+	EOR	R14,R7,R7
+	ROUND()
+	EOR	R12,R0,R0
+	EOR	R14,R1,R1
+	B	hashloop
+hashloopunaligned:
+	MOVW	endb-4(SP),R11
+	SUB	R10,R11,R11
+	SUB.S	$8,R11
+	BLO	hashend
+	MOVB	(R10),R12
+	MOVB	1(R10),R11
+	ORR	R11<<8,R12,R12
+	MOVB	2(R10),R11
+	ORR	R11<<16,R12,R12
+	MOVB	3(R10),R11
+	ORR	R11<<24,R12,R12
+	MOVB	4(R10),R14
+	MOVB	5(R10),R11
+	ORR	R11<<8,R14,R14
+	MOVB	6(R10),R11
+	ORR	R11<<16,R14,R14
+	MOVB	7(R10),R11
+	ORR	R11<<24,R14,R14
+	ADD	$8,R10,R10
+	EOR	R12,R6,R6
+	EOR	R14,R7,R7
+	ROUND()
+	EOR	R12,R0,R0
+	EOR	R14,R1,R1
+	B	hashloopunaligned
+hashend:
+	MOVW	$0x0,R12
+	MOVW	$0x0,R14
+	RSB	$0,R11,R11
+	AND.S	$7,R11
+	BEQ	hashlast
+	MOVW	(R10),R12
+	SLL	$3,R11
+	AND	$63,R11
+	SUB.S	$32,R11,R11
+	BEQ	hashlast
+	BLO	hashhi
+	MOVW	R12<<R11,R12
+	MOVW	R12>>R11,R12
+	B	hashlast
+hashhi:
+	ADD	$32,R11
+	MOVW	4(R10),R14
+	MOVW	R14<<R11,R14
+	MOVW	R14>>R11,R14
+hashlast:
+	MOVW	b_len+20(FP),R11
+	ORR	R11<<24,R14,R14
+	EOR	R12,R6,R6
+	EOR	R14,R7,R7
+	ROUND()
+	EOR	R12,R0,R0
+	EOR	R14,R1,R1
+	EOR	$255,R4
+	ROUND()
+	ROUND()
+	EOR	R2,R0,R0
+	EOR	R3,R1,R1
+	EOR	R6,R4,R4
+	EOR	R7,R5,R5
+	EOR	R4,R0,R0
+	EOR	R5,R1,R1
+	MOVW	sav-8(SP),R10
+	MOVW	R0,ret_lo+28(FP)
+	MOVW	R1,ret_hi+32(FP)
+	RET

+ 33 - 0
vendor/github.com/dchest/siphash/hash_asm.go

@@ -0,0 +1,33 @@
+// +build arm amd64,!appengine,!gccgo
+
+// Written in 2012 by Dmitry Chestnykh.
+//
+// To the extent possible under law, the author have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+// http://creativecommons.org/publicdomain/zero/1.0/
+
+// This file contains the Go declarations for the functions implemented in
+// assembly: Hash(), Hash128() and the digest helpers blocks(), finalize() and once().
+
+package siphash
+
+//go:noescape
+
+// Hash returns the 64-bit SipHash-2-4 of the given byte slice with two 64-bit
+// parts of 128-bit key: k0 and k1.
+//
+// k0 is the little-endian value of key bytes 0-7 and k1 that of key bytes
+// 8-15. This declaration has no Go body; the build tag at the top of this
+// file restricts it to targets that supply an assembly implementation.
+func Hash(k0, k1 uint64, b []byte) uint64
+
+//go:noescape
+
+// Hash128 returns the 128-bit SipHash-2-4 of the given byte slice with two
+// 64-bit parts of 128-bit key: k0 and k1.
+//
+// k0 is the little-endian value of key bytes 0-7 and k1 that of key bytes
+// 8-15. The two results are the low and high 64-bit halves of the digest, in
+// the order they appear in the serialized 16-byte output. This declaration
+// has no Go body; it is implemented in assembly.
+func Hash128(k0, k1 uint64, b []byte) (uint64, uint64)
+
+//go:noescape
+
+// blocks has no Go body; it is implemented in assembly. Write passes it a run
+// of whole BlockSize-byte blocks, and sum128 passes it the padded final block.
+// NOTE(review): presumably it absorbs every block of p into d.v0..d.v3 -
+// confirm against the assembly.
+func blocks(d *digest, p []uint8)
+
+//go:noescape
+
+// finalize has no Go body; it is implemented in assembly. Sum64 calls it after
+// zero-padding d.x and storing the length byte in d.x[7], and returns its
+// result as the 64-bit digest.
+// NOTE(review): Sum64 passes the live digest rather than a copy, so finalize
+// presumably leaves d.v0..d.v3 unmodified - confirm against the assembly.
+func finalize(d *digest) uint64
+
+//go:noescape
+
+// once has no Go body; it is implemented in assembly. Write calls it when the
+// internal buffer d.x has been filled to BlockSize bytes.
+// NOTE(review): presumably it absorbs d.x as a single block - confirm against
+// the assembly.
+func once(d *digest)

+ 318 - 0
vendor/github.com/dchest/siphash/siphash.go

@@ -0,0 +1,318 @@
+// Written in 2012-2014 by Dmitry Chestnykh.
+//
+// To the extent possible under law, the author have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+// http://creativecommons.org/publicdomain/zero/1.0/
+
+// Package siphash implements SipHash-2-4, a fast short-input PRF
+// created by Jean-Philippe Aumasson and Daniel J. Bernstein.
+package siphash
+
+import "hash"
+
+const (
+	// BlockSize is the block size of the hash algorithm in bytes: input is
+	// absorbed in 8-byte words.
+	BlockSize = 8
+
+	// Size is the size of the 64-bit hash output in bytes (see New).
+	Size = 8
+
+	// Size128 is the size of the 128-bit hash output in bytes (see New128).
+	Size128 = 16
+)
+
+// digest is the streaming hash state returned by New and New128.
+//
+// NOTE(review): the assembly-backed helpers blocks, once and finalize take a
+// *digest, so they presumably depend on this exact field layout - confirm
+// before reordering or resizing any field.
+type digest struct {
+	v0, v1, v2, v3 uint64  // state
+	k0, k1         uint64  // two parts of key
+	x              [8]byte // buffer for unprocessed bytes
+	nx             int     // number of bytes in buffer x
+	size           int     // output size in bytes (8 or 16)
+	t              uint8   // message bytes counter (mod 256)
+}
+
+// newDigest returns a new digest that produces size bytes of output and is
+// keyed with the first 16 bytes of key, read as two little-endian 64-bit
+// words (k0 from bytes 0-7, k1 from bytes 8-15). Any bytes after the 16th
+// are ignored.
+//
+// It panics if size is not Size or Size128, or if key is shorter than
+// 16 bytes.
+func newDigest(size int, key []byte) *digest {
+	if size != Size && size != Size128 {
+		panic("size must be 8 or 16")
+	}
+	// Reject short keys explicitly: without this check the indexing below
+	// fails with an index-out-of-range runtime panic that does not tell the
+	// caller what was wrong.
+	if len(key) < 16 {
+		panic("key must be at least 16 bytes")
+	}
+	d := new(digest)
+	d.k0 = uint64(key[0]) | uint64(key[1])<<8 | uint64(key[2])<<16 | uint64(key[3])<<24 |
+		uint64(key[4])<<32 | uint64(key[5])<<40 | uint64(key[6])<<48 | uint64(key[7])<<56
+	d.k1 = uint64(key[8]) | uint64(key[9])<<8 | uint64(key[10])<<16 | uint64(key[11])<<24 |
+		uint64(key[12])<<32 | uint64(key[13])<<40 | uint64(key[14])<<48 | uint64(key[15])<<56
+	d.size = size
+	// Reset derives the initial state from k0/k1 and size.
+	d.Reset()
+	return d
+}
+
+// New creates a hash.Hash64 that computes SipHash-2-4 with an 8-byte output.
+// The first 16 bytes of key are used as the key.
+func New(key []byte) hash.Hash64 {
+	d := newDigest(Size, key)
+	return d
+}
+
+// New128 creates a hash.Hash that computes SipHash-2-4 with a 16-byte output.
+// The first 16 bytes of key are used as the key.
+//
+// Note that 16-byte output is considered experimental by SipHash authors at this time.
+func New128(key []byte) hash.Hash {
+	d := newDigest(Size128, key)
+	return d
+}
+
+// Reset discards all buffered input and returns the digest to its initial
+// state for the key and output size it was created with.
+func (d *digest) Reset() {
+	// Drop any pending bytes and restart the length counter.
+	d.nx, d.t = 0, 0
+
+	// Initial state: the four fixed constants XORed with the key halves.
+	k0, k1 := d.k0, d.k1
+	d.v0 = k0 ^ 0x736f6d6570736575
+	d.v1 = k1 ^ 0x646f72616e646f6d
+	d.v2 = k0 ^ 0x6c7967656e657261
+	d.v3 = k1 ^ 0x7465646279746573
+
+	// The 128-bit variant additionally tweaks v1.
+	if d.size == Size128 {
+		d.v1 ^= 0xee
+	}
+}
+
+// Size reports the number of bytes Sum appends: the output size this digest
+// was created with (Size or Size128).
+func (d *digest) Size() int {
+	return d.size
+}
+
+// BlockSize reports the hash's block size in bytes, which is the package
+// constant BlockSize.
+func (d *digest) BlockSize() int {
+	return BlockSize
+}
+
+// Write absorbs p into the hash state. It always consumes all of p and
+// returns len(p) with a nil error.
+func (d *digest) Write(p []byte) (nn int, err error) {
+	total := len(p)
+	// Only the low 8 bits of the running length are kept.
+	d.t += uint8(total)
+
+	// Top up a partially filled buffer first.
+	if d.nx > 0 {
+		filled := copy(d.x[d.nx:], p)
+		d.nx += filled
+		p = p[filled:]
+		if d.nx == BlockSize {
+			once(d)
+			d.nx = 0
+		}
+	}
+
+	// Hand every whole block straight to blocks without buffering.
+	if whole := len(p) &^ (BlockSize - 1); whole > 0 {
+		blocks(d, p[:whole])
+		p = p[whole:]
+	}
+
+	// Keep the tail (fewer than BlockSize bytes) for the next call.
+	if len(p) > 0 {
+		d.nx = copy(d.x[:], p)
+	}
+	return total, nil
+}
+
+// Sum64 returns the 64-bit hash of the data written so far.
+func (d *digest) Sum64() uint64 {
+	// Zero the unused part of the buffer, from the byte before the last
+	// down to the first free position.
+	for i := BlockSize - 2; i >= d.nx; i-- {
+		d.x[i] = 0
+	}
+	// The last byte of the final block carries the length counter.
+	d.x[BlockSize-1] = d.t
+	return finalize(d)
+}
+
+// sum128 returns the 128-bit hash of the data written so far as two 64-bit
+// halves; Sum serializes r0 first, then r1.
+func (d0 *digest) sum128() (r0, r1 uint64) {
+	// Finalize a private copy so the caller can keep writing and summing.
+	d := *d0
+
+	// Zero-pad the pending block; its last byte carries the length counter.
+	for i := BlockSize - 2; i >= d.nx; i-- {
+		d.x[i] = 0
+	}
+	d.x[BlockSize-1] = d.t
+	blocks(&d, d.x[:])
+
+	v0, v1, v2, v3 := d.v0, d.v1, d.v2^0xee, d.v3
+
+	// fourRounds applies the round function to v0..v3 four times.
+	fourRounds := func() {
+		for n := 0; n < 4; n++ {
+			v0 += v1
+			v1 = (v1<<13 | v1>>(64-13)) ^ v0
+			v0 = v0<<32 | v0>>(64-32)
+
+			v2 += v3
+			v3 = (v3<<16 | v3>>(64-16)) ^ v2
+
+			v0 += v3
+			v3 = (v3<<21 | v3>>(64-21)) ^ v0
+
+			v2 += v1
+			v1 = (v1<<17 | v1>>(64-17)) ^ v2
+			v2 = v2<<32 | v2>>(64-32)
+		}
+	}
+
+	// First half of the output.
+	fourRounds()
+	r0 = v0 ^ v1 ^ v2 ^ v3
+
+	// Second half: tweak v1 and run four more rounds.
+	v1 ^= 0xdd
+	fourRounds()
+	r1 = v0 ^ v1 ^ v2 ^ v3
+
+	return r0, r1
+}
+
+// Sum appends the current hash to in and returns the resulting slice.
+// The hash is written in little-endian byte order: 8 bytes for a digest
+// created with Size, 16 bytes otherwise.
+func (d *digest) Sum(in []byte) []byte {
+	// appendLE appends the eight bytes of v, least significant first.
+	appendLE := func(dst []byte, v uint64) []byte {
+		for shift := uint(0); shift < 64; shift += 8 {
+			dst = append(dst, byte(v>>shift))
+		}
+		return dst
+	}
+
+	if d.size != Size {
+		lo, hi := d.sum128()
+		return appendLE(appendLE(in, lo), hi)
+	}
+	return appendLE(in, d.Sum64())
+}

+ 591 - 0
vendor/github.com/dchest/siphash/siphash_test.go

@@ -0,0 +1,591 @@
+// Written in 2012 by Dmitry Chestnykh.
+//
+// To the extent possible under law, the author have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+// http://creativecommons.org/publicdomain/zero/1.0/
+
+package siphash
+
+import (
+	"bytes"
+	"encoding/binary"
+	"testing"
+)
+
+// zeroKey is an all-zero 16-byte key shared by several golden vectors and by
+// the benchmarks.
+var zeroKey = make([]byte, 16)
+
+// golden holds known-answer vectors for the 64-bit hash: k is the 16-byte
+// key, m the message and r the expected result as a uint64. The tests
+// compare r with Sum64 and Hash directly, and with Sum after encoding it
+// little-endian.
+var golden = []struct {
+	k []byte
+	m []byte
+	r uint64
+}{
+	{
+		[]byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f},
+		[]byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e},
+		0xa129ca6149be45e5,
+	},
+	{
+		zeroKey,
+		[]byte("Hello world"),
+		0xc9e8a3021f3822d9,
+	},
+	{
+		zeroKey,
+		[]byte{}, // zero-length message
+		0x1e924b9d737700d7,
+	},
+	{
+		zeroKey,
+		[]byte("12345678123"),
+		0xf95d77ccdb0649f,
+	},
+	{
+		zeroKey,
+		make([]byte, 8),
+		0xe849e8bb6ffe2567,
+	},
+	{
+		zeroKey,
+		make([]byte, 1535),
+		0xe74d1c0ab64b2afa,
+	},
+}
+
+// Test vectors from reference implementation.
+//
+// SipHash-2-4 output with
+// k = 00 01 02 ...
+// and
+// in = (empty string)
+// in = 00 (1 byte)
+// in = 00 01 (2 bytes)
+// in = 00 01 02 (3 bytes)
+// ...
+// in = 00 01 02 ... 3e (63 bytes)
+var goldenRef = [][]byte{
+	{0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72},
+	{0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74},
+	{0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d},
+	{0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85},
+	{0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf},
+	{0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18},
+	{0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb},
+	{0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab},
+	{0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93},
+	{0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e},
+	{0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a},
+	{0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4},
+	{0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75},
+	{0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14},
+	{0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7},
+	{0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1},
+	{0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f},
+	{0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69},
+	{0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b},
+	{0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb},
+	{0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe},
+	{0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0},
+	{0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93},
+	{0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8},
+	{0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8},
+	{0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc},
+	{0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17},
+	{0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f},
+	{0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde},
+	{0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6},
+	{0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad},
+	{0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32},
+	{0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71},
+	{0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7},
+	{0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12},
+	{0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15},
+	{0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31},
+	{0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02},
+	{0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca},
+	{0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a},
+	{0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e},
+	{0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad},
+	{0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18},
+	{0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4},
+	{0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9},
+	{0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9},
+	{0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb},
+	{0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0},
+	{0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6},
+	{0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7},
+	{0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee},
+	{0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1},
+	{0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a},
+	{0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81},
+	{0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f},
+	{0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24},
+	{0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7},
+	{0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea},
+	{0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60},
+	{0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66},
+	{0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c},
+	{0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f},
+	{0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5},
+	{0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95},
+}
+
+var goldenRef128 = [][]byte{
+	{0xa3, 0x81, 0x7f, 0x04, 0xba, 0x25, 0xa8, 0xe6, 0x6d, 0xf6, 0x72, 0x14, 0xc7, 0x55, 0x02, 0x93},
+	{0xda, 0x87, 0xc1, 0xd8, 0x6b, 0x99, 0xaf, 0x44, 0x34, 0x76, 0x59, 0x11, 0x9b, 0x22, 0xfc, 0x45},
+	{0x81, 0x77, 0x22, 0x8d, 0xa4, 0xa4, 0x5d, 0xc7, 0xfc, 0xa3, 0x8b, 0xde, 0xf6, 0x0a, 0xff, 0xe4},
+	{0x9c, 0x70, 0xb6, 0x0c, 0x52, 0x67, 0xa9, 0x4e, 0x5f, 0x33, 0xb6, 0xb0, 0x29, 0x85, 0xed, 0x51},
+	{0xf8, 0x81, 0x64, 0xc1, 0x2d, 0x9c, 0x8f, 0xaf, 0x7d, 0x0f, 0x6e, 0x7c, 0x7b, 0xcd, 0x55, 0x79},
+	{0x13, 0x68, 0x87, 0x59, 0x80, 0x77, 0x6f, 0x88, 0x54, 0x52, 0x7a, 0x07, 0x69, 0x0e, 0x96, 0x27},
+	{0x14, 0xee, 0xca, 0x33, 0x8b, 0x20, 0x86, 0x13, 0x48, 0x5e, 0xa0, 0x30, 0x8f, 0xd7, 0xa1, 0x5e},
+	{0xa1, 0xf1, 0xeb, 0xbe, 0xd8, 0xdb, 0xc1, 0x53, 0xc0, 0xb8, 0x4a, 0xa6, 0x1f, 0xf0, 0x82, 0x39},
+	{0x3b, 0x62, 0xa9, 0xba, 0x62, 0x58, 0xf5, 0x61, 0x0f, 0x83, 0xe2, 0x64, 0xf3, 0x14, 0x97, 0xb4},
+	{0x26, 0x44, 0x99, 0x06, 0x0a, 0xd9, 0xba, 0xab, 0xc4, 0x7f, 0x8b, 0x02, 0xbb, 0x6d, 0x71, 0xed},
+	{0x00, 0x11, 0x0d, 0xc3, 0x78, 0x14, 0x69, 0x56, 0xc9, 0x54, 0x47, 0xd3, 0xf3, 0xd0, 0xfb, 0xba},
+	{0x01, 0x51, 0xc5, 0x68, 0x38, 0x6b, 0x66, 0x77, 0xa2, 0xb4, 0xdc, 0x6f, 0x81, 0xe5, 0xdc, 0x18},
+	{0xd6, 0x26, 0xb2, 0x66, 0x90, 0x5e, 0xf3, 0x58, 0x82, 0x63, 0x4d, 0xf6, 0x85, 0x32, 0xc1, 0x25},
+	{0x98, 0x69, 0xe2, 0x47, 0xe9, 0xc0, 0x8b, 0x10, 0xd0, 0x29, 0x93, 0x4f, 0xc4, 0xb9, 0x52, 0xf7},
+	{0x31, 0xfc, 0xef, 0xac, 0x66, 0xd7, 0xde, 0x9c, 0x7e, 0xc7, 0x48, 0x5f, 0xe4, 0x49, 0x49, 0x02},
+	{0x54, 0x93, 0xe9, 0x99, 0x33, 0xb0, 0xa8, 0x11, 0x7e, 0x08, 0xec, 0x0f, 0x97, 0xcf, 0xc3, 0xd9},
+	{0x6e, 0xe2, 0xa4, 0xca, 0x67, 0xb0, 0x54, 0xbb, 0xfd, 0x33, 0x15, 0xbf, 0x85, 0x23, 0x05, 0x77},
+	{0x47, 0x3d, 0x06, 0xe8, 0x73, 0x8d, 0xb8, 0x98, 0x54, 0xc0, 0x66, 0xc4, 0x7a, 0xe4, 0x77, 0x40},
+	{0xa4, 0x26, 0xe5, 0xe4, 0x23, 0xbf, 0x48, 0x85, 0x29, 0x4d, 0xa4, 0x81, 0xfe, 0xae, 0xf7, 0x23},
+	{0x78, 0x01, 0x77, 0x31, 0xcf, 0x65, 0xfa, 0xb0, 0x74, 0xd5, 0x20, 0x89, 0x52, 0x51, 0x2e, 0xb1},
+	{0x9e, 0x25, 0xfc, 0x83, 0x3f, 0x22, 0x90, 0x73, 0x3e, 0x93, 0x44, 0xa5, 0xe8, 0x38, 0x39, 0xeb},
+	{0x56, 0x8e, 0x49, 0x5a, 0xbe, 0x52, 0x5a, 0x21, 0x8a, 0x22, 0x14, 0xcd, 0x3e, 0x07, 0x1d, 0x12},
+	{0x4a, 0x29, 0xb5, 0x45, 0x52, 0xd1, 0x6b, 0x9a, 0x46, 0x9c, 0x10, 0x52, 0x8e, 0xff, 0x0a, 0xae},
+	{0xc9, 0xd1, 0x84, 0xdd, 0xd5, 0xa9, 0xf5, 0xe0, 0xcf, 0x8c, 0xe2, 0x9a, 0x9a, 0xbf, 0x69, 0x1c},
+	{0x2d, 0xb4, 0x79, 0xae, 0x78, 0xbd, 0x50, 0xd8, 0x88, 0x2a, 0x8a, 0x17, 0x8a, 0x61, 0x32, 0xad},
+	{0x8e, 0xce, 0x5f, 0x04, 0x2d, 0x5e, 0x44, 0x7b, 0x50, 0x51, 0xb9, 0xea, 0xcb, 0x8d, 0x8f, 0x6f},
+	{0x9c, 0x0b, 0x53, 0xb4, 0xb3, 0xc3, 0x07, 0xe8, 0x7e, 0xae, 0xe0, 0x86, 0x78, 0x14, 0x1f, 0x66},
+	{0xab, 0xf2, 0x48, 0xaf, 0x69, 0xa6, 0xea, 0xe4, 0xbf, 0xd3, 0xeb, 0x2f, 0x12, 0x9e, 0xeb, 0x94},
+	{0x06, 0x64, 0xda, 0x16, 0x68, 0x57, 0x4b, 0x88, 0xb9, 0x35, 0xf3, 0x02, 0x73, 0x58, 0xae, 0xf4},
+	{0xaa, 0x4b, 0x9d, 0xc4, 0xbf, 0x33, 0x7d, 0xe9, 0x0c, 0xd4, 0xfd, 0x3c, 0x46, 0x7c, 0x6a, 0xb7},
+	{0xea, 0x5c, 0x7f, 0x47, 0x1f, 0xaf, 0x6b, 0xde, 0x2b, 0x1a, 0xd7, 0xd4, 0x68, 0x6d, 0x22, 0x87},
+	{0x29, 0x39, 0xb0, 0x18, 0x32, 0x23, 0xfa, 0xfc, 0x17, 0x23, 0xde, 0x4f, 0x52, 0xc4, 0x3d, 0x35},
+	{0x7c, 0x39, 0x56, 0xca, 0x5e, 0xea, 0xfc, 0x3e, 0x36, 0x3e, 0x9d, 0x55, 0x65, 0x46, 0xeb, 0x68},
+	{0x77, 0xc6, 0x07, 0x71, 0x46, 0xf0, 0x1c, 0x32, 0xb6, 0xb6, 0x9d, 0x5f, 0x4e, 0xa9, 0xff, 0xcf},
+	{0x37, 0xa6, 0x98, 0x6c, 0xb8, 0x84, 0x7e, 0xdf, 0x09, 0x25, 0xf0, 0xf1, 0x30, 0x9b, 0x54, 0xde},
+	{0xa7, 0x05, 0xf0, 0xe6, 0x9d, 0xa9, 0xa8, 0xf9, 0x07, 0x24, 0x1a, 0x2e, 0x92, 0x3c, 0x8c, 0xc8},
+	{0x3d, 0xc4, 0x7d, 0x1f, 0x29, 0xc4, 0x48, 0x46, 0x1e, 0x9e, 0x76, 0xed, 0x90, 0x4f, 0x67, 0x11},
+	{0x0d, 0x62, 0xbf, 0x01, 0xe6, 0xfc, 0x0e, 0x1a, 0x0d, 0x3c, 0x47, 0x51, 0xc5, 0xd3, 0x69, 0x2b},
+	{0x8c, 0x03, 0x46, 0x8b, 0xca, 0x7c, 0x66, 0x9e, 0xe4, 0xfd, 0x5e, 0x08, 0x4b, 0xbe, 0xe7, 0xb5},
+	{0x52, 0x8a, 0x5b, 0xb9, 0x3b, 0xaf, 0x2c, 0x9c, 0x44, 0x73, 0xcc, 0xe5, 0xd0, 0xd2, 0x2b, 0xd9},
+	{0xdf, 0x6a, 0x30, 0x1e, 0x95, 0xc9, 0x5d, 0xad, 0x97, 0xae, 0x0c, 0xc8, 0xc6, 0x91, 0x3b, 0xd8},
+	{0x80, 0x11, 0x89, 0x90, 0x2c, 0x85, 0x7f, 0x39, 0xe7, 0x35, 0x91, 0x28, 0x5e, 0x70, 0xb6, 0xdb},
+	{0xe6, 0x17, 0x34, 0x6a, 0xc9, 0xc2, 0x31, 0xbb, 0x36, 0x50, 0xae, 0x34, 0xcc, 0xca, 0x0c, 0x5b},
+	{0x27, 0xd9, 0x34, 0x37, 0xef, 0xb7, 0x21, 0xaa, 0x40, 0x18, 0x21, 0xdc, 0xec, 0x5a, 0xdf, 0x89},
+	{0x89, 0x23, 0x7d, 0x9d, 0xed, 0x9c, 0x5e, 0x78, 0xd8, 0xb1, 0xc9, 0xb1, 0x66, 0xcc, 0x73, 0x42},
+	{0x4a, 0x6d, 0x80, 0x91, 0xbf, 0x5e, 0x7d, 0x65, 0x11, 0x89, 0xfa, 0x94, 0xa2, 0x50, 0xb1, 0x4c},
+	{0x0e, 0x33, 0xf9, 0x60, 0x55, 0xe7, 0xae, 0x89, 0x3f, 0xfc, 0x0e, 0x3d, 0xcf, 0x49, 0x29, 0x02},
+	{0xe6, 0x1c, 0x43, 0x2b, 0x72, 0x0b, 0x19, 0xd1, 0x8e, 0xc8, 0xd8, 0x4b, 0xdc, 0x63, 0x15, 0x1b},
+	{0xf7, 0xe5, 0xae, 0xf5, 0x49, 0xf7, 0x82, 0xcf, 0x37, 0x90, 0x55, 0xa6, 0x08, 0x26, 0x9b, 0x16},
+	{0x43, 0x8d, 0x03, 0x0f, 0xd0, 0xb7, 0xa5, 0x4f, 0xa8, 0x37, 0xf2, 0xad, 0x20, 0x1a, 0x64, 0x03},
+	{0xa5, 0x90, 0xd3, 0xee, 0x4f, 0xbf, 0x04, 0xe3, 0x24, 0x7e, 0x0d, 0x27, 0xf2, 0x86, 0x42, 0x3f},
+	{0x5f, 0xe2, 0xc1, 0xa1, 0x72, 0xfe, 0x93, 0xc4, 0xb1, 0x5c, 0xd3, 0x7c, 0xae, 0xf9, 0xf5, 0x38},
+	{0x2c, 0x97, 0x32, 0x5c, 0xbd, 0x06, 0xb3, 0x6e, 0xb2, 0x13, 0x3d, 0xd0, 0x8b, 0x3a, 0x01, 0x7c},
+	{0x92, 0xc8, 0x14, 0x22, 0x7a, 0x6b, 0xca, 0x94, 0x9f, 0xf0, 0x65, 0x9f, 0x00, 0x2a, 0xd3, 0x9e},
+	{0xdc, 0xe8, 0x50, 0x11, 0x0b, 0xd8, 0x32, 0x8c, 0xfb, 0xd5, 0x08, 0x41, 0xd6, 0x91, 0x1d, 0x87},
+	{0x67, 0xf1, 0x49, 0x84, 0xc7, 0xda, 0x79, 0x12, 0x48, 0xe3, 0x2b, 0xb5, 0x92, 0x25, 0x83, 0xda},
+	{0x19, 0x38, 0xf2, 0xcf, 0x72, 0xd5, 0x4e, 0xe9, 0x7e, 0x94, 0x16, 0x6f, 0xa9, 0x1d, 0x2a, 0x36},
+	{0x74, 0x48, 0x1e, 0x96, 0x46, 0xed, 0x49, 0xfe, 0x0f, 0x62, 0x24, 0x30, 0x16, 0x04, 0x69, 0x8e},
+	{0x57, 0xfc, 0xa5, 0xde, 0x98, 0xa9, 0xd6, 0xd8, 0x00, 0x64, 0x38, 0xd0, 0x58, 0x3d, 0x8a, 0x1d},
+	{0x9f, 0xec, 0xde, 0x1c, 0xef, 0xdc, 0x1c, 0xbe, 0xd4, 0x76, 0x36, 0x74, 0xd9, 0x57, 0x53, 0x59},
+	{0xe3, 0x04, 0x0c, 0x00, 0xeb, 0x28, 0xf1, 0x53, 0x66, 0xca, 0x73, 0xcb, 0xd8, 0x72, 0xe7, 0x40},
+	{0x76, 0x97, 0x00, 0x9a, 0x6a, 0x83, 0x1d, 0xfe, 0xcc, 0xa9, 0x1c, 0x59, 0x93, 0x67, 0x0f, 0x7a},
+	{0x58, 0x53, 0x54, 0x23, 0x21, 0xf5, 0x67, 0xa0, 0x05, 0xd5, 0x47, 0xa4, 0xf0, 0x47, 0x59, 0xbd},
+	{0x51, 0x50, 0xd1, 0x77, 0x2f, 0x50, 0x83, 0x4a, 0x50, 0x3e, 0x06, 0x9a, 0x97, 0x3f, 0xbd, 0x7c},
+}
+
+// TestSum64 checks Sum64 of the streaming hasher against the golden vectors.
+func TestSum64(t *testing.T) {
+	for idx := range golden {
+		tc := &golden[idx]
+		hasher := New(tc.k)
+		hasher.Write(tc.m)
+		got := hasher.Sum64()
+		if got == tc.r {
+			continue
+		}
+		t.Errorf(`%d: expected "%x", got "%x"`, idx, tc.r, got)
+	}
+}
+
+// TestSum checks the byte output of Sum against the golden vectors and the
+// reference-implementation vectors in goldenRef.
+func TestSum(t *testing.T) {
+	// Golden vectors: the expected bytes are r encoded little-endian.
+	for idx, tc := range golden {
+		var want [8]byte
+		binary.LittleEndian.PutUint64(want[:], tc.r)
+		hasher := New(tc.k)
+		hasher.Write(tc.m)
+		got := hasher.Sum(nil)
+		if !bytes.Equal(got, want[:]) {
+			t.Errorf(`%d: expected "%x", got "%x"`, idx, want, got)
+		}
+	}
+
+	// Reference vectors: key 00 01 .. 0f, messages are prefixes of 00 01 02 ..
+	var keyBytes [16]byte
+	for i := range keyBytes {
+		keyBytes[i] = byte(i)
+	}
+	var msg [64]byte
+	for i := range msg {
+		msg[i] = byte(i)
+	}
+
+	for n := 0; n < len(msg); n++ {
+		hasher := New(keyBytes[:])
+		hasher.Write(msg[:n])
+		got := hasher.Sum(nil)
+		if !bytes.Equal(got, goldenRef[n]) {
+			t.Errorf(`%d: expected "%x", got "%x"`, n, goldenRef[n], got)
+		}
+	}
+}
+
+// TestSumUnaligned repeats the reference-vector check of TestSum with the
+// message starting at every offset 1..7 inside a larger buffer, so that the
+// input handed to Write is not 8-byte aligned.
+func TestSumUnaligned(t *testing.T) {
+	const align = 8
+	var k [16]byte
+	var in [64 + align]byte
+	for i := range k {
+		k[i] = byte(i)
+	}
+
+	for a := 1; a < align; a++ {
+		for i := 0; i < 64; i++ {
+			// in[a:a+i] holds 00 01 .. (i-1): earlier iterations for this
+			// offset have already filled the preceding bytes.
+			in[a+i] = byte(i)
+			h := New(k[:])
+			h.Write(in[a : a+i])
+			if sum := h.Sum(nil); !bytes.Equal(sum, goldenRef[i]) {
+				// Report the offset too; otherwise failures from the seven
+				// passes cannot be told apart.
+				t.Errorf(`offset %d, %d: expected "%x", got "%x"`, a, i, goldenRef[i], sum)
+			}
+		}
+	}
+}
+
+// TestSum128 checks the 16-byte output of New128 against the reference
+// vectors in goldenRef128.
+func TestSum128(t *testing.T) {
+	// Key is 00 01 .. 0f; messages are prefixes of 00 01 02 ..
+	var keyBytes [16]byte
+	for i := range keyBytes {
+		keyBytes[i] = byte(i)
+	}
+	var msg [64]byte
+	for i := range msg {
+		msg[i] = byte(i)
+	}
+
+	for n := 0; n < len(msg); n++ {
+		hasher := New128(keyBytes[:])
+		hasher.Write(msg[:n])
+		got := hasher.Sum(nil)
+		if !bytes.Equal(got, goldenRef128[n]) {
+			t.Errorf(`%d: expected "%x", got "%x"`, n, goldenRef128[n], got)
+		}
+	}
+}
+
+// TestHash checks the one-shot Hash function against the golden vectors and
+// the reference-implementation vectors in goldenRef.
+func TestHash(t *testing.T) {
+	// Golden vectors: split each 16-byte key into its two little-endian halves.
+	for idx, tc := range golden {
+		lo := binary.LittleEndian.Uint64(tc.k[0:8])
+		hi := binary.LittleEndian.Uint64(tc.k[8:16])
+		got := Hash(lo, hi, tc.m)
+		if got != tc.r {
+			t.Errorf(`%d: expected "%x", got "%x"`, idx, tc.r, got)
+		}
+	}
+
+	// Reference vectors: key 00 01 .. 0f, messages are prefixes of 00 01 02 ..
+	var keyBytes [16]byte
+	for i := range keyBytes {
+		keyBytes[i] = byte(i)
+	}
+	k0 := binary.LittleEndian.Uint64(keyBytes[0:8])
+	k1 := binary.LittleEndian.Uint64(keyBytes[8:16])
+
+	var msg [64]byte
+	for i := range msg {
+		msg[i] = byte(i)
+	}
+
+	for n := 0; n < len(msg); n++ {
+		want := binary.LittleEndian.Uint64(goldenRef[n])
+		got := Hash(k0, k1, msg[:n])
+		if got != want {
+			t.Errorf(`%d: expected "%x", got "%x"`, n, want, got)
+		}
+	}
+}
+
+// TestHashUnaligned repeats the reference-vector check of TestHash with the
+// message starting at every offset 1..7 inside a larger buffer, so that the
+// input handed to Hash is not 8-byte aligned.
+func TestHashUnaligned(t *testing.T) {
+	const align = 8
+	var k0, k1 uint64
+	var k [16]byte
+	var in [64 + align]byte
+
+	for i := range k {
+		k[i] = byte(i)
+	}
+	k0 = binary.LittleEndian.Uint64(k[0:8])
+	k1 = binary.LittleEndian.Uint64(k[8:16])
+
+	for a := 1; a < align; a++ {
+		for i := 0; i < 64; i++ {
+			// in[a:a+i] holds 00 01 .. (i-1): earlier iterations for this
+			// offset have already filled the preceding bytes.
+			in[a+i] = byte(i)
+			ref := binary.LittleEndian.Uint64(goldenRef[i])
+			if sum := Hash(k0, k1, in[a:a+i]); sum != ref {
+				// Report the offset too; otherwise failures from the seven
+				// passes cannot be told apart.
+				t.Errorf(`offset %d, %d: expected "%x", got "%x"`, a, i, ref, sum)
+			}
+		}
+	}
+}
+
+// TestHash128 checks the one-shot Hash128 function against the reference
+// vectors in goldenRef128, comparing both 64-bit halves.
+func TestHash128(t *testing.T) {
+	// Key is 00 01 .. 0f, split into its two little-endian halves.
+	var keyBytes [16]byte
+	for i := range keyBytes {
+		keyBytes[i] = byte(i)
+	}
+	k0 := binary.LittleEndian.Uint64(keyBytes[0:8])
+	k1 := binary.LittleEndian.Uint64(keyBytes[8:16])
+
+	var msg [64]byte
+	for i := range msg {
+		msg[i] = byte(i)
+	}
+
+	for n := 0; n < len(msg); n++ {
+		want0 := binary.LittleEndian.Uint64(goldenRef128[n][0:])
+		want1 := binary.LittleEndian.Uint64(goldenRef128[n][8:])
+		got0, got1 := Hash128(k0, k1, msg[:n])
+		if got0 != want0 || got1 != want1 {
+			t.Errorf(`%d: expected "%x, %x", got "%x, %x"`, n, want0, want1, got0, got1)
+		}
+	}
+}
+
+// Shared benchmark fixtures.
+var (
+	key        = zeroKey          // 16-byte all-zero key for the streaming hashers
+	key0, key1 uint64             // key halves passed to Hash/Hash128; left at zero
+	bench      = New(key)         // reusable 64-bit hasher (Reset before each run)
+	bench128   = New128(key)      // reusable 128-bit hasher (Reset before each run)
+	buf        = make([]byte, 8<<10) // 8 KiB of zero bytes used as benchmark input
+)
+
+// BenchmarkHash8 times Hash on an 8-byte message.
+func BenchmarkHash8(b *testing.B) {
+	msg := buf[:8]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash16 times Hash on a 16-byte message.
+func BenchmarkHash16(b *testing.B) {
+	msg := buf[:16]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash40 times Hash on a 40-byte message.
+func BenchmarkHash40(b *testing.B) {
+	msg := buf[:40]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash64 times Hash on a 64-byte message.
+func BenchmarkHash64(b *testing.B) {
+	msg := buf[:64]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash128 times the 64-bit Hash on a 128-byte message.
+func BenchmarkHash128(b *testing.B) {
+	msg := buf[:128]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash1K times Hash on a 1024-byte message.
+func BenchmarkHash1K(b *testing.B) {
+	msg := buf[:1024]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash1Kunaligned times Hash on a 1024-byte message that starts one
+// byte into buf.
+func BenchmarkHash1Kunaligned(b *testing.B) {
+	msg := buf[1:1025]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash8K times Hash on the whole of buf.
+func BenchmarkHash8K(b *testing.B) {
+	msg := buf
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash128_8 times Hash128 on an 8-byte message.
+func BenchmarkHash128_8(b *testing.B) {
+	msg := buf[:8]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash128(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash128_16 times Hash128 on a 16-byte message.
+func BenchmarkHash128_16(b *testing.B) {
+	msg := buf[:16]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash128(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash128_40 times Hash128 on a 40-byte message.
+func BenchmarkHash128_40(b *testing.B) {
+	msg := buf[:40]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash128(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash128_64 times Hash128 on a 64-byte message.
+func BenchmarkHash128_64(b *testing.B) {
+	msg := buf[:64]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash128(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash128_128 times Hash128 on a 128-byte message.
+func BenchmarkHash128_128(b *testing.B) {
+	msg := buf[:128]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash128(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash128_1K times Hash128 on a 1024-byte message.
+func BenchmarkHash128_1K(b *testing.B) {
+	msg := buf[:1024]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash128(key0, key1, msg)
+	}
+}
+
+// BenchmarkHash128_8K times Hash128 on the whole of buf.
+func BenchmarkHash128_8K(b *testing.B) {
+	msg := buf
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		Hash128(key0, key1, msg)
+	}
+}
+
+// BenchmarkFull8 times a Reset/Write/Sum64 cycle of the streaming hasher on
+// an 8-byte message.
+func BenchmarkFull8(b *testing.B) {
+	msg := buf[:8]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench.Reset()
+		bench.Write(msg)
+		bench.Sum64()
+	}
+}
+
+// BenchmarkFull16 times a Reset/Write/Sum64 cycle of the streaming hasher on
+// a 16-byte message.
+func BenchmarkFull16(b *testing.B) {
+	msg := buf[:16]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench.Reset()
+		bench.Write(msg)
+		bench.Sum64()
+	}
+}
+
+// BenchmarkFull40 times a Reset/Write/Sum64 cycle of the streaming hasher on
+// a 40-byte message.
+//
+// The reported byte count and the slice written now both match the 40 bytes
+// in the benchmark's name (previously 24 bytes were reported while only 16
+// were hashed), so the throughput figure is comparable with BenchmarkHash40.
+func BenchmarkFull40(b *testing.B) {
+	b.SetBytes(40)
+	for i := 0; i < b.N; i++ {
+		bench.Reset()
+		bench.Write(buf[:40])
+		bench.Sum64()
+	}
+}
+
+// BenchmarkFull64 times a Reset/Write/Sum64 cycle of the streaming hasher on
+// a 64-byte message.
+func BenchmarkFull64(b *testing.B) {
+	msg := buf[:64]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench.Reset()
+		bench.Write(msg)
+		bench.Sum64()
+	}
+}
+
+// BenchmarkFull128 times a Reset/Write/Sum64 cycle of the streaming 64-bit
+// hasher on a 128-byte message.
+//
+// The slice written now matches the 128 bytes reported to SetBytes
+// (previously only 64 bytes were hashed, doubling the reported throughput).
+func BenchmarkFull128(b *testing.B) {
+	b.SetBytes(128)
+	for i := 0; i < b.N; i++ {
+		bench.Reset()
+		bench.Write(buf[:128])
+		bench.Sum64()
+	}
+}
+
+// BenchmarkFull1K times a Reset/Write/Sum64 cycle of the streaming hasher on
+// a 1024-byte message.
+func BenchmarkFull1K(b *testing.B) {
+	msg := buf[:1024]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench.Reset()
+		bench.Write(msg)
+		bench.Sum64()
+	}
+}
+
+// BenchmarkFull1Kunaligned times a Reset/Write/Sum64 cycle on a 1024-byte
+// message that starts one byte into buf.
+func BenchmarkFull1Kunaligned(b *testing.B) {
+	msg := buf[1:1025]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench.Reset()
+		bench.Write(msg)
+		bench.Sum64()
+	}
+}
+
+// BenchmarkFull8K times a Reset/Write/Sum64 cycle of the streaming hasher on
+// the whole of buf.
+func BenchmarkFull8K(b *testing.B) {
+	msg := buf
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench.Reset()
+		bench.Write(msg)
+		bench.Sum64()
+	}
+}
+
+// BenchmarkFull128_8 times a Reset/Write/Sum cycle of the 128-bit streaming
+// hasher on an 8-byte message.
+func BenchmarkFull128_8(b *testing.B) {
+	msg := buf[:8]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench128.Reset()
+		bench128.Write(msg)
+		bench128.Sum(nil)
+	}
+}
+
+// BenchmarkFull128_16 times a Reset/Write/Sum cycle of the 128-bit streaming
+// hasher on a 16-byte message.
+func BenchmarkFull128_16(b *testing.B) {
+	msg := buf[:16]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench128.Reset()
+		bench128.Write(msg)
+		bench128.Sum(nil)
+	}
+}
+
+// BenchmarkFull128_40 times a Reset/Write/Sum cycle of the 128-bit streaming
+// hasher on a 40-byte message.
+//
+// The reported byte count and the slice written now both match the 40 bytes
+// in the benchmark's name (previously 24 bytes were reported while only 16
+// were hashed), so the figure is comparable with BenchmarkHash128_40.
+func BenchmarkFull128_40(b *testing.B) {
+	b.SetBytes(40)
+	for i := 0; i < b.N; i++ {
+		bench128.Reset()
+		bench128.Write(buf[:40])
+		bench128.Sum(nil)
+	}
+}
+
+// BenchmarkFull128_64 times a Reset/Write/Sum cycle of the 128-bit streaming
+// hasher on a 64-byte message.
+func BenchmarkFull128_64(b *testing.B) {
+	msg := buf[:64]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench128.Reset()
+		bench128.Write(msg)
+		bench128.Sum(nil)
+	}
+}
+
+// BenchmarkFull128_128 times a Reset/Write/Sum cycle of the 128-bit streaming
+// hasher on a 128-byte message.
+//
+// The slice written now matches the 128 bytes reported to SetBytes
+// (previously only 64 bytes were hashed, doubling the reported throughput).
+func BenchmarkFull128_128(b *testing.B) {
+	b.SetBytes(128)
+	for i := 0; i < b.N; i++ {
+		bench128.Reset()
+		bench128.Write(buf[:128])
+		bench128.Sum(nil)
+	}
+}
+
+// BenchmarkFull128_1K times a Reset/Write/Sum cycle of the 128-bit streaming
+// hasher on a 1024-byte message.
+func BenchmarkFull128_1K(b *testing.B) {
+	msg := buf[:1024]
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench128.Reset()
+		bench128.Write(msg)
+		bench128.Sum(nil)
+	}
+}
+
+// BenchmarkFull128_8K times a Reset/Write/Sum cycle of the 128-bit streaming
+// hasher on the whole of buf.
+func BenchmarkFull128_8K(b *testing.B) {
+	msg := buf
+	b.SetBytes(int64(len(msg)))
+	for n := 0; n < b.N; n++ {
+		bench128.Reset()
+		bench128.Write(msg)
+		bench128.Sum(nil)
+	}
+}