Browse Source

Move the hChaChaRef routine to where it makes more sense.

Yawning Angel 3 years ago
parent
commit
c91e78db50
3 changed files with 148 additions and 145 deletions
  1. 0 144
      chacha20.go
  2. 143 0
      chacha20_ref.go
  3. 5 1
      chacha20_test.go

+ 0 - 144
chacha20.go

@@ -13,7 +13,6 @@ import (
 	"errors"
 	"math"
 	"runtime"
-	"unsafe"
 )
 
 const (
@@ -261,149 +260,6 @@ func HChaCha(key []byte, nonce *[HNonceSize]byte, out *[32]byte) {
 	hChaChaRef(&x, out)
 }
 
-func hChaChaRef(x *[stateSize]uint32, out *[32]byte) {
-	x0, x1, x2, x3 := sigma0, sigma1, sigma2, sigma3
-	x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11]
-	// Mix the 16 words above for chachaRounds rounds, two per pass: a column round, then a diagonal round.
-	for i := chachaRounds; i > 0; i -= 2 {
-		// Column round, 1 of 4: quarterround(x, 0, 4, 8, 12)
-		x0 += x4
-		x12 ^= x0
-		x12 = (x12 << 16) | (x12 >> 16)
-		x8 += x12
-		x4 ^= x8
-		x4 = (x4 << 12) | (x4 >> 20)
-		x0 += x4
-		x12 ^= x0
-		x12 = (x12 << 8) | (x12 >> 24)
-		x8 += x12
-		x4 ^= x8
-		x4 = (x4 << 7) | (x4 >> 25)
-
-		// Column round, 2 of 4: quarterround(x, 1, 5, 9, 13)
-		x1 += x5
-		x13 ^= x1
-		x13 = (x13 << 16) | (x13 >> 16)
-		x9 += x13
-		x5 ^= x9
-		x5 = (x5 << 12) | (x5 >> 20)
-		x1 += x5
-		x13 ^= x1
-		x13 = (x13 << 8) | (x13 >> 24)
-		x9 += x13
-		x5 ^= x9
-		x5 = (x5 << 7) | (x5 >> 25)
-
-		// Column round, 3 of 4: quarterround(x, 2, 6, 10, 14)
-		x2 += x6
-		x14 ^= x2
-		x14 = (x14 << 16) | (x14 >> 16)
-		x10 += x14
-		x6 ^= x10
-		x6 = (x6 << 12) | (x6 >> 20)
-		x2 += x6
-		x14 ^= x2
-		x14 = (x14 << 8) | (x14 >> 24)
-		x10 += x14
-		x6 ^= x10
-		x6 = (x6 << 7) | (x6 >> 25)
-
-		// Column round, 4 of 4: quarterround(x, 3, 7, 11, 15)
-		x3 += x7
-		x15 ^= x3
-		x15 = (x15 << 16) | (x15 >> 16)
-		x11 += x15
-		x7 ^= x11
-		x7 = (x7 << 12) | (x7 >> 20)
-		x3 += x7
-		x15 ^= x3
-		x15 = (x15 << 8) | (x15 >> 24)
-		x11 += x15
-		x7 ^= x11
-		x7 = (x7 << 7) | (x7 >> 25)
-
-		// Diagonal round, 1 of 4: quarterround(x, 0, 5, 10, 15)
-		x0 += x5
-		x15 ^= x0
-		x15 = (x15 << 16) | (x15 >> 16)
-		x10 += x15
-		x5 ^= x10
-		x5 = (x5 << 12) | (x5 >> 20)
-		x0 += x5
-		x15 ^= x0
-		x15 = (x15 << 8) | (x15 >> 24)
-		x10 += x15
-		x5 ^= x10
-		x5 = (x5 << 7) | (x5 >> 25)
-
-		// Diagonal round, 2 of 4: quarterround(x, 1, 6, 11, 12)
-		x1 += x6
-		x12 ^= x1
-		x12 = (x12 << 16) | (x12 >> 16)
-		x11 += x12
-		x6 ^= x11
-		x6 = (x6 << 12) | (x6 >> 20)
-		x1 += x6
-		x12 ^= x1
-		x12 = (x12 << 8) | (x12 >> 24)
-		x11 += x12
-		x6 ^= x11
-		x6 = (x6 << 7) | (x6 >> 25)
-
-		// Diagonal round, 3 of 4: quarterround(x, 2, 7, 8, 13)
-		x2 += x7
-		x13 ^= x2
-		x13 = (x13 << 16) | (x13 >> 16)
-		x8 += x13
-		x7 ^= x8
-		x7 = (x7 << 12) | (x7 >> 20)
-		x2 += x7
-		x13 ^= x2
-		x13 = (x13 << 8) | (x13 >> 24)
-		x8 += x13
-		x7 ^= x8
-		x7 = (x7 << 7) | (x7 >> 25)
-
-		// Diagonal round, 4 of 4: quarterround(x, 3, 4, 9, 14)
-		x3 += x4
-		x14 ^= x3
-		x14 = (x14 << 16) | (x14 >> 16)
-		x9 += x14
-		x4 ^= x9
-		x4 = (x4 << 12) | (x4 >> 20)
-		x3 += x4
-		x14 ^= x3
-		x14 = (x14 << 8) | (x14 >> 24)
-		x9 += x14
-		x4 ^= x9
-		x4 = (x4 << 7) | (x4 >> 25)
-	}
-	// HChaCha returns x0...x3 | x12...x15, which corresponds to the
-	// indexes of the ChaCha constant and the indexes of the IV.
-	// NOTE(review): out holds 8 words, yet the view below is typed [16]uint32; only indexes 0-7 are written.
-	if useUnsafe { // stores the words in native byte order
-		outArr := (*[16]uint32)(unsafe.Pointer(&out[0]))
-		outArr[0] = x0
-		outArr[1] = x1
-		outArr[2] = x2
-		outArr[3] = x3
-		outArr[4] = x12
-		outArr[5] = x13
-		outArr[6] = x14
-		outArr[7] = x15
-	} else { // portable path: explicit little-endian encoding
-		binary.LittleEndian.PutUint32(out[0:4], x0)
-		binary.LittleEndian.PutUint32(out[4:8], x1)
-		binary.LittleEndian.PutUint32(out[8:12], x2)
-		binary.LittleEndian.PutUint32(out[12:16], x3)
-		binary.LittleEndian.PutUint32(out[16:20], x12)
-		binary.LittleEndian.PutUint32(out[20:24], x13)
-		binary.LittleEndian.PutUint32(out[24:28], x14)
-		binary.LittleEndian.PutUint32(out[28:32], x15)
-	}
-	return
-}
-
 func init() {
 	switch runtime.GOARCH {
 	case "386", "amd64":

+ 143 - 0
chacha20_ref.go

@@ -247,3 +247,146 @@ func blocksRef(x *[stateSize]uint32, in []byte, out []byte, nrBlocks int, isIetf
 		x[13] = uint32(ctr >> 32)
 	}
 }
+
+// hChaChaRef is the reference implementation of the HChaCha core.  It loads
+// sigma0..sigma3 followed by x[0..11] into 16 working words, mixes them for
+// chachaRounds rounds, and stores working words 0-3 and 12-15 in out.  The
+// input words are not added back into the result, and x itself is only
+// read.  Words are stored in native byte order when useUnsafe is set and as
+// little-endian otherwise.
+func hChaChaRef(x *[stateSize]uint32, out *[32]byte) {
+	x0, x1, x2, x3 := sigma0, sigma1, sigma2, sigma3
+	x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11]
+
+	// Each pass is a double round: four column quarter-rounds, then four diagonal ones.
+	for i := chachaRounds; i > 0; i -= 2 {
+		// Column round, 1 of 4: quarterround(x, 0, 4, 8, 12)
+		x0 += x4
+		x12 ^= x0
+		x12 = (x12 << 16) | (x12 >> 16)
+		x8 += x12
+		x4 ^= x8
+		x4 = (x4 << 12) | (x4 >> 20)
+		x0 += x4
+		x12 ^= x0
+		x12 = (x12 << 8) | (x12 >> 24)
+		x8 += x12
+		x4 ^= x8
+		x4 = (x4 << 7) | (x4 >> 25)
+
+		// Column round, 2 of 4: quarterround(x, 1, 5, 9, 13)
+		x1 += x5
+		x13 ^= x1
+		x13 = (x13 << 16) | (x13 >> 16)
+		x9 += x13
+		x5 ^= x9
+		x5 = (x5 << 12) | (x5 >> 20)
+		x1 += x5
+		x13 ^= x1
+		x13 = (x13 << 8) | (x13 >> 24)
+		x9 += x13
+		x5 ^= x9
+		x5 = (x5 << 7) | (x5 >> 25)
+
+		// Column round, 3 of 4: quarterround(x, 2, 6, 10, 14)
+		x2 += x6
+		x14 ^= x2
+		x14 = (x14 << 16) | (x14 >> 16)
+		x10 += x14
+		x6 ^= x10
+		x6 = (x6 << 12) | (x6 >> 20)
+		x2 += x6
+		x14 ^= x2
+		x14 = (x14 << 8) | (x14 >> 24)
+		x10 += x14
+		x6 ^= x10
+		x6 = (x6 << 7) | (x6 >> 25)
+
+		// Column round, 4 of 4: quarterround(x, 3, 7, 11, 15)
+		x3 += x7
+		x15 ^= x3
+		x15 = (x15 << 16) | (x15 >> 16)
+		x11 += x15
+		x7 ^= x11
+		x7 = (x7 << 12) | (x7 >> 20)
+		x3 += x7
+		x15 ^= x3
+		x15 = (x15 << 8) | (x15 >> 24)
+		x11 += x15
+		x7 ^= x11
+		x7 = (x7 << 7) | (x7 >> 25)
+
+		// Diagonal round, 1 of 4: quarterround(x, 0, 5, 10, 15)
+		x0 += x5
+		x15 ^= x0
+		x15 = (x15 << 16) | (x15 >> 16)
+		x10 += x15
+		x5 ^= x10
+		x5 = (x5 << 12) | (x5 >> 20)
+		x0 += x5
+		x15 ^= x0
+		x15 = (x15 << 8) | (x15 >> 24)
+		x10 += x15
+		x5 ^= x10
+		x5 = (x5 << 7) | (x5 >> 25)
+
+		// Diagonal round, 2 of 4: quarterround(x, 1, 6, 11, 12)
+		x1 += x6
+		x12 ^= x1
+		x12 = (x12 << 16) | (x12 >> 16)
+		x11 += x12
+		x6 ^= x11
+		x6 = (x6 << 12) | (x6 >> 20)
+		x1 += x6
+		x12 ^= x1
+		x12 = (x12 << 8) | (x12 >> 24)
+		x11 += x12
+		x6 ^= x11
+		x6 = (x6 << 7) | (x6 >> 25)
+
+		// Diagonal round, 3 of 4: quarterround(x, 2, 7, 8, 13)
+		x2 += x7
+		x13 ^= x2
+		x13 = (x13 << 16) | (x13 >> 16)
+		x8 += x13
+		x7 ^= x8
+		x7 = (x7 << 12) | (x7 >> 20)
+		x2 += x7
+		x13 ^= x2
+		x13 = (x13 << 8) | (x13 >> 24)
+		x8 += x13
+		x7 ^= x8
+		x7 = (x7 << 7) | (x7 >> 25)
+
+		// Diagonal round, 4 of 4: quarterround(x, 3, 4, 9, 14)
+		x3 += x4
+		x14 ^= x3
+		x14 = (x14 << 16) | (x14 >> 16)
+		x9 += x14
+		x4 ^= x9
+		x4 = (x4 << 12) | (x4 >> 20)
+		x3 += x4
+		x14 ^= x3
+		x14 = (x14 << 8) | (x14 >> 24)
+		x9 += x14
+		x4 ^= x9
+		x4 = (x4 << 7) | (x4 >> 25)
+	}
+
+	// HChaCha returns x0...x3 | x12...x15, which corresponds to the
+	// indexes of the ChaCha constant and the indexes of the IV.
+	if useUnsafe {
+		// View out as exactly 8 words.  The view must not be larger than the
+		// 32-byte array it aliases: converting to a bigger type breaks the
+		// unsafe.Pointer rules and can trip checkptr instrumentation (e.g. -race).
+		// NOTE(review): presumably useUnsafe implies little-endian, unaligned-safe targets - confirm.
+		outArr := (*[8]uint32)(unsafe.Pointer(&out[0]))
+		outArr[0], outArr[1], outArr[2], outArr[3] = x0, x1, x2, x3
+		outArr[4], outArr[5], outArr[6], outArr[7] = x12, x13, x14, x15
+	} else {
+		// Portable path: encode each word as little-endian bytes.
+		for i, w := range [8]uint32{x0, x1, x2, x3, x12, x13, x14, x15} {
+			binary.LittleEndian.PutUint32(out[4*i:], w)
+		}
+	}
+}

+ 5 - 1
chacha20_test.go

@@ -404,7 +404,7 @@ func TestChaCha20VectorizedIncremental(t *testing.T) {
 	}()
 
 	const (
-		maxBlocks = 128
+		maxBlocks = 256
 		testSz    = (maxBlocks * (maxBlocks + 1) / 2) * BlockSize
 	)
 
@@ -489,6 +489,10 @@ func BenchmarkChaCha20_256(b *testing.B) {
 	doBenchN(b, 256)
 }
 
+func BenchmarkChaCha20_384(b *testing.B) {
+	doBenchN(b, 384) // size 384, between the neighbouring 256 and 512 cases; presumably a byte count - confirm in doBenchN
+}
+
 func BenchmarkChaCha20_512(b *testing.B) {
 	doBenchN(b, 512) // delegates to doBenchN with size 512; presumably a byte count - confirm in doBenchN
 }