Browse Source

Initial import.

Slow (~5ms on an i5-4250U), but it passes the test vectors supplied with
the code.

TODO:
 * Test against the vectors in the IETF draft if different.
 * Optimize so that performance isn't abysimaterribad.
   The C code for reference clocks in at:
    * 1.9 ms - 32 bit limbs, no unrolling
    * 648 usec - 32 bit limbs, unrolled
    * 413 usec - 64 bit limbs, unrolled

   Without busting out PeachPy I think I can get < 1 ms even in pure Go.
Yawning Angel 4 years ago
commit
93f5ae59ef
5 changed files with 666 additions and 0 deletions
  1. 23 0
      LICENSE.txt
  2. 13 0
      README.md
  3. 115 0
      x448.go
  4. 299 0
      x448_ref.go
  5. 216 0
      x448_test.go

+ 23 - 0
LICENSE.txt

@@ -0,0 +1,23 @@
+The MIT License (MIT)
+
+Copyright (c) 2011 Stanford University.
+Copyright (c) 2014-2015 Cryptography Research, Inc.
+Copyright (c) 2015 Yawning Angel.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

+ 13 - 0
README.md

@@ -0,0 +1,13 @@
+### x448 - curve448 ECDH
+#### Yawning Angel (yawning at schwanenlied dot me)
+
+A straightforward port of Michael Hamburg's x448 code to Go.
+
+See: https://tools.ietf.org/html/draft-irtf-cfrg-curves-11
+
+If you're familiar with how to use golang.org/x/crypto/curve25519, you will be
+right at home with using x448, since the functions are the same.  Generate a
+random secret key, ScalarBaseMult() to get the public key, etc etc etc.
+
+Both routines return 0 on success and -1 on failure.  The return value MUST be
+checked, and the handshake aborted on failure.

+ 115 - 0
x448.go

@@ -0,0 +1,115 @@
+// The MIT License (MIT)
+//
+// Copyright (c) 2011 Stanford University.
+// Copyright (c) 2014-2015 Cryptography Research, Inc.
+// Copyright (c) 2015 Yawning Angel.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// Package x448 provides an implementation of scalar multiplication on the
+// elliptic curve known as curve448.
+//
+// See https://tools.ietf.org/html/draft-irtf-cfrg-curves-11
+package x448 // import "git.schwanenlied.me/yawning/x448"
+
+const (
+	// x448Bytes is the length in bytes of every serialized value handled by
+	// this package (scalars, inputs and outputs).
+	x448Bytes = 56
+
+	// edwardsD is the curve constant; ScalarMult multiplies by -edwardsD
+	// (noted there as a24) in each ladder step.
+	edwardsD = -39081
+)
+
+// basePoint is the serialized base point that ScalarBaseMult passes to
+// ScalarMult: the value 5 in the first byte, every other byte zero.
+var basePoint = [x448Bytes]byte{5}
+
+// ScalarMult computes out = scalar * base on curve448 using a Montgomery
+// ladder.  out, scalar and base are all 56 byte little-endian strings; base
+// and out are serialized field elements.
+//
+// The scalar is conditioned on the fly (the low two bits are cleared and the
+// top bit is set); the caller's scalar is not modified.
+//
+// It returns 0 on success and -1 if the result is the all-zero value.  The
+// return value MUST be checked, and the handshake aborted on failure.
+func ScalarMult(out, scalar, base *[56]byte) int {
+	var x1, x2, z2, x3, z3, t1, t2 gf
+	x1.deser(base)
+	x2.cpy(&one)
+	z2.cpy(&zero)
+	x3.cpy(&x1)
+	z3.cpy(&one)
+
+	// swap records whether (x2, z2) and (x3, z3) are currently exchanged
+	// relative to the previous bit; it is always all 0s or all 1s.
+	var swap limbUint
+
+	for t := int(448 - 1); t >= 0; t-- {
+		sb := scalar[t/8]
+
+		// Scalar conditioning.
+		if t/8 == 0 {
+			sb &= 0xFC
+		} else if t/8 == x448Bytes-1 {
+			sb |= 0x80
+		}
+
+		kT := (limbUint)((sb >> ((uint)(t) % 8)) & 1)
+		kT = -kT // Set to all 0s or all 1s
+
+		swap ^= kT
+		x2.condSwap(&x3, swap)
+		z2.condSwap(&z3, swap)
+		swap = kT
+
+		t1.add(&x2, &z2) // A = x2 + z2
+		t2.sub(&x2, &z2) // B = x2 - z2
+		z2.sub(&x3, &z3) // D = x3 - z3
+		x2.mul(&t1, &z2) // DA
+		z2.add(&z3, &x3) // C = x3 + z3
+		x3.mul(&t2, &z2) // CB
+		z3.sub(&x2, &x3) // DA-CB
+		z2.sqr(&z3)      // (DA-CB)^2
+		z3.mul(&x1, &z2) // z3 = x1(DA-CB)^2
+		z2.add(&x2, &x3) // (DA+CB)
+		x3.sqr(&z2)      // x3 = (DA+CB)^2
+
+		z2.sqr(&t1)      // AA = A^2
+		t1.sqr(&t2)      // BB = B^2
+		x2.mul(&z2, &t1) // x2 = AA*BB
+		t2.sub(&z2, &t1) // E = AA-BB
+
+		t1.mlw(&t2, -edwardsD) // E*-d = a24*E
+		t1.add(&t1, &z2)       // AA + a24*E
+		z2.mul(&t2, &t1)       // z2 = E(AA+a24*E)
+	}
+
+	// Finish.  Scalar conditioning clears bit 0, so swap is always 0 here
+	// and these swaps are no-ops, but keep them correct regardless: z2 must
+	// be exchanged with z3 (not x3) so that it stays paired with x2.
+	x2.condSwap(&x3, swap)
+	z2.condSwap(&z3, swap)
+	z2.inv(&z2)
+	x1.mul(&x2, &z2)
+	x1.ser(out)
+
+	// As with X25519, both sides MUST check, without leaking extra
+	// information about the value of K, whether the resulting shared K is
+	// the all-zero value and abort if so.
+	var nz limbSint
+	for _, v := range out {
+		nz |= (limbSint)(v)
+	}
+	// nz is in [0, 255]: (0 - 1) >> 8 is -1, anything else shifts to 0.
+	nz = (nz - 1) >> 8 // 0 = succ, -1 = fail
+
+	// return value: 0 = succ, -1 = fail
+	return (int)(nz)
+}
+
+// ScalarBaseMult computes out = scalar * basePoint by calling ScalarMult with
+// the package's base point.  It returns ScalarMult's status unchanged: 0 on
+// success, -1 on failure.
+func ScalarBaseMult(out, scalar *[56]byte) int {
+	base := &basePoint
+	return ScalarMult(out, scalar, base)
+}

+ 299 - 0
x448_ref.go

@@ -0,0 +1,299 @@
+// The MIT License (MIT)
+//
+// Copyright (c) 2011 Stanford University.
+// Copyright (c) 2014-2015 Cryptography Research, Inc.
+// Copyright (c) 2015 Yawning Angel.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package x448
+
+// This should really use 64 bit limbs, but Go has no 128 bit integer type
+// (no equivalent of __(u)int128_t), so the 32 bit code it is, at a hefty
+// performance penalty.  Making this go fast will probably require assembly
+// (e.g. via PeachPy).
+//
+// This is equivalent to the non-unrolled reference code, though the compiler
+// is free to unroll as it sees fit.  If performance is horrendous I'll
+// manually unroll things.
+
+const (
+	wBits     = 32            // Bits in the word backing each limb.
+	lBits     = wBits * 7 / 8 // Value-carrying bits per limb (28).
+	x448Limbs = 448 / lBits   // Limbs per field element (16).
+	lMask     = 1<<lBits - 1  // Mask selecting the low lBits bits.
+)
+
+// limbUint and limbSint are the unsigned and signed word types used for
+// masks and carries.
+type (
+	limbUint uint32
+	limbSint int32
+)
+
+// gf is a field element stored as x448Limbs limbs, least significant limb
+// first, each nominally holding lBits bits.
+type gf struct {
+	limb [x448Limbs]uint32
+}
+
+// Field constants: 0, 1 and the modulus p.  Every limb of p is lMask except
+// limb x448Limbs/2, which is lMask-1.
+var (
+	zero = gf{}
+	one  = gf{limb: [x448Limbs]uint32{0: 1}}
+	p    = gf{limb: [x448Limbs]uint32{
+		lMask, lMask, lMask, lMask, lMask, lMask, lMask, lMask,
+		lMask - 1, lMask, lMask, lMask, lMask, lMask, lMask, lMask,
+	}}
+)
+
+// cpy sets x to a copy of y.
+func (x *gf) cpy(y *gf) {
+	x.limb = y.limb // Array assignment copies every limb.
+}
+
+// mul multiplies c = a * b. (PERF)
+//
+// c may alias a and/or b: a is copied into a scratch value first, b is only
+// read while the products are accumulated, and c is written only after the
+// product has been fully accumulated and carried.
+func (c *gf) mul(a, b *gf) {
+	var aa gf
+	aa.cpy(a)
+
+	// Accumulate the partial products, with the column index wrapping
+	// around modulo the number of limbs.
+	var accum [x448Limbs]uint64
+	for i, bv := range b.limb { // XXX: Unroll
+		for j, aav := range aa.limb { // XXX: Unroll
+			accum[(i+j)%x448Limbs] += (uint64)(bv) * (uint64)(aav)
+		}
+		// Limb x448Limbs-1-i is the next one whose products wrap around
+		// (in iteration i+1), so fold it into the limb x448Limbs/2 away.
+		// This accounts for 2^448 = 2^224 + 1 (mod p).
+		aa.limb[(x448Limbs-1-i)^(x448Limbs/2)] += aa.limb[x448Limbs-1-i]
+	}
+
+	// Carry propagation.  The carry out of the top limb is added to both
+	// limb x448Limbs/2 (here) and limb 0 (first loop iteration below).
+	accum[x448Limbs-1] += accum[x448Limbs-2] >> lBits
+	accum[x448Limbs-2] &= lMask
+	accum[x448Limbs/2] += accum[x448Limbs-1] >> lBits
+	// j is unsigned, so for j == 0 the index (j-1)%x448Limbs wraps around
+	// to x448Limbs-1.
+	for j := uint(0); j < x448Limbs; j++ { // XXX: Unroll
+		accum[j] += accum[(j-1)%x448Limbs] >> lBits
+		accum[(j-1)%x448Limbs] &= lMask
+	}
+	for j, accv := range accum { // XXX: Unroll
+		c.limb[j] = (uint32)(accv)
+	}
+}
+
+// sqr sets c = x^2.  There is no dedicated squaring routine; it is a plain
+// multiplication of x by itself. (PERF)
+func (c *gf) sqr(x *gf) {
+	operand := x
+	c.mul(operand, operand)
+}
+
+// isqrt sets y to the inverse square root of x (y = 1/sqrt(x)), evaluated
+// with a fixed addition chain.
+func (y *gf) isqrt(x *gf) {
+	var a, b, c gf
+
+	// step performs one link of the chain: dst = src * c, then c = dst
+	// squared n times.
+	step := func(dst, src *gf, n int) {
+		dst.mul(src, &c)
+		c.cpy(dst)
+		for k := 0; k < n; k++ {
+			c.sqr(&c)
+		}
+	}
+
+	c.sqr(x)
+
+	// XXX/Yawning, could unroll, but this is called only once.
+	step(&b, x, 1)
+	step(&b, x, 3)
+	step(&a, &b, 3)
+	step(&a, &b, 9)
+	step(&b, &a, 1)
+	step(&a, x, 18)
+	step(&a, &b, 37)
+	step(&b, &a, 37)
+	step(&b, &a, 111)
+	step(&a, &b, 1)
+	step(&b, x, 223)
+
+	y.mul(&a, &c)
+}
+
+// inv sets y = 1/x.  y may alias x: x is read for the last time before y is
+// written.
+func (y *gf) inv(x *gf) {
+	var sq, r gf
+	sq.sqr(x)     // x^2
+	r.isqrt(&sq)  // +- 1/sqrt(x^2) = +- 1/x
+	sq.sqr(&r)    // 1/x^2, the sign ambiguity squares away
+	r.mul(x, &sq) // x * 1/x^2 = 1/x
+	y.limb = r.limb
+}
+
+// reduce weakly reduces mod p
+func (x *gf) reduce() {
+	x.limb[x448Limbs/2] += x.limb[x448Limbs-1] >> lBits
+	for j := uint(0); j < x448Limbs; j++ { // XXX: Unroll
+		x.limb[j] += x.limb[(j-1)%x448Limbs] >> lBits
+		x.limb[(j-1)%x448Limbs] &= lMask
+	}
+}
+
+// add sets x = y + z mod p, limb by limb, and then always applies a weak
+// reduction, needed or not. (PERF)
+func (x *gf) add(y, z *gf) {
+	for i := range x.limb { // XXX: Unroll
+		x.limb[i] = y.limb[i] + z.limb[i]
+	}
+	x.reduce()
+}
+
+// sub sets x = y - z mod p.  2*p is added limb by limb to bias the
+// difference, and a weak reduction is then always applied. (PERF)
+func (x *gf) sub(y, z *gf) {
+	for i := range x.limb { // XXX: Unroll
+		x.limb[i] = y.limb[i] + 2*p.limb[i] - z.limb[i]
+	}
+	x.reduce()
+}
+
+// condSwap exchanges x and y in constant time when swap is all 1s and leaves
+// both unchanged when swap is all 0s.  It masks rather than branches on swap.
+func (x *gf) condSwap(y *gf, swap limbUint) {
+	mask := uint32(swap)
+	for i := range x.limb { // XXX: Unroll
+		diff := (x.limb[i] ^ y.limb[i]) & mask
+		x.limb[i] ^= diff
+		y.limb[i] ^= diff
+	}
+}
+
+// mlw multiplies by a signed int (a = b * w).  NOT CONSTANT TIME wrt the
+// sign of the int, but that's ok because it's only ever called with
+// w = -edwardsD.  Just uses a full multiply. (PERF)
+//
+// The magnitude of w is placed in a single limb, so it is expected to fit in
+// one.  a may alias b.
+func (a *gf) mlw(b *gf, w int) {
+	if w > 0 {
+		ww := gf{[x448Limbs]uint32{(uint32)(w)}}
+		a.mul(b, &ww)
+		return
+	}
+
+	// w <= 0: multiply by the magnitude, then negate by subtracting from
+	// zero.  This path is never taken with the current callers.
+	ww := gf{[x448Limbs]uint32{(uint32)(-w)}}
+	a.mul(b, &ww)
+	a.sub(&zero, a)
+}
+
+// canon canonicalizes.
+//
+// After a weak reduction it subtracts p and then, if that subtraction
+// borrowed, adds p back using a mask rather than a branch.  Every limb is
+// left masked to lBits bits.
+func (a *gf) canon() {
+	a.reduce()
+
+	// Subtract p with borrow.
+	var carry int64
+	for i, v := range a.limb {
+		carry = carry + (int64)(v) - (int64)(p.limb[i])
+		a.limb[i] = (uint32)(carry & lMask)
+		carry >>= lBits // Arithmetic shift: a negative carry is the borrow.
+	}
+
+	// addback is the borrow out of the top limb.  It is expected to be 0
+	// (no borrow) or -1 (all bits set, the value was below p), so it can be
+	// used as a mask on the limbs of p.
+	addback := carry
+	carry = 0
+
+	// Add it back.
+	for i, v := range a.limb {
+		carry = carry + (int64)(v) + (int64)(p.limb[i]&(uint32)(addback))
+		a.limb[i] = uint32(carry & lMask)
+		carry >>= lBits
+	}
+}
+
+// deser deserializes into the limb representation.
+//
+// The input is read as little-endian: byte 0 supplies the least significant
+// bits of limb 0, and each limb receives lBits bits.
+//
+// The return value is the final borrow of the limb-wise subtraction s - p:
+// -1 if the decoded value is less than p, 0 otherwise.
+func (s *gf) deser(ser *[x448Bytes]byte) int64 {
+	var buf uint64  // Bit buffer; pending bits sit at the low end.
+	bits := uint(0) // Number of pending bits in buf.
+	k := 0          // Index of the next limb to fill.
+
+	for i, v := range ser {
+		buf |= (uint64)(v) << bits
+		// Emit a limb whenever at least lBits bits are buffered.  On the
+		// final input byte, keep emitting until every limb is filled.
+		for bits += 8; (bits >= lBits || i == x448Bytes-1) && k < x448Limbs; bits, buf = bits-lBits, buf>>lBits {
+			s.limb[k] = (uint32)(buf & lMask)
+			k++
+		}
+	}
+
+	// XXX: Return value never used, this can be omitted.
+	var accum int64
+	for i, v := range s.limb {
+		// The shift leaves only the sign (0 or -1) as the running borrow.
+		accum = (accum + (int64)(v) - (int64)(p.limb[i])) >> wBits
+	}
+	return accum
+}
+
+// ser serializes into byte representation.
+//
+// a is canonicalized first (and is therefore modified), then its limbs are
+// packed lBits bits at a time into ser, least significant byte first.
+func (a *gf) ser(ser *[x448Bytes]byte) {
+	a.canon()
+	k := 0          // Index of the next output byte.
+	bits := uint(0) // Number of pending bits in buf.
+	var buf uint64  // Bit buffer; pending bits sit at the low end.
+	for i, v := range a.limb {
+		buf |= (uint64)(v) << bits
+		// Emit a byte whenever at least 8 bits are buffered.  On the final
+		// limb, keep emitting until every output byte is written.
+		for bits += lBits; (bits >= 8 || i == x448Limbs-1) && k < x448Bytes; bits, buf = bits-8, buf>>8 {
+			ser[k] = (byte)(buf)
+			k++
+		}
+	}
+}

+ 216 - 0
x448_test.go

@@ -0,0 +1,216 @@
+// The MIT License (MIT)
+//
+// Copyright (c) 2011 Stanford University.
+// Copyright (c) 2014-2015 Cryptography Research, Inc.
+// Copyright (c) 2015 Yawning Angel.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package x448
+
+import (
+	"bytes"
+	"crypto/rand"
+	"testing"
+)
+
+// TestX448 runs ScalarMult against known-answer vectors: each entry supplies
+// a scalar, a base and the expected product.
+func TestX448(t *testing.T) {
+	type KATVectors struct {
+		scalar [x448Bytes]byte
+		base   [x448Bytes]byte
+		answer [x448Bytes]byte
+	}
+
+	vectors := []KATVectors{
+		{
+			[x448Bytes]byte{
+				0x3d, 0x26, 0x2f, 0xdd, 0xf9, 0xec, 0x8e, 0x88,
+				0x49, 0x52, 0x66, 0xfe, 0xa1, 0x9a, 0x34, 0xd2,
+				0x88, 0x82, 0xac, 0xef, 0x04, 0x51, 0x04, 0xd0,
+				0xd1, 0xaa, 0xe1, 0x21, 0x70, 0x0a, 0x77, 0x9c,
+				0x98, 0x4c, 0x24, 0xf8, 0xcd, 0xd7, 0x8f, 0xbf,
+				0xf4, 0x49, 0x43, 0xeb, 0xa3, 0x68, 0xf5, 0x4b,
+				0x29, 0x25, 0x9a, 0x4f, 0x1c, 0x60, 0x0a, 0xd3,
+			},
+			[x448Bytes]byte{
+				0x06, 0xfc, 0xe6, 0x40, 0xfa, 0x34, 0x87, 0xbf,
+				0xda, 0x5f, 0x6c, 0xf2, 0xd5, 0x26, 0x3f, 0x8a,
+				0xad, 0x88, 0x33, 0x4c, 0xbd, 0x07, 0x43, 0x7f,
+				0x02, 0x0f, 0x08, 0xf9, 0x81, 0x4d, 0xc0, 0x31,
+				0xdd, 0xbd, 0xc3, 0x8c, 0x19, 0xc6, 0xda, 0x25,
+				0x83, 0xfa, 0x54, 0x29, 0xdb, 0x94, 0xad, 0xa1,
+				0x8a, 0xa7, 0xa7, 0xfb, 0x4e, 0xf8, 0xa0, 0x86,
+			},
+			[x448Bytes]byte{
+				0xce, 0x3e, 0x4f, 0xf9, 0x5a, 0x60, 0xdc, 0x66,
+				0x97, 0xda, 0x1d, 0xb1, 0xd8, 0x5e, 0x6a, 0xfb,
+				0xdf, 0x79, 0xb5, 0x0a, 0x24, 0x12, 0xd7, 0x54,
+				0x6d, 0x5f, 0x23, 0x9f, 0xe1, 0x4f, 0xba, 0xad,
+				0xeb, 0x44, 0x5f, 0xc6, 0x6a, 0x01, 0xb0, 0x77,
+				0x9d, 0x98, 0x22, 0x39, 0x61, 0x11, 0x1e, 0x21,
+				0x76, 0x62, 0x82, 0xf7, 0x3d, 0xd9, 0x6b, 0x6f,
+			},
+		},
+		{
+			[x448Bytes]byte{
+				0x20, 0x3d, 0x49, 0x44, 0x28, 0xb8, 0x39, 0x93,
+				0x52, 0x66, 0x5d, 0xdc, 0xa4, 0x2f, 0x9d, 0xe8,
+				0xfe, 0xf6, 0x00, 0x90, 0x8e, 0x0d, 0x46, 0x1c,
+				0xb0, 0x21, 0xf8, 0xc5, 0x38, 0x34, 0x5d, 0xd7,
+				0x7c, 0x3e, 0x48, 0x06, 0xe2, 0x5f, 0x46, 0xd3,
+				0x31, 0x5c, 0x44, 0xe0, 0xa5, 0xb4, 0x37, 0x12,
+				0x82, 0xdd, 0x2c, 0x8d, 0x5b, 0xe3, 0x09, 0x5f,
+			},
+			[x448Bytes]byte{
+				0x0f, 0xbc, 0xc2, 0xf9, 0x93, 0xcd, 0x56, 0xd3,
+				0x30, 0x5b, 0x0b, 0x7d, 0x9e, 0x55, 0xd4, 0xc1,
+				0xa8, 0xfb, 0x5d, 0xbb, 0x52, 0xf8, 0xe9, 0xa1,
+				0xe9, 0xb6, 0x20, 0x1b, 0x16, 0x5d, 0x01, 0x58,
+				0x94, 0xe5, 0x6c, 0x4d, 0x35, 0x70, 0xbe, 0xe5,
+				0x2f, 0xe2, 0x05, 0xe2, 0x8a, 0x78, 0xb9, 0x1c,
+				0xdf, 0xbd, 0xe7, 0x1c, 0xe8, 0xd1, 0x57, 0xdb,
+			},
+			[x448Bytes]byte{
+				0x88, 0x4a, 0x02, 0x57, 0x62, 0x39, 0xff, 0x7a,
+				0x2f, 0x2f, 0x63, 0xb2, 0xdb, 0x6a, 0x9f, 0xf3,
+				0x70, 0x47, 0xac, 0x13, 0x56, 0x8e, 0x1e, 0x30,
+				0xfe, 0x63, 0xc4, 0xa7, 0xad, 0x1b, 0x3e, 0xe3,
+				0xa5, 0x70, 0x0d, 0xf3, 0x43, 0x21, 0xd6, 0x20,
+				0x77, 0xe6, 0x36, 0x33, 0xc5, 0x75, 0xc1, 0xc9,
+				0x54, 0x51, 0x4e, 0x99, 0xda, 0x7c, 0x17, 0x9d,
+			},
+		},
+	}
+
+	var out [x448Bytes]byte
+	for i, vec := range vectors {
+		ret := ScalarMult(&out, &vec.scalar, &vec.base)
+		if ret != 0 {
+			t.Errorf("KAT[%d]: ScalarMult failed", i)
+		}
+		if !bytes.Equal(out[:], vec.answer[:]) {
+			// Report both values so a failure can be diagnosed from the log.
+			t.Errorf("KAT[%d]: Mismatch: got %x, want %x", i, out[:], vec.answer[:])
+		}
+	}
+}
+
+// TestCurve448 walks through a full exchange with fixed keys: both public
+// keys are derived with ScalarBaseMult, then each side computes the shared
+// secret with ScalarMult, and every result is compared to its expected value.
+func TestCurve448(t *testing.T) {
+	var (
+		privA = [x448Bytes]byte{
+			0x9a, 0x8f, 0x49, 0x25, 0xd1, 0x51, 0x9f, 0x57,
+			0x75, 0xcf, 0x46, 0xb0, 0x4b, 0x58, 0x00, 0xd4,
+			0xee, 0x9e, 0xe8, 0xba, 0xe8, 0xbc, 0x55, 0x65,
+			0xd4, 0x98, 0xc2, 0x8d, 0xd9, 0xc9, 0xba, 0xf5,
+			0x74, 0xa9, 0x41, 0x97, 0x44, 0x89, 0x73, 0x91,
+			0x00, 0x63, 0x82, 0xa6, 0xf1, 0x27, 0xab, 0x1d,
+			0x9a, 0xc2, 0xd8, 0xc0, 0xa5, 0x98, 0x72, 0x6b,
+		}
+
+		pubA = [x448Bytes]byte{
+			0x9b, 0x08, 0xf7, 0xcc, 0x31, 0xb7, 0xe3, 0xe6,
+			0x7d, 0x22, 0xd5, 0xae, 0xa1, 0x21, 0x07, 0x4a,
+			0x27, 0x3b, 0xd2, 0xb8, 0x3d, 0xe0, 0x9c, 0x63,
+			0xfa, 0xa7, 0x3d, 0x2c, 0x22, 0xc5, 0xd9, 0xbb,
+			0xc8, 0x36, 0x64, 0x72, 0x41, 0xd9, 0x53, 0xd4,
+			0x0c, 0x5b, 0x12, 0xda, 0x88, 0x12, 0x0d, 0x53,
+			0x17, 0x7f, 0x80, 0xe5, 0x32, 0xc4, 0x1f, 0xa0,
+		}
+
+		privB = [x448Bytes]byte{
+			0x1c, 0x30, 0x6a, 0x7a, 0xc2, 0xa0, 0xe2, 0xe0,
+			0x99, 0x0b, 0x29, 0x44, 0x70, 0xcb, 0xa3, 0x39,
+			0xe6, 0x45, 0x37, 0x72, 0xb0, 0x75, 0x81, 0x1d,
+			0x8f, 0xad, 0x0d, 0x1d, 0x69, 0x27, 0xc1, 0x20,
+			0xbb, 0x5e, 0xe8, 0x97, 0x2b, 0x0d, 0x3e, 0x21,
+			0x37, 0x4c, 0x9c, 0x92, 0x1b, 0x09, 0xd1, 0xb0,
+			0x36, 0x6f, 0x10, 0xb6, 0x51, 0x73, 0x99, 0x2d,
+		}
+
+		pubB = [x448Bytes]byte{
+			0x3e, 0xb7, 0xa8, 0x29, 0xb0, 0xcd, 0x20, 0xf5,
+			0xbc, 0xfc, 0x0b, 0x59, 0x9b, 0x6f, 0xec, 0xcf,
+			0x6d, 0xa4, 0x62, 0x71, 0x07, 0xbd, 0xb0, 0xd4,
+			0xf3, 0x45, 0xb4, 0x30, 0x27, 0xd8, 0xb9, 0x72,
+			0xfc, 0x3e, 0x34, 0xfb, 0x42, 0x32, 0xa1, 0x3c,
+			0xa7, 0x06, 0xdc, 0xb5, 0x7a, 0xec, 0x3d, 0xae,
+			0x07, 0xbd, 0xc1, 0xc6, 0x7b, 0xf3, 0x36, 0x09,
+		}
+
+		shared = [x448Bytes]byte{
+			0x07, 0xff, 0xf4, 0x18, 0x1a, 0xc6, 0xcc, 0x95,
+			0xec, 0x1c, 0x16, 0xa9, 0x4a, 0x0f, 0x74, 0xd1,
+			0x2d, 0xa2, 0x32, 0xce, 0x40, 0xa7, 0x75, 0x52,
+			0x28, 0x1d, 0x28, 0x2b, 0xb6, 0x0c, 0x0b, 0x56,
+			0xfd, 0x24, 0x64, 0xc3, 0x35, 0x54, 0x39, 0x36,
+			0x52, 0x1c, 0x24, 0x40, 0x30, 0x85, 0xd5, 0x9a,
+			0x44, 0x9a, 0x50, 0x37, 0x51, 0x4a, 0x87, 0x9d,
+		}
+
+		out [x448Bytes]byte
+	)
+
+	// Alice's public key.
+	if ret := ScalarBaseMult(&out, &privA); ret != 0 {
+		t.Error("Alice: ScalarBaseMult failed")
+	}
+	if !bytes.Equal(out[:], pubA[:]) {
+		t.Error("Alice: ScalarBaseMult Mismatch")
+	}
+
+	// Bob's public key.
+	if ret := ScalarBaseMult(&out, &privB); ret != 0 {
+		t.Error("Bob: ScalarBaseMult failed")
+	}
+	if !bytes.Equal(out[:], pubB[:]) {
+		t.Error("Bob: ScalarBaseMult Mismatch")
+	}
+
+	// Shared secret as computed by Bob.
+	if ret := ScalarMult(&out, &privB, &pubA); ret != 0 {
+		t.Error("Bob: ScalarMult failed")
+	}
+	if !bytes.Equal(out[:], shared[:]) {
+		t.Error("Bob: ScalarMult Mismatch")
+	}
+
+	// Shared secret as computed by Alice.
+	if ret := ScalarMult(&out, &privA, &pubB); ret != 0 {
+		t.Error("Alice: ScalarMult failed")
+	}
+	if !bytes.Equal(out[:], shared[:]) {
+		t.Error("Alice: ScalarMult Mismatch")
+	}
+}
+
+// BenchmarkECDH times a single ScalarMult (one side's shared secret
+// computation) per iteration.  Public key generation, the other side's
+// computation and all checks run with the timer stopped.
+func BenchmarkECDH(b *testing.B) {
+	var sa, sb, pa, pb, ab, ba [x448Bytes]byte
+
+	if _, err := rand.Read(sa[:]); err != nil {
+		b.Fatalf("rand.Read: %v", err)
+	}
+	if _, err := rand.Read(sb[:]); err != nil {
+		b.Fatalf("rand.Read: %v", err)
+	}
+
+	b.ResetTimer()
+	b.StopTimer()
+	for i := 0; i < b.N; i++ {
+		ret := ScalarBaseMult(&pa, &sa)
+		ret |= ScalarBaseMult(&pb, &sb)
+		b.StartTimer()
+		ret |= ScalarMult(&ab, &sa, &pb)
+		b.StopTimer()
+		ret |= ScalarMult(&ba, &sb, &pa)
+
+		// Each call returns 0 or -1, so the OR is non-zero if any failed.
+		if ret != 0 {
+			b.Fatal("Alice/Bob: scalar multiplication failed")
+		}
+		if !bytes.Equal(ab[:], ba[:]) {
+			b.Fatal("Alice/Bob: Mismatch")
+		}
+
+		// Use this round's public keys as the next round's secrets so
+		// that every iteration works on fresh inputs.
+		copy(sa[:], pa[:])
+		copy(sb[:], pb[:])
+	}
+}