
Purge the XMM registers as part of per-call cleanup.

Yawning Angel, 2 years ago
commit 69751ad09f
3 changed files with 43 additions and 18 deletions:
  1. aez_amd64.go (+4, -0)
  2. aez_amd64.py (+19, -12)
  3. aez_amd64.s (+20, -6)

aez_amd64.go (+4, -0)

@@ -15,6 +15,9 @@ var useAESNI = false
 func cpuidAMD64(cpuidParams *uint32)
 
 //go:noescape
+func resetAMD64SSE2()
+
+//go:noescape
 func xorBytes1x16AMD64SSE2(a, b, dst *byte)
 
 //go:noescape
@@ -46,6 +49,7 @@ type roundAESNI struct {
 
 func (r *roundAESNI) Reset() {
 	memwipe(r.keys[:])
+	resetAMD64SSE2()
 }
 
 func (r *roundAESNI) AES4(j, i, l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
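
On the Go side, Reset now pairs two kinds of cleanup: memwipe scrubs the extracted key schedule held in memory, and resetAMD64SSE2 scrubs the X0 through X15 registers that the AES-NI helpers leave populated with key and block material. A minimal caller sketch follows; only Reset, AES4, roundAESNI, and blockSize come from this package, and the wrapper function itself is hypothetical.

	// Hypothetical wrapper inside the aez package: scrub per-call state
	// as soon as the round helper is no longer needed.
	func aes4Once(r *roundAESNI, j, i, l *[blockSize]byte, src []byte) [blockSize]byte {
		var dst [blockSize]byte
		defer r.Reset() // wipes r.keys and zeroes X0..X15 on return
		r.AES4(j, i, l, src, &dst)
		return dst
	}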

aez_amd64.py (+19, -12)

@@ -32,6 +32,25 @@ with Function("cpuidAMD64", (cpuidParams,)):
 
     RETURN()
 
+with Function("resetAMD64SSE2", ()):
+    PXOR(registers.xmm0, registers.xmm0)
+    PXOR(registers.xmm1, registers.xmm1)
+    PXOR(registers.xmm2, registers.xmm2)
+    PXOR(registers.xmm3, registers.xmm3)
+    PXOR(registers.xmm4, registers.xmm4)
+    PXOR(registers.xmm5, registers.xmm5)
+    PXOR(registers.xmm6, registers.xmm6)
+    PXOR(registers.xmm7, registers.xmm7)
+    PXOR(registers.xmm8, registers.xmm8)
+    PXOR(registers.xmm9, registers.xmm9)
+    PXOR(registers.xmm10, registers.xmm10)
+    PXOR(registers.xmm11, registers.xmm11)
+    PXOR(registers.xmm12, registers.xmm12)
+    PXOR(registers.xmm13, registers.xmm13)
+    PXOR(registers.xmm14, registers.xmm14)
+    PXOR(registers.xmm15, registers.xmm15)
+    RETURN()
+
 a = Argument(ptr(const_uint8_t))
 b = Argument(ptr(const_uint8_t))
 c = Argument(ptr(const_uint8_t))
@@ -211,10 +230,6 @@ with Function("aezAES4AMD64AESNI", (j, i, l, k, src, dst), target=uarch.zen):
 
     MOVDQU([reg_dst], xmm_state)
 
-    PXOR(xmm_i, xmm_i)
-    PXOR(xmm_j, xmm_j)
-    PXOR(xmm_l, xmm_l)
-
     RETURN()
 
 with Function("aezAES10AMD64AESNI", (l, k, src, dst), target=uarch.zen):
@@ -250,10 +265,6 @@ with Function("aezAES10AMD64AESNI", (l, k, src, dst), target=uarch.zen):
 
     MOVDQU([reg_dst], xmm_state)
 
-    PXOR(xmm_i, xmm_i)
-    PXOR(xmm_j, xmm_j)
-    PXOR(xmm_l, xmm_l)
-
     RETURN()
 
 def doubleBlock(blk, tmp0, tmp1, c):
@@ -668,8 +679,6 @@ with Function("aezCorePass1AMD64AESNI", (src, dst, x, i, l, k, consts, sz), targ
     # Write back X.
     MOVDQU([reg_x], xmm_x)
 
-    # Clear all the XMM registers. XXX: move to a defer()
-
     RETURN()
 
 y = Argument(ptr(uint8_t))
@@ -1262,6 +1271,4 @@ with Function("aezCorePass2AMD64AESNI", (dst, y, s, j, i, l, k, consts, sz), tar
     # Write back Y.
     MOVDQU([reg_y], xmm_y)
 
-    # Clear all the XMM registers. XXX: move to a defer()
-
     RETURN()

aez_amd64.s (+20, -6)

@@ -14,6 +14,26 @@ TEXT ·cpuidAMD64(SB),4,$0-8
 	MOVL DX, 12(R15)
 	RET
 
+// func resetAMD64SSE2()
+TEXT ·resetAMD64SSE2(SB),4,$0
+	PXOR X0, X0
+	PXOR X1, X1
+	PXOR X2, X2
+	PXOR X3, X3
+	PXOR X4, X4
+	PXOR X5, X5
+	PXOR X6, X6
+	PXOR X7, X7
+	PXOR X8, X8
+	PXOR X9, X9
+	PXOR X10, X10
+	PXOR X11, X11
+	PXOR X12, X12
+	PXOR X13, X13
+	PXOR X14, X14
+	PXOR X15, X15
+	RET
+
 // func xorBytes1x16AMD64SSE2(a *uint8, b *uint8, dst *uint8)
 TEXT ·xorBytes1x16AMD64SSE2(SB),4,$0-24
 	MOVQ a+0(FP), AX
@@ -66,9 +86,6 @@ TEXT ·aezAES4AMD64AESNI(SB),4,$0-48
 	AESENC X3, X0
 	AESENC X4, X0
 	MOVOU X0, 0(SI)
-	PXOR X2, X2
-	PXOR X1, X1
-	PXOR X3, X3
 	RET
 
 // func aezAES10AMD64AESNI(l *uint8, k *uint8, src *uint8, dst *uint8)
@@ -94,9 +111,6 @@ TEXT ·aezAES10AMD64AESNI(SB),4,$0-32
 	AESENC X1, X0
 	AESENC X2, X0
 	MOVOU X0, 0(DX)
-	PXOR X2, X2
-	PXOR X3, X3
-	PXOR X1, X1
 	RET
 
 // func aezCorePass1AMD64AESNI(src *uint8, dst *uint8, x *uint8, i *uint8, l *uint8, k *uint8, consts *uint8, sz *uint)
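
The register purge complements, rather than replaces, the existing memory scrubbing: memwipe(r.keys[:]) clears the extracted key schedule from memory, while resetAMD64SSE2 clears the copies that MOVOU and AESENC left behind in the SIMD register file. The package's memwipe is not part of this diff; a minimal sketch of what such a helper conventionally looks like, assuming it simply takes a byte slice:

	// Sketch only (assumed signature, not this package's actual code):
	// overwrite every byte of the slice with zero.
	func memwipe(b []byte) {
		for i := range b {
			b[i] = 0
		}
	}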