round_bitsliced32.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. // round_bitsliced32.go - 32 bit Constant time AES round function.
  2. //
  3. // To the extent possible under law, Yawning Angel has waived all copyright
  4. // and related or neighboring rights to aez, using the Creative
  5. // Commons "CC0" public domain dedication. See LICENSE or
  6. // <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
  7. package aez
  8. import "git.schwanenlied.me/yawning/bsaes.git/ct32"
  9. type roundB32 struct {
  10. skey [32]uint32 // I, J, L, 0
  11. }
  12. func newRoundB32(extractedKey *[extractedKeySize]byte) aesImpl {
  13. r := new(roundB32)
  14. for i := 0; i < 3; i++ {
  15. ct32.RkeyOrtho(r.skey[i*8:], extractedKey[i*16:])
  16. }
  17. return r
  18. }
  19. func (r *roundB32) Reset() {
  20. memwipeU32(r.skey[:])
  21. }
  22. func (r *roundB32) AES4(j, i, l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
  23. var q [8]uint32
  24. xorBytes4x16(j[:], i[:], l[:], src, dst[:])
  25. ct32.Load4xU32(&q, dst[:])
  26. r.round(&q, r.skey[8:]) // J
  27. r.round(&q, r.skey[0:]) // I
  28. r.round(&q, r.skey[16:]) // L
  29. r.round(&q, r.skey[24:]) // zero
  30. ct32.Store4xU32(dst[:], &q)
  31. memwipeU32(q[:])
  32. }
  33. func (r *roundB32) aes4x2(
  34. j0, i0, l0 *[blockSize]byte, src0 []byte, dst0 *[blockSize]byte,
  35. j1, i1, l1 *[blockSize]byte, src1 []byte, dst1 *[blockSize]byte) {
  36. // XXX/performance: Fairly sure i, src, and dst are the only things
  37. // that are ever different here so XORs can be pruned.
  38. var q [8]uint32
  39. xorBytes4x16(j0[:], i0[:], l0[:], src0, dst0[:])
  40. xorBytes4x16(j1[:], i1[:], l1[:], src1, dst1[:])
  41. ct32.Load8xU32(&q, dst0[:], dst1[:])
  42. r.round(&q, r.skey[8:]) // J
  43. r.round(&q, r.skey[0:]) // I
  44. r.round(&q, r.skey[16:]) // L
  45. r.round(&q, r.skey[24:]) // zero
  46. ct32.Store8xU32(dst0[:], dst1[:], &q)
  47. memwipeU32(q[:])
  48. }
  49. func (r *roundB32) AES10(l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
  50. var q [8]uint32
  51. xorBytes1x16(src, l[:], dst[:])
  52. ct32.Load4xU32(&q, dst[:])
  53. for i := 0; i < 3; i++ {
  54. r.round(&q, r.skey[0:]) // I
  55. r.round(&q, r.skey[8:]) // J
  56. r.round(&q, r.skey[16:]) // L
  57. }
  58. r.round(&q, r.skey[0:]) // I
  59. ct32.Store4xU32(dst[:], &q)
  60. memwipeU32(q[:])
  61. }
  62. func (r *roundB32) round(q *[8]uint32, k []uint32) {
  63. ct32.Sbox(q)
  64. ct32.ShiftRows(q)
  65. ct32.MixColumns(q)
  66. ct32.AddRoundKey(q, k)
  67. }
  68. func (r *roundB32) aezCorePass1(e *eState, in, out []byte, X *[blockSize]byte, sz int) {
  69. var tmp0, tmp1, I [blockSize]byte
  70. copy(I[:], e.I[1][:])
  71. i := 1
  72. // Process 4 * 16 bytes at a time in a loop.
  73. for sz >= 4*blockSize {
  74. r.aes4x2(&e.J[0], &I, &e.L[(i+0)%8], in[blockSize:], &tmp0,
  75. &e.J[0], &I, &e.L[(i+1)%8], in[blockSize*3:], &tmp1) // E(1,i), E(1,i+1)
  76. xorBytes1x16(in[:], tmp0[:], out[:])
  77. xorBytes1x16(in[blockSize*2:], tmp1[:], out[blockSize*2:])
  78. r.aes4x2(&zero, &e.I[0], &e.L[0], out[:], &tmp0,
  79. &zero, &e.I[0], &e.L[0], out[blockSize*2:], &tmp1) // E(0,0), E(0,0)
  80. xorBytes1x16(in[blockSize:], tmp0[:], out[blockSize:])
  81. xorBytes1x16(in[blockSize*3:], tmp1[:], out[blockSize*3:])
  82. xorBytes1x16(out[blockSize:], X[:], X[:])
  83. xorBytes1x16(out[blockSize*3:], X[:], X[:])
  84. sz -= 4 * blockSize
  85. in, out = in[64:], out[64:]
  86. if (i+1)%8 == 0 {
  87. doubleBlock(&I)
  88. }
  89. i += 2
  90. }
  91. if sz > 0 {
  92. r.AES4(&e.J[0], &I, &e.L[i%8], in[blockSize:], &tmp0) // E(1,i)
  93. xorBytes1x16(in[:], tmp0[:], out[:])
  94. r.AES4(&zero, &e.I[0], &e.L[0], out[:], &tmp0) // E(0,0)
  95. xorBytes1x16(in[blockSize:], tmp0[:], out[blockSize:])
  96. xorBytes1x16(out[blockSize:], X[:], X[:])
  97. }
  98. memwipe(tmp0[:])
  99. memwipe(tmp1[:])
  100. memwipe(I[:])
  101. }
  102. func (r *roundB32) aezCorePass2(e *eState, out []byte, Y, S *[blockSize]byte, sz int) {
  103. var tmp0, tmp1, I [blockSize]byte
  104. copy(I[:], e.I[1][:])
  105. i := 1
  106. // Process 4 * 16 bytes at a time in a loop.
  107. for sz >= 4*blockSize {
  108. r.aes4x2(&e.J[1], &I, &e.L[(i+0)%8], S[:], &tmp0,
  109. &e.J[1], &I, &e.L[(i+1)%8], S[:], &tmp1) // E(2,i)
  110. xorBytes1x16(out, tmp0[:], out[:])
  111. xorBytes1x16(out[blockSize*2:], tmp1[:], out[blockSize*2:])
  112. xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
  113. xorBytes1x16(out[blockSize*3:], tmp1[:], out[blockSize*3:])
  114. xorBytes1x16(out, Y[:], Y[:])
  115. xorBytes1x16(out[blockSize*2:], Y[:], Y[:])
  116. r.aes4x2(&zero, &e.I[0], &e.L[0], out[blockSize:], &tmp0,
  117. &zero, &e.I[0], &e.L[0], out[blockSize*3:], &tmp1) // E(0,0)
  118. xorBytes1x16(out, tmp0[:], out[:])
  119. xorBytes1x16(out[blockSize*2:], tmp1[:], out[blockSize*2:])
  120. r.aes4x2(&e.J[0], &I, &e.L[(i+0)%8], out[:], &tmp0,
  121. &e.J[0], &I, &e.L[(i+1)%8], out[blockSize*2:], &tmp1) // E(1,i)
  122. xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
  123. xorBytes1x16(out[blockSize*3:], tmp1[:], out[blockSize*3:])
  124. swapBlocks(&tmp0, out)
  125. swapBlocks(&tmp0, out[blockSize*2:])
  126. sz -= 4 * blockSize
  127. out = out[64:]
  128. if (i+1)%8 == 0 {
  129. doubleBlock(&I)
  130. }
  131. i += 2
  132. }
  133. if sz > 0 {
  134. r.AES4(&e.J[1], &I, &e.L[i%8], S[:], &tmp0) // E(2,i)
  135. xorBytes1x16(out, tmp0[:], out[:])
  136. xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
  137. xorBytes1x16(out, Y[:], Y[:])
  138. r.AES4(&zero, &e.I[0], &e.L[0], out[blockSize:], &tmp0) // E(0,0)
  139. xorBytes1x16(out, tmp0[:], out[:])
  140. r.AES4(&e.J[0], &I, &e.L[i%8], out[:], &tmp0) // E(1,i)
  141. xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
  142. swapBlocks(&tmp0, out)
  143. }
  144. memwipe(tmp0[:])
  145. memwipe(tmp1[:])
  146. memwipe(I[:])
  147. }
  148. func memwipeU32(b []uint32) {
  149. for i := range b {
  150. b[i] = 0
  151. }
  152. }