round_bitsliced32.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. // round_bitsliced32.go - 32 bit Constant time AES round function.
  2. //
  3. // To the extent possible under law, Yawning Angel has waived all copyright
  4. // and related or neighboring rights to aez, using the Creative
  5. // Commons "CC0" public domain dedication. See LICENSE or
  6. // <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
  7. package aez
  8. import "git.schwanenlied.me/yawning/bsaes.git/ct32"
  9. type roundB32 struct {
  10. ct32.Impl32
  11. skey [32]uint32 // I, J, L, 0
  12. }
  13. func newRoundB32(extractedKey *[extractedKeySize]byte) aesImpl {
  14. r := new(roundB32)
  15. for i := 0; i < 3; i++ {
  16. r.RkeyOrtho(r.skey[i*8:], extractedKey[i*16:])
  17. }
  18. return r
  19. }
  20. func (r *roundB32) Reset() {
  21. memwipeU32(r.skey[:])
  22. }
  23. func (r *roundB32) AES4(j, i, l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
  24. var q [8]uint32
  25. xorBytes4x16(j[:], i[:], l[:], src, dst[:])
  26. r.Load4xU32(&q, dst[:])
  27. r.round(&q, r.skey[8:]) // J
  28. r.round(&q, r.skey[0:]) // I
  29. r.round(&q, r.skey[16:]) // L
  30. r.round(&q, r.skey[24:]) // zero
  31. r.Store4xU32(dst[:], &q)
  32. memwipeU32(q[:])
  33. }
  34. func (r *roundB32) aes4x2(
  35. j0, i0, l0 *[blockSize]byte, src0 []byte, dst0 *[blockSize]byte,
  36. j1, i1, l1 *[blockSize]byte, src1 []byte, dst1 *[blockSize]byte) {
  37. // XXX/performance: Fairly sure i, src, and dst are the only things
  38. // that are ever different here so XORs can be pruned.
  39. var q [8]uint32
  40. xorBytes4x16(j0[:], i0[:], l0[:], src0, dst0[:])
  41. xorBytes4x16(j1[:], i1[:], l1[:], src1, dst1[:])
  42. r.Load8xU32(&q, dst0[:], dst1[:])
  43. r.round(&q, r.skey[8:]) // J
  44. r.round(&q, r.skey[0:]) // I
  45. r.round(&q, r.skey[16:]) // L
  46. r.round(&q, r.skey[24:]) // zero
  47. r.Store8xU32(dst0[:], dst1[:], &q)
  48. memwipeU32(q[:])
  49. }
  50. func (r *roundB32) AES10(l *[blockSize]byte, src []byte, dst *[blockSize]byte) {
  51. var q [8]uint32
  52. xorBytes1x16(src, l[:], dst[:])
  53. r.Load4xU32(&q, dst[:])
  54. for i := 0; i < 3; i++ {
  55. r.round(&q, r.skey[0:]) // I
  56. r.round(&q, r.skey[8:]) // J
  57. r.round(&q, r.skey[16:]) // L
  58. }
  59. r.round(&q, r.skey[0:]) // I
  60. r.Store4xU32(dst[:], &q)
  61. memwipeU32(q[:])
  62. }
  63. func (r *roundB32) round(q *[8]uint32, k []uint32) {
  64. r.Sbox(q)
  65. r.ShiftRows(q)
  66. r.MixColumns(q)
  67. r.AddRoundKey(q, k)
  68. }
  69. func (r *roundB32) aezCorePass1(e *eState, in, out []byte, X *[blockSize]byte, sz int) {
  70. var tmp0, tmp1, I [blockSize]byte
  71. copy(I[:], e.I[1][:])
  72. i := 1
  73. // Process 4 * 16 bytes at a time in a loop.
  74. for sz >= 4*blockSize {
  75. r.aes4x2(&e.J[0], &I, &e.L[(i+0)%8], in[blockSize:], &tmp0,
  76. &e.J[0], &I, &e.L[(i+1)%8], in[blockSize*3:], &tmp1) // E(1,i), E(1,i+1)
  77. xorBytes1x16(in[:], tmp0[:], out[:])
  78. xorBytes1x16(in[blockSize*2:], tmp1[:], out[blockSize*2:])
  79. r.aes4x2(&zero, &e.I[0], &e.L[0], out[:], &tmp0,
  80. &zero, &e.I[0], &e.L[0], out[blockSize*2:], &tmp1) // E(0,0), E(0,0)
  81. xorBytes1x16(in[blockSize:], tmp0[:], out[blockSize:])
  82. xorBytes1x16(in[blockSize*3:], tmp1[:], out[blockSize*3:])
  83. xorBytes1x16(out[blockSize:], X[:], X[:])
  84. xorBytes1x16(out[blockSize*3:], X[:], X[:])
  85. sz -= 4 * blockSize
  86. in, out = in[64:], out[64:]
  87. if (i+1)%8 == 0 {
  88. doubleBlock(&I)
  89. }
  90. i += 2
  91. }
  92. if sz > 0 {
  93. r.AES4(&e.J[0], &I, &e.L[i%8], in[blockSize:], &tmp0) // E(1,i)
  94. xorBytes1x16(in[:], tmp0[:], out[:])
  95. r.AES4(&zero, &e.I[0], &e.L[0], out[:], &tmp0) // E(0,0)
  96. xorBytes1x16(in[blockSize:], tmp0[:], out[blockSize:])
  97. xorBytes1x16(out[blockSize:], X[:], X[:])
  98. }
  99. memwipe(tmp0[:])
  100. memwipe(tmp1[:])
  101. memwipe(I[:])
  102. }
  103. func (r *roundB32) aezCorePass2(e *eState, out []byte, Y, S *[blockSize]byte, sz int) {
  104. var tmp0, tmp1, I [blockSize]byte
  105. copy(I[:], e.I[1][:])
  106. i := 1
  107. // Process 4 * 16 bytes at a time in a loop.
  108. for sz >= 4*blockSize {
  109. r.aes4x2(&e.J[1], &I, &e.L[(i+0)%8], S[:], &tmp0,
  110. &e.J[1], &I, &e.L[(i+1)%8], S[:], &tmp1) // E(2,i)
  111. xorBytes1x16(out, tmp0[:], out[:])
  112. xorBytes1x16(out[blockSize*2:], tmp1[:], out[blockSize*2:])
  113. xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
  114. xorBytes1x16(out[blockSize*3:], tmp1[:], out[blockSize*3:])
  115. xorBytes1x16(out, Y[:], Y[:])
  116. xorBytes1x16(out[blockSize*2:], Y[:], Y[:])
  117. r.aes4x2(&zero, &e.I[0], &e.L[0], out[blockSize:], &tmp0,
  118. &zero, &e.I[0], &e.L[0], out[blockSize*3:], &tmp1) // E(0,0)
  119. xorBytes1x16(out, tmp0[:], out[:])
  120. xorBytes1x16(out[blockSize*2:], tmp1[:], out[blockSize*2:])
  121. r.aes4x2(&e.J[0], &I, &e.L[(i+0)%8], out[:], &tmp0,
  122. &e.J[0], &I, &e.L[(i+1)%8], out[blockSize*2:], &tmp1) // E(1,i)
  123. xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
  124. xorBytes1x16(out[blockSize*3:], tmp1[:], out[blockSize*3:])
  125. copy(tmp0[:], out[:])
  126. copy(tmp1[:], out[blockSize*2:])
  127. copy(out[:blockSize], out[blockSize:])
  128. copy(out[blockSize*2:blockSize*3], out[blockSize*3:])
  129. copy(out[blockSize:], tmp0[:])
  130. copy(out[blockSize*3:], tmp1[:])
  131. sz -= 4 * blockSize
  132. out = out[64:]
  133. if (i+1)%8 == 0 {
  134. doubleBlock(&I)
  135. }
  136. i += 2
  137. }
  138. if sz > 0 {
  139. r.AES4(&e.J[1], &I, &e.L[i%8], S[:], &tmp0) // E(2,i)
  140. xorBytes1x16(out, tmp0[:], out[:])
  141. xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
  142. xorBytes1x16(out, Y[:], Y[:])
  143. r.AES4(&zero, &e.I[0], &e.L[0], out[blockSize:], &tmp0) // E(0,0)
  144. xorBytes1x16(out, tmp0[:], out[:])
  145. r.AES4(&e.J[0], &I, &e.L[i%8], out[:], &tmp0) // E(1,i)
  146. xorBytes1x16(out[blockSize:], tmp0[:], out[blockSize:])
  147. copy(tmp0[:], out[:])
  148. copy(out[:blockSize], out[blockSize:])
  149. copy(out[blockSize:], tmp0[:])
  150. }
  151. memwipe(tmp0[:])
  152. memwipe(tmp1[:])
  153. memwipe(I[:])
  154. }
  // memwipeU32 sets every element of words to zero, in place.  A nil or
  // empty slice is a no-op.
  func memwipeU32(b []uint32) {
  	words := b
  	for idx := range words {
  		words[idx] = 0
  	}
  }