chacha20_ref_go19.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
// chacha20_ref.go - Reference ChaCha20.
//
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to chacha20, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
  7. // +build go1.9
  8. package chacha20
  9. import (
  10. "encoding/binary"
  11. "math"
  12. "math/bits"
  13. "unsafe"
  14. )
  15. func blocksRef(x *[stateSize]uint32, in []byte, out []byte, nrBlocks int, isIetf bool) {
  16. if isIetf {
  17. var totalBlocks uint64
  18. totalBlocks = uint64(x[12]) + uint64(nrBlocks)
  19. if totalBlocks > math.MaxUint32 {
  20. panic("chacha20: Exceeded keystream per nonce limit")
  21. }
  22. }
  23. // This routine ignores x[0]...x[4] in favor the const values since it's
  24. // ever so slightly faster.
  25. for n := 0; n < nrBlocks; n++ {
  26. x0, x1, x2, x3 := sigma0, sigma1, sigma2, sigma3
  27. x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
  28. for i := chachaRounds; i > 0; i -= 2 {
  29. // quarterround(x, 0, 4, 8, 12)
  30. x0 += x4
  31. x12 ^= x0
  32. x12 = bits.RotateLeft32(x12, 16)
  33. x8 += x12
  34. x4 ^= x8
  35. x4 = bits.RotateLeft32(x4, 12)
  36. x0 += x4
  37. x12 ^= x0
  38. x12 = bits.RotateLeft32(x12, 8)
  39. x8 += x12
  40. x4 ^= x8
  41. x4 = bits.RotateLeft32(x4, 7)
  42. // quarterround(x, 1, 5, 9, 13)
  43. x1 += x5
  44. x13 ^= x1
  45. x13 = bits.RotateLeft32(x13, 16)
  46. x9 += x13
  47. x5 ^= x9
  48. x5 = bits.RotateLeft32(x5, 12)
  49. x1 += x5
  50. x13 ^= x1
  51. x13 = bits.RotateLeft32(x13, 8)
  52. x9 += x13
  53. x5 ^= x9
  54. x5 = bits.RotateLeft32(x5, 7)
  55. // quarterround(x, 2, 6, 10, 14)
  56. x2 += x6
  57. x14 ^= x2
  58. x14 = bits.RotateLeft32(x14, 16)
  59. x10 += x14
  60. x6 ^= x10
  61. x6 = bits.RotateLeft32(x6, 12)
  62. x2 += x6
  63. x14 ^= x2
  64. x14 = bits.RotateLeft32(x14, 8)
  65. x10 += x14
  66. x6 ^= x10
  67. x6 = bits.RotateLeft32(x6, 7)
  68. // quarterround(x, 3, 7, 11, 15)
  69. x3 += x7
  70. x15 ^= x3
  71. x15 = bits.RotateLeft32(x15, 16)
  72. x11 += x15
  73. x7 ^= x11
  74. x7 = bits.RotateLeft32(x7, 12)
  75. x3 += x7
  76. x15 ^= x3
  77. x15 = bits.RotateLeft32(x15, 8)
  78. x11 += x15
  79. x7 ^= x11
  80. x7 = bits.RotateLeft32(x7, 7)
  81. // quarterround(x, 0, 5, 10, 15)
  82. x0 += x5
  83. x15 ^= x0
  84. x15 = bits.RotateLeft32(x15, 16)
  85. x10 += x15
  86. x5 ^= x10
  87. x5 = bits.RotateLeft32(x5, 12)
  88. x0 += x5
  89. x15 ^= x0
  90. x15 = bits.RotateLeft32(x15, 8)
  91. x10 += x15
  92. x5 ^= x10
  93. x5 = bits.RotateLeft32(x5, 7)
  94. // quarterround(x, 1, 6, 11, 12)
  95. x1 += x6
  96. x12 ^= x1
  97. x12 = bits.RotateLeft32(x12, 16)
  98. x11 += x12
  99. x6 ^= x11
  100. x6 = bits.RotateLeft32(x6, 12)
  101. x1 += x6
  102. x12 ^= x1
  103. x12 = bits.RotateLeft32(x12, 8)
  104. x11 += x12
  105. x6 ^= x11
  106. x6 = bits.RotateLeft32(x6, 7)
  107. // quarterround(x, 2, 7, 8, 13)
  108. x2 += x7
  109. x13 ^= x2
  110. x13 = bits.RotateLeft32(x13, 16)
  111. x8 += x13
  112. x7 ^= x8
  113. x7 = bits.RotateLeft32(x7, 12)
  114. x2 += x7
  115. x13 ^= x2
  116. x13 = bits.RotateLeft32(x13, 8)
  117. x8 += x13
  118. x7 ^= x8
  119. x7 = bits.RotateLeft32(x7, 7)
  120. // quarterround(x, 3, 4, 9, 14)
  121. x3 += x4
  122. x14 ^= x3
  123. x14 = bits.RotateLeft32(x14, 16)
  124. x9 += x14
  125. x4 ^= x9
  126. x4 = bits.RotateLeft32(x4, 12)
  127. x3 += x4
  128. x14 ^= x3
  129. x14 = bits.RotateLeft32(x14, 8)
  130. x9 += x14
  131. x4 ^= x9
  132. x4 = bits.RotateLeft32(x4, 7)
  133. }
  134. // On amd64 at least, this is a rather big boost.
  135. if useUnsafe {
  136. if in != nil {
  137. inArr := (*[16]uint32)(unsafe.Pointer(&in[n*BlockSize]))
  138. outArr := (*[16]uint32)(unsafe.Pointer(&out[n*BlockSize]))
  139. outArr[0] = inArr[0] ^ (x0 + sigma0)
  140. outArr[1] = inArr[1] ^ (x1 + sigma1)
  141. outArr[2] = inArr[2] ^ (x2 + sigma2)
  142. outArr[3] = inArr[3] ^ (x3 + sigma3)
  143. outArr[4] = inArr[4] ^ (x4 + x[4])
  144. outArr[5] = inArr[5] ^ (x5 + x[5])
  145. outArr[6] = inArr[6] ^ (x6 + x[6])
  146. outArr[7] = inArr[7] ^ (x7 + x[7])
  147. outArr[8] = inArr[8] ^ (x8 + x[8])
  148. outArr[9] = inArr[9] ^ (x9 + x[9])
  149. outArr[10] = inArr[10] ^ (x10 + x[10])
  150. outArr[11] = inArr[11] ^ (x11 + x[11])
  151. outArr[12] = inArr[12] ^ (x12 + x[12])
  152. outArr[13] = inArr[13] ^ (x13 + x[13])
  153. outArr[14] = inArr[14] ^ (x14 + x[14])
  154. outArr[15] = inArr[15] ^ (x15 + x[15])
  155. } else {
  156. outArr := (*[16]uint32)(unsafe.Pointer(&out[n*BlockSize]))
  157. outArr[0] = x0 + sigma0
  158. outArr[1] = x1 + sigma1
  159. outArr[2] = x2 + sigma2
  160. outArr[3] = x3 + sigma3
  161. outArr[4] = x4 + x[4]
  162. outArr[5] = x5 + x[5]
  163. outArr[6] = x6 + x[6]
  164. outArr[7] = x7 + x[7]
  165. outArr[8] = x8 + x[8]
  166. outArr[9] = x9 + x[9]
  167. outArr[10] = x10 + x[10]
  168. outArr[11] = x11 + x[11]
  169. outArr[12] = x12 + x[12]
  170. outArr[13] = x13 + x[13]
  171. outArr[14] = x14 + x[14]
  172. outArr[15] = x15 + x[15]
  173. }
  174. } else {
  175. // Slow path, either the architecture cares about alignment, or is not little endian.
  176. x0 += sigma0
  177. x1 += sigma1
  178. x2 += sigma2
  179. x3 += sigma3
  180. x4 += x[4]
  181. x5 += x[5]
  182. x6 += x[6]
  183. x7 += x[7]
  184. x8 += x[8]
  185. x9 += x[9]
  186. x10 += x[10]
  187. x11 += x[11]
  188. x12 += x[12]
  189. x13 += x[13]
  190. x14 += x[14]
  191. x15 += x[15]
  192. if in != nil {
  193. binary.LittleEndian.PutUint32(out[0:4], binary.LittleEndian.Uint32(in[0:4])^x0)
  194. binary.LittleEndian.PutUint32(out[4:8], binary.LittleEndian.Uint32(in[4:8])^x1)
  195. binary.LittleEndian.PutUint32(out[8:12], binary.LittleEndian.Uint32(in[8:12])^x2)
  196. binary.LittleEndian.PutUint32(out[12:16], binary.LittleEndian.Uint32(in[12:16])^x3)
  197. binary.LittleEndian.PutUint32(out[16:20], binary.LittleEndian.Uint32(in[16:20])^x4)
  198. binary.LittleEndian.PutUint32(out[20:24], binary.LittleEndian.Uint32(in[20:24])^x5)
  199. binary.LittleEndian.PutUint32(out[24:28], binary.LittleEndian.Uint32(in[24:28])^x6)
  200. binary.LittleEndian.PutUint32(out[28:32], binary.LittleEndian.Uint32(in[28:32])^x7)
  201. binary.LittleEndian.PutUint32(out[32:36], binary.LittleEndian.Uint32(in[32:36])^x8)
  202. binary.LittleEndian.PutUint32(out[36:40], binary.LittleEndian.Uint32(in[36:40])^x9)
  203. binary.LittleEndian.PutUint32(out[40:44], binary.LittleEndian.Uint32(in[40:44])^x10)
  204. binary.LittleEndian.PutUint32(out[44:48], binary.LittleEndian.Uint32(in[44:48])^x11)
  205. binary.LittleEndian.PutUint32(out[48:52], binary.LittleEndian.Uint32(in[48:52])^x12)
  206. binary.LittleEndian.PutUint32(out[52:56], binary.LittleEndian.Uint32(in[52:56])^x13)
  207. binary.LittleEndian.PutUint32(out[56:60], binary.LittleEndian.Uint32(in[56:60])^x14)
  208. binary.LittleEndian.PutUint32(out[60:64], binary.LittleEndian.Uint32(in[60:64])^x15)
  209. in = in[BlockSize:]
  210. } else {
  211. binary.LittleEndian.PutUint32(out[0:4], x0)
  212. binary.LittleEndian.PutUint32(out[4:8], x1)
  213. binary.LittleEndian.PutUint32(out[8:12], x2)
  214. binary.LittleEndian.PutUint32(out[12:16], x3)
  215. binary.LittleEndian.PutUint32(out[16:20], x4)
  216. binary.LittleEndian.PutUint32(out[20:24], x5)
  217. binary.LittleEndian.PutUint32(out[24:28], x6)
  218. binary.LittleEndian.PutUint32(out[28:32], x7)
  219. binary.LittleEndian.PutUint32(out[32:36], x8)
  220. binary.LittleEndian.PutUint32(out[36:40], x9)
  221. binary.LittleEndian.PutUint32(out[40:44], x10)
  222. binary.LittleEndian.PutUint32(out[44:48], x11)
  223. binary.LittleEndian.PutUint32(out[48:52], x12)
  224. binary.LittleEndian.PutUint32(out[52:56], x13)
  225. binary.LittleEndian.PutUint32(out[56:60], x14)
  226. binary.LittleEndian.PutUint32(out[60:64], x15)
  227. }
  228. out = out[BlockSize:]
  229. }
  230. // Stoping at 2^70 bytes per nonce is the user's responsibility.
  231. ctr := uint64(x[13])<<32 | uint64(x[12])
  232. ctr++
  233. x[12] = uint32(ctr)
  234. x[13] = uint32(ctr >> 32)
  235. }
  236. }
  237. func hChaChaRef(x *[stateSize]uint32, out *[32]byte) {
  238. x0, x1, x2, x3 := sigma0, sigma1, sigma2, sigma3
  239. x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11]
  240. for i := chachaRounds; i > 0; i -= 2 {
  241. // quarterround(x, 0, 4, 8, 12)
  242. x0 += x4
  243. x12 ^= x0
  244. x12 = bits.RotateLeft32(x12, 16)
  245. x8 += x12
  246. x4 ^= x8
  247. x4 = bits.RotateLeft32(x4, 12)
  248. x0 += x4
  249. x12 ^= x0
  250. x12 = bits.RotateLeft32(x12, 8)
  251. x8 += x12
  252. x4 ^= x8
  253. x4 = bits.RotateLeft32(x4, 7)
  254. // quarterround(x, 1, 5, 9, 13)
  255. x1 += x5
  256. x13 ^= x1
  257. x13 = bits.RotateLeft32(x13, 16)
  258. x9 += x13
  259. x5 ^= x9
  260. x5 = bits.RotateLeft32(x5, 12)
  261. x1 += x5
  262. x13 ^= x1
  263. x13 = bits.RotateLeft32(x13, 8)
  264. x9 += x13
  265. x5 ^= x9
  266. x5 = bits.RotateLeft32(x5, 7)
  267. // quarterround(x, 2, 6, 10, 14)
  268. x2 += x6
  269. x14 ^= x2
  270. x14 = bits.RotateLeft32(x14, 16)
  271. x10 += x14
  272. x6 ^= x10
  273. x6 = bits.RotateLeft32(x6, 12)
  274. x2 += x6
  275. x14 ^= x2
  276. x14 = bits.RotateLeft32(x14, 8)
  277. x10 += x14
  278. x6 ^= x10
  279. x6 = bits.RotateLeft32(x6, 7)
  280. // quarterround(x, 3, 7, 11, 15)
  281. x3 += x7
  282. x15 ^= x3
  283. x15 = bits.RotateLeft32(x15, 16)
  284. x11 += x15
  285. x7 ^= x11
  286. x7 = bits.RotateLeft32(x7, 12)
  287. x3 += x7
  288. x15 ^= x3
  289. x15 = bits.RotateLeft32(x15, 8)
  290. x11 += x15
  291. x7 ^= x11
  292. x7 = bits.RotateLeft32(x7, 7)
  293. // quarterround(x, 0, 5, 10, 15)
  294. x0 += x5
  295. x15 ^= x0
  296. x15 = bits.RotateLeft32(x15, 16)
  297. x10 += x15
  298. x5 ^= x10
  299. x5 = bits.RotateLeft32(x5, 12)
  300. x0 += x5
  301. x15 ^= x0
  302. x15 = bits.RotateLeft32(x15, 8)
  303. x10 += x15
  304. x5 ^= x10
  305. x5 = bits.RotateLeft32(x5, 7)
  306. // quarterround(x, 1, 6, 11, 12)
  307. x1 += x6
  308. x12 ^= x1
  309. x12 = bits.RotateLeft32(x12, 16)
  310. x11 += x12
  311. x6 ^= x11
  312. x6 = bits.RotateLeft32(x6, 12)
  313. x1 += x6
  314. x12 ^= x1
  315. x12 = bits.RotateLeft32(x12, 8)
  316. x11 += x12
  317. x6 ^= x11
  318. x6 = bits.RotateLeft32(x6, 7)
  319. // quarterround(x, 2, 7, 8, 13)
  320. x2 += x7
  321. x13 ^= x2
  322. x13 = bits.RotateLeft32(x13, 16)
  323. x8 += x13
  324. x7 ^= x8
  325. x7 = bits.RotateLeft32(x7, 12)
  326. x2 += x7
  327. x13 ^= x2
  328. x13 = bits.RotateLeft32(x13, 8)
  329. x8 += x13
  330. x7 ^= x8
  331. x7 = bits.RotateLeft32(x7, 7)
  332. // quarterround(x, 3, 4, 9, 14)
  333. x3 += x4
  334. x14 ^= x3
  335. x14 = bits.RotateLeft32(x14, 16)
  336. x9 += x14
  337. x4 ^= x9
  338. x4 = bits.RotateLeft32(x4, 12)
  339. x3 += x4
  340. x14 ^= x3
  341. x14 = bits.RotateLeft32(x14, 8)
  342. x9 += x14
  343. x4 ^= x9
  344. x4 = bits.RotateLeft32(x4, 7)
  345. }
  346. // HChaCha returns x0...x3 | x12...x15, which corresponds to the
  347. // indexes of the ChaCha constant and the indexes of the IV.
  348. if useUnsafe {
  349. outArr := (*[16]uint32)(unsafe.Pointer(&out[0]))
  350. outArr[0] = x0
  351. outArr[1] = x1
  352. outArr[2] = x2
  353. outArr[3] = x3
  354. outArr[4] = x12
  355. outArr[5] = x13
  356. outArr[6] = x14
  357. outArr[7] = x15
  358. } else {
  359. binary.LittleEndian.PutUint32(out[0:4], x0)
  360. binary.LittleEndian.PutUint32(out[4:8], x1)
  361. binary.LittleEndian.PutUint32(out[8:12], x2)
  362. binary.LittleEndian.PutUint32(out[12:16], x3)
  363. binary.LittleEndian.PutUint32(out[16:20], x12)
  364. binary.LittleEndian.PutUint32(out[20:24], x13)
  365. binary.LittleEndian.PutUint32(out[24:28], x14)
  366. binary.LittleEndian.PutUint32(out[28:32], x15)
  367. }
  368. return
  369. }