// chacha20_amd64.go - AMD64 optimized chacha20.
//
// To the extent possible under law, Yawning Angel has waived all copyright
// and related or neighboring rights to chacha20, using the Creative
// Commons "CC0" public domain dedication. See LICENSE or
// <http://creativecommons.org/publicdomain/zero/1.0/> for full details.

// +build amd64,!gccgo,!appengine

  8. package chacha20
  9. import (
  10. "math"
  11. )
  12. var usingAVX2 = false
  13. func blocksAmd64SSE2(x *uint32, inp, outp *byte, nrBlocks uint)
  14. func blocksAmd64AVX2(x *uint32, inp, outp *byte, nrBlocks uint)
  15. func cpuidAmd64(cpuidParams *uint32)
  16. func xgetbv0Amd64(xcrVec *uint32)
  17. func blocksAmd64(x *[stateSize]uint32, in []byte, out []byte, nrBlocks int, isIetf bool) {
  18. // Probably unneeded, but stating this explicitly simplifies the assembly.
  19. if nrBlocks == 0 {
  20. return
  21. }
  22. if isIetf {
  23. var totalBlocks uint64
  24. totalBlocks = uint64(x[12]) + uint64(nrBlocks)
  25. if totalBlocks > math.MaxUint32 {
  26. panic("chacha20: Exceeded keystream per nonce limit")
  27. }
  28. }
  29. if in == nil {
  30. for i := range out {
  31. out[i] = 0
  32. }
  33. in = out
  34. }
  35. // Pointless to call the AVX2 code for just a single block, since half of
  36. // the output gets discarded...
  37. if usingAVX2 && nrBlocks > 1 {
  38. blocksAmd64AVX2(&x[0], &in[0], &out[0], uint(nrBlocks))
  39. } else {
  40. blocksAmd64SSE2(&x[0], &in[0], &out[0], uint(nrBlocks))
  41. }
  42. }
  43. func supportsAVX2() bool {
  44. // https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
  45. const (
  46. osXsaveBit = 1 << 27
  47. avx2Bit = 1 << 5
  48. )
  49. // Check to see if CPUID actually supports the leaf that indicates AVX2.
  50. // CPUID.(EAX=0H, ECX=0H) >= 7
  51. regs := [4]uint32{0x00}
  52. cpuidAmd64(&regs[0])
  53. if regs[0] < 7 {
  54. return false
  55. }
  56. // Check to see if the OS knows how to save/restore XMM/YMM state.
  57. // CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1
  58. regs = [4]uint32{0x01}
  59. cpuidAmd64(&regs[0])
  60. if regs[2]&osXsaveBit == 0 {
  61. return false
  62. }
  63. xcrRegs := [2]uint32{}
  64. xgetbv0Amd64(&xcrRegs[0])
  65. if xcrRegs[0]&6 != 6 {
  66. return false
  67. }
  68. // Check for AVX2 support.
  69. // CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1
  70. regs = [4]uint32{0x07}
  71. cpuidAmd64(&regs[0])
  72. return regs[1]&avx2Bit != 0
  73. }
  74. func init() {
  75. blocksFn = blocksAmd64
  76. usingVectors = true
  77. usingAVX2 = supportsAVX2()
  78. }