hwaccel_amd64.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. // hwaccel_amd64.go - AMD64 optimized routines
  2. //
  3. // To the extent possible under law, Yawning Angel has waived all copyright
  4. // and related or neighboring rights to the software, using the Creative
  5. // Commons "CC0" public domain dedication. See LICENSE or
  6. // <http://creativecommons.org/publicdomain/zero/1.0/> for full details.
  7. // +build amd64,!gccgo,!noasm,go1.10
  8. package norx
  // The functions declared below have no Go bodies in this file; their
  // implementations are presumably provided by an accompanying amd64 assembly
  // file (TODO confirm).  The parameter descriptions are inferred from the
  // call sites in this file only.

  // cpuidAmd64 is called with a pointer to the first element of a [4]uint32.
  // Callers place the CPUID leaf in element 0 and read the results back out of
  // the same array (element 0 as EAX, 1 as EBX, 2 as ECX, going by the caller's
  // comments).
  9. //go:noescape
  10. func cpuidAmd64(cpuidParams *uint32)
  // xgetbv0Amd64 is called with a pointer to the first element of a [2]uint32;
  // the caller inspects element 0 afterwards.  Presumably it executes XGETBV
  // for XCR0 -- confirm against the assembly.
  11. //go:noescape
  12. func xgetbv0Amd64(xcrVec *uint32)
  // initAVX2 is invoked by initYMM with pointers to the first state word, the
  // first key byte, the first nonce byte, initializationConstants[8], and a
  // 4-word array holding {paramW, rounds, paramP, paramT}.
  13. //go:noescape
  14. func initAVX2(s *uint64, key, nonce *byte, initConsts, instConsts *uint64)
  // absorbBlocksAVX2 is invoked by absorbDataYMM with the state, a pointer to
  // the input, the round count, a block count, and a pointer to a 4-word
  // vector whose last word is the domain separation tag.
  15. //go:noescape
  16. func absorbBlocksAVX2(s *uint64, in *byte, rounds, blocks uint64, tag *uint64)
  // encryptBlocksAVX2 is invoked by encryptDataYMM with the state, output and
  // input pointers (which may refer to the same buffer), the round count, and
  // a block count.
  17. //go:noescape
  18. func encryptBlocksAVX2(s *uint64, out, in *byte, rounds, blocks uint64)
  // decryptBlocksAVX2 is invoked by decryptDataYMM with the state, output and
  // input pointers, the round count, and a count of full blocks.
  19. //go:noescape
  20. func decryptBlocksAVX2(s *uint64, out, in *byte, rounds, blocks uint64)
  // decryptLastBlockAVX2 is invoked by decryptDataYMM for the trailing partial
  // block.  out points at a full bytesR-sized scratch buffer; in may be nil
  // when inLen is 0.
  21. //go:noescape
  22. func decryptLastBlockAVX2(s *uint64, out, in *byte, rounds, inLen uint64)
  // finalizeAVX2 is invoked by finalizeYMM with the state, a pointer to a
  // bytesC-sized output buffer, the first key byte, and the round count.
  23. //go:noescape
  24. func finalizeAVX2(s *uint64, out, key *byte, rounds uint64)
  25. func supportsAVX2() bool {
  26. // https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
  27. const (
  28. osXsaveBit = 1 << 27
  29. avx2Bit = 1 << 5
  30. )
  31. // Check to see if CPUID actually supports the leaf that indicates AVX2.
  32. // CPUID.(EAX=0H, ECX=0H) >= 7
  33. regs := [4]uint32{0x00}
  34. cpuidAmd64(&regs[0])
  35. if regs[0] < 7 {
  36. return false
  37. }
  38. // Check to see if the OS knows how to save/restore XMM/YMM state.
  39. // CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1
  40. regs = [4]uint32{0x01}
  41. cpuidAmd64(&regs[0])
  42. if regs[2]&osXsaveBit == 0 {
  43. return false
  44. }
  45. xcrRegs := [2]uint32{}
  46. xgetbv0Amd64(&xcrRegs[0])
  47. if xcrRegs[0]&6 != 6 {
  48. return false
  49. }
  50. // Check for AVX2 support.
  51. // CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1
  52. regs = [4]uint32{0x07}
  53. cpuidAmd64(&regs[0])
  54. return regs[1]&avx2Bit != 0
  55. }
  // implAVX2 is the hwaccelImpl registered under the name "AVX2".  Each hook
  // points at the corresponding *YMM wrapper defined in this file;
  // initHardwareAcceleration installs it when supportsAVX2 reports true.
  56. var implAVX2 = &hwaccelImpl{
  57. name: "AVX2",
  58. initFn: initYMM,
  59. absorbDataFn: absorbDataYMM,
  60. encryptDataFn: encryptDataYMM,
  61. decryptDataFn: decryptDataYMM,
  62. finalizeFn: finalizeYMM,
  63. }
  64. func initYMM(s *state, key, nonce []byte) {
  65. var instConsts = [4]uint64{paramW, uint64(s.rounds), paramP, paramT}
  66. initAVX2(&s.s[0], &key[0], &nonce[0], &initializationConstants[8], &instConsts[0])
  67. }
  68. func absorbDataYMM(s *state, in []byte, tag uint64) {
  69. inLen := len(in)
  70. if inLen == 0 {
  71. return
  72. }
  73. var tagVec = [4]uint64{0, 0, 0, tag}
  74. var off int
  75. if inBlocks := inLen / bytesR; inBlocks > 0 {
  76. absorbBlocksAVX2(&s.s[0], &in[0], uint64(s.rounds), uint64(inBlocks), &tagVec[0])
  77. off += inBlocks * bytesR
  78. }
  79. in = in[off:]
  80. var lastBlock [bytesR]byte
  81. padRef(&lastBlock, in)
  82. absorbBlocksAVX2(&s.s[0], &lastBlock[0], uint64(s.rounds), 1, &tagVec[0])
  83. }
  84. func encryptDataYMM(s *state, out, in []byte) {
  85. inLen := len(in)
  86. if inLen == 0 {
  87. return
  88. }
  89. var off int
  90. if inBlocks := inLen / bytesR; inBlocks > 0 {
  91. encryptBlocksAVX2(&s.s[0], &out[0], &in[0], uint64(s.rounds), uint64(inBlocks))
  92. off += inBlocks * bytesR
  93. }
  94. out, in = out[off:], in[off:]
  95. var lastBlock [bytesR]byte
  96. padRef(&lastBlock, in)
  97. encryptBlocksAVX2(&s.s[0], &lastBlock[0], &lastBlock[0], uint64(s.rounds), 1)
  98. copy(out, lastBlock[:len(in)])
  99. }
  100. func decryptDataYMM(s *state, out, in []byte) {
  101. inLen := len(in)
  102. if inLen == 0 {
  103. return
  104. }
  105. var off int
  106. if inBlocks := inLen / bytesR; inBlocks > 0 {
  107. decryptBlocksAVX2(&s.s[0], &out[0], &in[0], uint64(s.rounds), uint64(inBlocks))
  108. off += inBlocks * bytesR
  109. }
  110. out, in = out[off:], in[off:]
  111. var lastBlock [bytesR]byte
  112. var inPtr *byte
  113. if len(in) != 0 {
  114. inPtr = &in[0]
  115. }
  116. decryptLastBlockAVX2(&s.s[0], &lastBlock[0], inPtr, uint64(s.rounds), uint64(len(in)))
  117. copy(out, lastBlock[:len(in)])
  118. burnBytes(lastBlock[:])
  119. }
  120. func finalizeYMM(s *state, tag, key []byte) {
  121. var lastBlock [bytesC]byte
  122. finalizeAVX2(&s.s[0], &lastBlock[0], &key[0], uint64(s.rounds))
  123. copy(tag, lastBlock[:bytesT])
  124. burnBytes(lastBlock[:]) // burn buffer
  125. burnUint64s(s.s[:]) // at this point we can also burn the state
  126. }
  127. func initHardwareAcceleration() {
  128. if supportsAVX2() {
  129. isHardwareAccelerated = true
  130. hardwareAccelImpl = implAVX2
  131. }
  132. }