aez_amd64.s 16 KB


  1. // +build !noasm
  2. // Generated by PeachPy 0.2.0 from aez_amd64.py
  3. // func cpuidAMD64(cpuidParams *uint32)
  //
  // Executes CPUID using a block of four consecutive 32-bit words at cpuidParams.
  //   in:  word 0 -> EAX (leaf), word 2 -> ECX (sub-leaf)
  //   out: EAX, EBX, ECX, EDX -> words 0, 1, 2, 3
  4. TEXT ·cpuidAMD64(SB),4,$0-8
  // SI holds the parameter block; CPUID itself only writes AX, BX, CX and DX.
  5. MOVQ cpuidParams+0(FP), SI
  6. MOVL 8(SI), CX
  7. MOVL 0(SI), AX
  8. CPUID
  // Write the four result registers back (the stores are independent).
  9. MOVL DX, 12(SI)
  10. MOVL CX, 8(SI)
  11. MOVL BX, 4(SI)
  12. MOVL AX, 0(SI)
  13. RET
  14. // func resetAMD64SSE2()
  //
  // Clears all sixteen SSE registers, X0 through X15.
  // Takes no arguments and returns nothing; only the XMM register file changes.
  //
  // Every register is XORed with itself, the standard zeroing idiom. Source and
  // destination must be the same register on each line: XORing a register
  // with a different, already-zero register leaves its old contents in place.
  15. TEXT ·resetAMD64SSE2(SB),4,$0
  16. PXOR X0, X0
  17. PXOR X1, X1
  18. PXOR X2, X2
  19. PXOR X3, X3
  20. PXOR X4, X4
  21. PXOR X5, X5
  22. PXOR X6, X6
  23. PXOR X7, X7
  24. PXOR X8, X8
  25. PXOR X9, X9
  26. PXOR X10, X10
  // X11 must be cleared against itself (not against X10) to actually become zero.
  27. PXOR X11, X11
  28. PXOR X12, X12
  29. PXOR X13, X13
  30. PXOR X14, X14
  31. PXOR X15, X15
  32. RET
  33. // func xorBytes1x16AMD64SSE2(a *uint8, b *uint8, dst *uint8)
  //
  // dst[0:16] = a[0:16] ^ b[0:16]
  // All three pointers are accessed with unaligned 16-byte moves.
  34. TEXT ·xorBytes1x16AMD64SSE2(SB),4,$0-24
  35. MOVQ dst+16(FP), DX
  36. MOVQ b+8(FP), DI
  37. MOVQ a+0(FP), SI
  // Both inputs are read in full before the single store to dst.
  38. MOVOU 0(DI), X3
  39. MOVOU 0(SI), X2
  40. PXOR X2, X3
  41. MOVOU X3, 0(DX)
  42. RET
  43. // func xorBytes4x16AMD64SSE2(a *uint8, b *uint8, c *uint8, d *uint8, dst *uint8)
  //
  // dst[0:16] = a[0:16] ^ b[0:16] ^ c[0:16] ^ d[0:16]
  // All five pointers are accessed with unaligned 16-byte moves.
  44. TEXT ·xorBytes4x16AMD64SSE2(SB),4,$0-40
  45. MOVQ dst+32(FP), R8
  46. MOVQ d+24(FP), R9
  47. MOVQ c+16(FP), R10
  48. MOVQ b+8(FP), R11
  49. MOVQ a+0(FP), SI
  // All four inputs are read before dst is written.
  50. MOVOU 0(SI), X4
  51. MOVOU 0(R11), X5
  52. MOVOU 0(R10), X6
  53. MOVOU 0(R9), X7
  // Fold b, c and d into the accumulator one after another.
  54. PXOR X5, X4
  55. PXOR X6, X4
  56. PXOR X7, X4
  57. MOVOU X4, 0(R8)
  58. RET
  59. // func aezAES4AMD64AESNI(j *uint8, i *uint8, l *uint8, k *uint8, src *uint8, dst *uint8)
  //
  // dst[0:16] = four AESENC rounds applied to (src ^ j ^ i ^ l), with round keys
  // k[16:32], k[0:16], k[32:48] and an all-zero key, in that order.
  //
  // src and dst use unaligned moves. j, i, l and k are read with MOVO, so they
  // must be 16-byte aligned.
  60. TEXT ·aezAES4AMD64AESNI(SB),4,$0-48
  61. MOVQ src+32(FP), R8
  62. MOVQ j+0(FP), R9
  63. MOVQ i+8(FP), R10
  64. MOVQ l+16(FP), R11
  65. MOVQ k+24(FP), R12
  66. MOVQ dst+40(FP), R13
  // X5 is the working state; fold the three 16-byte offsets into it.
  67. MOVOU 0(R8), X5
  68. MOVO 0(R9), X6
  69. MOVO 0(R10), X7
  70. MOVO 0(R11), X8
  71. PXOR X6, X5
  72. PXOR X7, X5
  73. PXOR X8, X5
  // Round keys in the order they are applied; X12 is the zero key for round 4.
  74. MOVO 16(R12), X9
  75. MOVO 0(R12), X10
  76. MOVO 32(R12), X11
  77. PXOR X12, X12
  78. AESENC X9, X5
  79. AESENC X10, X5
  80. AESENC X11, X5
  81. AESENC X12, X5
  82. MOVOU X5, 0(R13)
  83. RET
  84. // func aezAES10AMD64AESNI(l *uint8, k *uint8, src *uint8, dst *uint8)
  //
  // dst[0:16] = ten AESENC rounds applied to (src ^ l). The round keys cycle
  // through k[0:16], k[16:32], k[32:48] and the tenth round uses k[0:16] again.
  //
  // src, l and dst use unaligned moves. k is read with MOVO, so it must be
  // 16-byte aligned.
  85. TEXT ·aezAES10AMD64AESNI(SB),4,$0-32
  86. MOVQ src+16(FP), SI
  87. MOVQ l+0(FP), DI
  88. MOVQ k+8(FP), R8
  89. MOVQ dst+24(FP), R9
  // X4 is the working state.
  90. MOVOU 0(SI), X4
  91. MOVOU 0(DI), X5
  92. PXOR X5, X4
  // X6, X7, X8 = the three 16-byte key words, in memory order.
  93. MOVO 0(R8), X6
  94. MOVO 16(R8), X7
  95. MOVO 32(R8), X8
  // Rounds 1-9: three passes over (X6, X7, X8).
  96. AESENC X6, X4
  97. AESENC X7, X4
  98. AESENC X8, X4
  99. AESENC X6, X4
  100. AESENC X7, X4
  101. AESENC X8, X4
  102. AESENC X6, X4
  103. AESENC X7, X4
  104. AESENC X8, X4
  // Round 10.
  105. AESENC X6, X4
  106. MOVOU X4, 0(R9)
  107. RET
  108. // func aezCorePass1AMD64AESNI(src *uint8, dst *uint8, x *uint8, i *uint8, l *uint8, k *uint8, consts *uint8, sz *uint)
  //
  // Transforms sz bytes from src into dst, 32 bytes (one pair of 16-byte words)
  // at a time, and XORs the second output word of every pair into the 16-byte
  // accumulator at x.
  //
  // Work is done in a 256-byte loop followed by at most one 128-, one 64- and
  // one 32-byte tail step. A remainder smaller than 32 bytes is not processed.
  //
  // Register roles:
  //   AX  src cursor            BX  dst cursor         CX  x
  //   DX  l (table of 16-byte entries)                 DI  bytes remaining
  //   SI  index of the next l entry for the tails (starts at 1)
  //   BP  consts; reused as a scratch pointer in the 64-byte tail
  //   X0  accumulator (loaded from x, stored back at `out`)
  //   X1  16 bytes loaded from i; rewritten after every 256-byte iteration
  //   X2, X3, X4  k[0:16], k[16:32], k[32:48]
  //   X5  zero (used as the last AESENC round key)
  //
  // Alignment: l and consts are used as memory operands of PXOR/PAND/MOVO and
  // therefore must be 16-byte aligned. src, dst, x, i and k use MOVOU.
  //
  // NOTE(review): the signature comment says `sz *uint`, but the code loads
  // sz+56(FP) and uses it directly as a byte count (it is never dereferenced).
  // Confirm against the Go declaration.
  // NOTE(review): BP is overwritten and not restored before RET.
  109. TEXT ·aezCorePass1AMD64AESNI(SB),4,$0-64
  110. MOVQ src+0(FP), AX
  111. MOVQ dst+8(FP), BX
  112. MOVQ x+16(FP), CX
  113. MOVQ l+32(FP), DX
  114. MOVQ sz+56(FP), DI
  115. MOVQ $1, SI
  116. MOVOU 0(CX), X0
  117. MOVQ i+24(FP), BP
  118. MOVOU 0(BP), X1
  119. MOVQ k+40(FP), BP
  120. MOVOU 0(BP), X2
  121. MOVOU 16(BP), X3
  122. MOVOU 32(BP), X4
  123. MOVQ consts+48(FP), BP
  124. PXOR X5, X5
  // ---- 256-byte loop: eight pairs per iteration --------------------------
  // SUBQ sets carry when fewer than 256 bytes remain; DI is then "remaining - 256".
  125. SUBQ $256, DI
  126. JCS vector_loop256_end
  127. vector_loop256_begin:
  // Load the second word of each of the eight pairs.
  128. MOVOU 16(AX), X6
  129. MOVOU 48(AX), X7
  130. MOVOU 80(AX), X8
  131. MOVOU 112(AX), X9
  132. MOVOU 144(AX), X10
  133. MOVOU 176(AX), X11
  134. MOVOU 208(AX), X12
  135. MOVOU 240(AX), X13
  // X14 = k[16:32] ^ X1, XORed into every word.
  136. MOVO X3, X14
  137. PXOR X1, X14
  138. PXOR X14, X6
  139. PXOR X14, X7
  140. PXOR X14, X8
  141. PXOR X14, X9
  142. PXOR X14, X10
  143. PXOR X14, X11
  144. PXOR X14, X12
  145. PXOR X14, X13
  // Per-pair l entry: entries 1..7 for pairs 0..6, entry 0 for pair 7.
  146. PXOR 16(DX), X6
  147. PXOR 32(DX), X7
  148. PXOR 48(DX), X8
  149. PXOR 64(DX), X9
  150. PXOR 80(DX), X10
  151. PXOR 96(DX), X11
  152. PXOR 112(DX), X12
  153. PXOR 0(DX), X13
  // Four AESENC rounds keyed k[16:32], k[0:16], k[32:48], zero.
  154. AESENC X3, X6
  155. AESENC X3, X7
  156. AESENC X3, X8
  157. AESENC X3, X9
  158. AESENC X3, X10
  159. AESENC X3, X11
  160. AESENC X3, X12
  161. AESENC X3, X13
  162. AESENC X2, X6
  163. AESENC X2, X7
  164. AESENC X2, X8
  165. AESENC X2, X9
  166. AESENC X2, X10
  167. AESENC X2, X11
  168. AESENC X2, X12
  169. AESENC X2, X13
  170. AESENC X4, X6
  171. AESENC X4, X7
  172. AESENC X4, X8
  173. AESENC X4, X9
  174. AESENC X4, X10
  175. AESENC X4, X11
  176. AESENC X4, X12
  177. AESENC X4, X13
  178. AESENC X5, X6
  179. AESENC X5, X7
  180. AESENC X5, X8
  181. AESENC X5, X9
  182. AESENC X5, X10
  183. AESENC X5, X11
  184. AESENC X5, X12
  185. AESENC X5, X13
  // XOR with the first word of each source pair; the result is the first
  // output word of each pair.
  186. MOVOU 0(AX), X14
  187. MOVOU 32(AX), X15
  188. PXOR X14, X6
  189. PXOR X15, X7
  190. MOVOU 64(AX), X14
  191. MOVOU 96(AX), X15
  192. PXOR X14, X8
  193. PXOR X15, X9
  194. MOVOU 128(AX), X14
  195. MOVOU 160(AX), X15
  196. PXOR X14, X10
  197. PXOR X15, X11
  198. MOVOU 192(AX), X14
  199. MOVOU 224(AX), X15
  200. PXOR X14, X12
  201. PXOR X15, X13
  202. MOVOU X6, 0(BX)
  203. MOVOU X7, 32(BX)
  204. MOVOU X8, 64(BX)
  205. MOVOU X9, 96(BX)
  206. MOVOU X10, 128(BX)
  207. MOVOU X11, 160(BX)
  208. MOVOU X12, 192(BX)
  209. MOVOU X13, 224(BX)
  // Second stage on the values just stored: XOR k[0:16], then the same four rounds.
  210. PXOR X2, X6
  211. PXOR X2, X7
  212. PXOR X2, X8
  213. PXOR X2, X9
  214. PXOR X2, X10
  215. PXOR X2, X11
  216. PXOR X2, X12
  217. PXOR X2, X13
  218. AESENC X3, X6
  219. AESENC X3, X7
  220. AESENC X3, X8
  221. AESENC X3, X9
  222. AESENC X3, X10
  223. AESENC X3, X11
  224. AESENC X3, X12
  225. AESENC X3, X13
  226. AESENC X2, X6
  227. AESENC X2, X7
  228. AESENC X2, X8
  229. AESENC X2, X9
  230. AESENC X2, X10
  231. AESENC X2, X11
  232. AESENC X2, X12
  233. AESENC X2, X13
  234. AESENC X4, X6
  235. AESENC X4, X7
  236. AESENC X4, X8
  237. AESENC X4, X9
  238. AESENC X4, X10
  239. AESENC X4, X11
  240. AESENC X4, X12
  241. AESENC X4, X13
  242. AESENC X5, X6
  243. AESENC X5, X7
  244. AESENC X5, X8
  245. AESENC X5, X9
  246. AESENC X5, X10
  247. AESENC X5, X11
  248. AESENC X5, X12
  249. AESENC X5, X13
  // XOR with the second word of each source pair (reloaded from src); the
  // result is the second output word of each pair.
  250. MOVOU 16(AX), X14
  251. MOVOU 48(AX), X15
  252. PXOR X14, X6
  253. PXOR X15, X7
  254. MOVOU 80(AX), X14
  255. MOVOU 112(AX), X15
  256. PXOR X14, X8
  257. PXOR X15, X9
  258. MOVOU 144(AX), X14
  259. MOVOU 176(AX), X15
  260. PXOR X14, X10
  261. PXOR X15, X11
  262. MOVOU 208(AX), X14
  263. MOVOU 240(AX), X15
  264. PXOR X14, X12
  265. PXOR X15, X13
  266. MOVOU X6, 16(BX)
  267. MOVOU X7, 48(BX)
  268. MOVOU X8, 80(BX)
  269. MOVOU X9, 112(BX)
  270. MOVOU X10, 144(BX)
  271. MOVOU X11, 176(BX)
  272. MOVOU X12, 208(BX)
  273. MOVOU X13, 240(BX)
  // Fold the eight second output words into the accumulator.
  274. PXOR X6, X0
  275. PXOR X7, X0
  276. PXOR X8, X0
  277. PXOR X9, X0
  278. PXOR X10, X0
  279. PXOR X11, X0
  280. PXOR X12, X0
  281. PXOR X13, X0
  // Rewrite X1 for the next iteration: byte-shuffle by consts[0:16], shift each
  // 32-bit lane left by one, XOR in the lane sign bits masked by consts[16:32]
  // and rotated across lanes (PSHUFL $147), then shuffle back.
  // NOTE(review): looks like a GF(2^128) doubling of the i value — confirm
  // against aez_amd64.py.
  282. MOVO 0(BP), X14
  283. PSHUFB X14, X1
  284. MOVO X1, X15
  285. PSRAL $31, X15
  286. PAND 16(BP), X15
  287. PSHUFL $147, X15, X15
  288. PSLLL $1, X1
  289. PXOR X15, X1
  290. PSHUFB X14, X1
  // Advance both cursors and loop while at least 256 bytes remain.
  291. ADDQ $256, AX
  292. ADDQ $256, BX
  293. SUBQ $256, DI
  294. JCC vector_loop256_begin
  295. vector_loop256_end:
  // ---- 128-byte tail: four pairs, l entries 1..4 --------------------------
  // Undo the speculative subtraction, then test for 128 bytes.
  296. ADDQ $256, DI
  297. SUBQ $128, DI
  298. JCS process_64bytes
  // The second source words stay in X10-X13 so they need not be reloaded later.
  299. MOVOU 16(AX), X10
  300. MOVOU 48(AX), X11
  301. MOVOU 80(AX), X12
  302. MOVOU 112(AX), X13
  303. MOVO X10, X6
  304. MOVO X11, X7
  305. MOVOU X12, X8
  306. MOVOU X13, X9
  307. MOVO X3, X14
  308. PXOR X1, X14
  309. PXOR X14, X6
  310. PXOR X14, X7
  311. PXOR X14, X8
  312. PXOR X14, X9
  313. PXOR 16(DX), X6
  314. PXOR 32(DX), X7
  315. PXOR 48(DX), X8
  316. PXOR 64(DX), X9
  317. AESENC X3, X6
  318. AESENC X3, X7
  319. AESENC X3, X8
  320. AESENC X3, X9
  321. AESENC X2, X6
  322. AESENC X2, X7
  323. AESENC X2, X8
  324. AESENC X2, X9
  325. AESENC X4, X6
  326. AESENC X4, X7
  327. AESENC X4, X8
  328. AESENC X4, X9
  329. AESENC X5, X6
  330. AESENC X5, X7
  331. AESENC X5, X8
  332. AESENC X5, X9
  333. MOVOU 0(AX), X14
  334. MOVOU 32(AX), X15
  335. PXOR X14, X6
  336. PXOR X15, X7
  337. MOVOU 64(AX), X14
  338. MOVOU 96(AX), X15
  339. PXOR X14, X8
  340. PXOR X15, X9
  341. MOVOU X6, 0(BX)
  342. MOVOU X7, 32(BX)
  343. MOVOU X8, 64(BX)
  344. MOVOU X9, 96(BX)
  345. PXOR X2, X6
  346. PXOR X2, X7
  347. PXOR X2, X8
  348. PXOR X2, X9
  349. AESENC X3, X6
  350. AESENC X3, X7
  351. AESENC X3, X8
  352. AESENC X3, X9
  353. AESENC X2, X6
  354. AESENC X2, X7
  355. AESENC X2, X8
  356. AESENC X2, X9
  357. AESENC X4, X6
  358. AESENC X4, X7
  359. AESENC X4, X8
  360. AESENC X4, X9
  361. AESENC X5, X6
  362. AESENC X5, X7
  363. AESENC X5, X8
  364. AESENC X5, X9
  365. PXOR X10, X6
  366. PXOR X11, X7
  367. PXOR X12, X8
  368. PXOR X13, X9
  369. MOVOU X6, 16(BX)
  370. MOVOU X7, 48(BX)
  371. MOVOU X8, 80(BX)
  372. MOVOU X9, 112(BX)
  373. PXOR X6, X0
  374. PXOR X7, X0
  375. PXOR X8, X0
  376. PXOR X9, X0
  // Four l entries consumed; the extra SUBQ pairs with the ADDQ at process_64bytes.
  377. ADDQ $128, AX
  378. ADDQ $128, BX
  379. ADDQ $4, SI
  380. SUBQ $128, DI
  381. process_64bytes:
  // ---- 64-byte tail: two pairs, l entries SI and SI+1 ---------------------
  382. ADDQ $128, DI
  383. SUBQ $64, DI
  384. JCS process_32bytes
  // BP = l + 16*SI (consts is no longer needed from here on).
  385. MOVQ SI, BP
  386. SHLQ $4, BP
  387. ADDQ DX, BP
  388. MOVOU 16(AX), X10
  389. MOVOU 48(AX), X11
  390. MOVO X10, X6
  391. MOVO X11, X7
  392. PXOR X3, X6
  393. PXOR X3, X7
  394. PXOR X1, X6
  395. PXOR X1, X7
  396. PXOR 0(BP), X6
  397. PXOR 16(BP), X7
  398. AESENC X3, X6
  399. AESENC X3, X7
  400. AESENC X2, X6
  401. AESENC X2, X7
  402. AESENC X4, X6
  403. AESENC X4, X7
  404. AESENC X5, X6
  405. AESENC X5, X7
  406. MOVOU 0(AX), X14
  407. MOVOU 32(AX), X15
  408. PXOR X14, X6
  409. PXOR X15, X7
  410. MOVOU X6, 0(BX)
  411. MOVOU X7, 32(BX)
  412. PXOR X2, X6
  413. PXOR X2, X7
  414. AESENC X3, X6
  415. AESENC X3, X7
  416. AESENC X2, X6
  417. AESENC X2, X7
  418. AESENC X4, X6
  419. AESENC X4, X7
  420. AESENC X5, X6
  421. AESENC X5, X7
  422. PXOR X10, X6
  423. PXOR X11, X7
  424. MOVOU X6, 16(BX)
  425. MOVOU X7, 48(BX)
  426. PXOR X6, X0
  427. PXOR X7, X0
  428. ADDQ $64, AX
  429. ADDQ $64, BX
  430. ADDQ $2, SI
  431. SUBQ $64, DI
  432. process_32bytes:
  // ---- 32-byte tail: one pair, l entry (SI & 7) ---------------------------
  433. ADDQ $64, DI
  434. SUBQ $32, DI
  435. JCS out
  // DX = l + 16*(SI & 7).
  436. ANDQ $7, SI
  437. SHLQ $4, SI
  438. ADDQ SI, DX
  439. MOVOU 16(AX), X10
  440. MOVO X10, X6
  441. PXOR X3, X6
  442. PXOR X1, X6
  443. PXOR 0(DX), X6
  444. AESENC X3, X6
  445. AESENC X2, X6
  446. AESENC X4, X6
  447. AESENC X5, X6
  448. MOVOU 0(AX), X14
  449. PXOR X14, X6
  450. MOVOU X6, 0(BX)
  451. PXOR X2, X6
  452. AESENC X3, X6
  453. AESENC X2, X6
  454. AESENC X4, X6
  455. AESENC X5, X6
  456. PXOR X10, X6
  457. MOVOU X6, 16(BX)
  458. PXOR X6, X0
  459. out:
  // Write the accumulator back to x.
  460. MOVOU X0, 0(CX)
  461. RET
  462. // func aezCorePass2AMD64AESNI(dst *uint8, y *uint8, s *uint8, j *uint8, i *uint8, l *uint8, k *uint8, consts *uint8, sz *uint)
  //
  // Transforms sz bytes of dst in place, 32 bytes (one pair of 16-byte words)
  // at a time, and XORs an intermediate word of every pair into the 16-byte
  // accumulator at y.
  //
  // Work is done in a 256-byte loop followed by at most one 128-, one 64- and
  // one 32-byte tail step. A remainder smaller than 32 bytes is not processed.
  //
  // Register roles:
  //   AX  dst cursor (read and written)    BX  y        CX  j
  //   DX  l (table of 16-byte entries)     DI  bytes remaining
  //   SI  index of the next l entry for the tails (starts at 1)
  //   BP  k, then i, then consts; reused as a scratch pointer in the 64-byte tail
  //   R8  s                                R9  caller's SP
  //   X0, X1, X2  k[0:16], k[16:32], k[32:48]
  //   X3  accumulator (loaded from y, stored back at `out`)
  //   X4  16 bytes loaded from i; rewritten after every 256-byte iteration
  //   X5  zero (last AESENC round key; also used to wipe the stack scratch)
  //   X6  s ^ j[16:32] (recomputed at the end of each 256-byte iteration)
  //
  // Stack: SP is rounded down to a 32-byte boundary and lowered by 256 bytes to
  // obtain aligned scratch. Every scratch slot that was written is overwritten
  // with zero before SP is restored from R9.
  //
  // Alignment: j, l and consts are used as memory operands of PXOR/PAND/MOVO
  // and therefore must be 16-byte aligned. dst, y, s, i and k use MOVOU.
  //
  // NOTE(review): the signature comment says `sz *uint`, but the code loads
  // sz+64(FP) and uses it directly as a byte count (it is never dereferenced).
  // Confirm against the Go declaration.
  // NOTE(review): the TEXT directive declares a 0-byte frame while the body
  // moves SP by hand, and BP is overwritten without being restored; confirm
  // this is acceptable for the Go toolchain/runtime in use.
  463. TEXT ·aezCorePass2AMD64AESNI(SB),4,$0-72
  464. MOVQ dst+0(FP), AX
  465. MOVQ y+8(FP), BX
  466. MOVQ j+24(FP), CX
  467. MOVQ l+40(FP), DX
  468. MOVQ sz+64(FP), DI
  469. MOVQ $1, SI
  470. MOVQ k+48(FP), BP
  471. MOVOU 0(BP), X0
  472. MOVOU 16(BP), X1
  473. MOVOU 32(BP), X2
  474. MOVOU 0(BX), X3
  475. MOVQ i+32(FP), BP
  476. MOVOU 0(BP), X4
  477. MOVQ consts+56(FP), BP
  478. PXOR X5, X5
  479. MOVQ s+16(FP), R8
  480. MOVOU 0(R8), X6
  481. PXOR 16(CX), X6
  // Save SP, align it down to 32 bytes (mask = ^31) and reserve 256 bytes.
  482. MOVQ SP, R9
  483. ANDQ $18446744073709551584, SP
  484. SUBQ $256, SP
  // ---- 256-byte loop: eight pairs per iteration --------------------------
  // SUBQ sets carry when fewer than 256 bytes remain; DI is then "remaining - 256".
  485. SUBQ $256, DI
  486. JCS vector_loop256_end
  487. vector_loop256_begin:
  // X7..X14 = (s ^ j[16:32] ^ X4) ^ l entry; entries 1..7 for pairs 0..6 and
  // entry 0 for pair 7.
  488. MOVO X6, X7
  489. PXOR X4, X7
  490. MOVO X7, X8
  491. MOVO X7, X9
  492. MOVO X7, X10
  493. MOVO X7, X11
  494. MOVO X7, X12
  495. MOVO X7, X13
  496. MOVO X7, X14
  497. PXOR 16(DX), X7
  498. PXOR 32(DX), X8
  499. PXOR 48(DX), X9
  500. PXOR 64(DX), X10
  501. PXOR 80(DX), X11
  502. PXOR 96(DX), X12
  503. PXOR 112(DX), X13
  504. PXOR 0(DX), X14
  // Four AESENC rounds keyed k[16:32], k[0:16], k[32:48], zero.
  505. AESENC X1, X7
  506. AESENC X1, X8
  507. AESENC X1, X9
  508. AESENC X1, X10
  509. AESENC X1, X11
  510. AESENC X1, X12
  511. AESENC X1, X13
  512. AESENC X1, X14
  513. AESENC X0, X7
  514. AESENC X0, X8
  515. AESENC X0, X9
  516. AESENC X0, X10
  517. AESENC X0, X11
  518. AESENC X0, X12
  519. AESENC X0, X13
  520. AESENC X0, X14
  521. AESENC X2, X7
  522. AESENC X2, X8
  523. AESENC X2, X9
  524. AESENC X2, X10
  525. AESENC X2, X11
  526. AESENC X2, X12
  527. AESENC X2, X13
  528. AESENC X2, X14
  529. AESENC X5, X7
  530. AESENC X5, X8
  531. AESENC X5, X9
  532. AESENC X5, X10
  533. AESENC X5, X11
  534. AESENC X5, X12
  535. AESENC X5, X13
  536. AESENC X5, X14
  // First word of each pair ^ round output: folded into the X3 accumulator and
  // parked in the even scratch slots. X6 and X15 are temporaries here, which
  // is why X6 is recomputed at the bottom of the loop.
  537. MOVOU 0(AX), X15
  538. MOVOU 32(AX), X6
  539. PXOR X7, X15
  540. PXOR X8, X6
  541. PXOR X15, X3
  542. PXOR X6, X3
  543. MOVO X15, 0(SP)
  544. MOVO X6, 32(SP)
  545. MOVOU 64(AX), X15
  546. MOVOU 96(AX), X6
  547. PXOR X9, X15
  548. PXOR X10, X6
  549. PXOR X15, X3
  550. PXOR X6, X3
  551. MOVO X15, 64(SP)
  552. MOVO X6, 96(SP)
  553. MOVOU 128(AX), X15
  554. MOVOU 160(AX), X6
  555. PXOR X11, X15
  556. PXOR X12, X6
  557. PXOR X15, X3
  558. PXOR X6, X3
  559. MOVO X15, 128(SP)
  560. MOVO X6, 160(SP)
  561. MOVOU 192(AX), X15
  562. MOVOU 224(AX), X6
  563. PXOR X13, X15
  564. PXOR X14, X6
  565. PXOR X15, X3
  566. PXOR X6, X3
  567. MOVO X15, 192(SP)
  568. MOVO X6, 224(SP)
  // Second word of each pair ^ round output: parked in the odd scratch slots
  // and kept live in X7..X14.
  569. MOVOU 16(AX), X15
  570. MOVOU 48(AX), X6
  571. PXOR X15, X7
  572. PXOR X6, X8
  573. MOVO X7, 16(SP)
  574. MOVO X8, 48(SP)
  575. MOVOU 80(AX), X15
  576. MOVOU 112(AX), X6
  577. PXOR X15, X9
  578. PXOR X6, X10
  579. MOVO X9, 80(SP)
  580. MOVO X10, 112(SP)
  581. MOVOU 144(AX), X15
  582. MOVOU 176(AX), X6
  583. PXOR X15, X11
  584. PXOR X6, X12
  585. MOVO X11, 144(SP)
  586. MOVO X12, 176(SP)
  587. MOVOU 208(AX), X15
  588. MOVOU 240(AX), X6
  589. PXOR X15, X13
  590. PXOR X6, X14
  591. MOVO X13, 208(SP)
  592. MOVO X14, 240(SP)
  // Second stage: XOR k[0:16], the same four rounds, then XOR the even scratch
  // slots. The result becomes the second word of each output pair.
  593. PXOR X0, X7
  594. PXOR X0, X8
  595. PXOR X0, X9
  596. PXOR X0, X10
  597. PXOR X0, X11
  598. PXOR X0, X12
  599. PXOR X0, X13
  600. PXOR X0, X14
  601. AESENC X1, X7
  602. AESENC X1, X8
  603. AESENC X1, X9
  604. AESENC X1, X10
  605. AESENC X1, X11
  606. AESENC X1, X12
  607. AESENC X1, X13
  608. AESENC X1, X14
  609. AESENC X0, X7
  610. AESENC X0, X8
  611. AESENC X0, X9
  612. AESENC X0, X10
  613. AESENC X0, X11
  614. AESENC X0, X12
  615. AESENC X0, X13
  616. AESENC X0, X14
  617. AESENC X2, X7
  618. AESENC X2, X8
  619. AESENC X2, X9
  620. AESENC X2, X10
  621. AESENC X2, X11
  622. AESENC X2, X12
  623. AESENC X2, X13
  624. AESENC X2, X14
  625. AESENC X5, X7
  626. AESENC X5, X8
  627. AESENC X5, X9
  628. AESENC X5, X10
  629. AESENC X5, X11
  630. AESENC X5, X12
  631. AESENC X5, X13
  632. AESENC X5, X14
  633. PXOR 0(SP), X7
  634. PXOR 32(SP), X8
  635. PXOR 64(SP), X9
  636. PXOR 96(SP), X10
  637. PXOR 128(SP), X11
  638. PXOR 160(SP), X12
  639. PXOR 192(SP), X13
  640. PXOR 224(SP), X14
  641. MOVOU X7, 16(AX)
  642. MOVOU X8, 48(AX)
  643. MOVOU X9, 80(AX)
  644. MOVOU X10, 112(AX)
  645. MOVOU X11, 144(AX)
  646. MOVOU X12, 176(AX)
  647. MOVOU X13, 208(AX)
  648. MOVOU X14, 240(AX)
  // Third stage: XOR (j[0:16] ^ X4) and the per-pair l entry, the same four
  // rounds, then XOR the odd scratch slots. The result becomes the first word
  // of each output pair.
  649. MOVO 0(CX), X15
  650. PXOR X4, X15
  651. PXOR X15, X7
  652. PXOR X15, X8
  653. PXOR X15, X9
  654. PXOR X15, X10
  655. PXOR X15, X11
  656. PXOR X15, X12
  657. PXOR X15, X13
  658. PXOR X15, X14
  659. PXOR 16(DX), X7
  660. PXOR 32(DX), X8
  661. PXOR 48(DX), X9
  662. PXOR 64(DX), X10
  663. PXOR 80(DX), X11
  664. PXOR 96(DX), X12
  665. PXOR 112(DX), X13
  666. PXOR 0(DX), X14
  667. AESENC X1, X7
  668. AESENC X1, X8
  669. AESENC X1, X9
  670. AESENC X1, X10
  671. AESENC X1, X11
  672. AESENC X1, X12
  673. AESENC X1, X13
  674. AESENC X1, X14
  675. AESENC X0, X7
  676. AESENC X0, X8
  677. AESENC X0, X9
  678. AESENC X0, X10
  679. AESENC X0, X11
  680. AESENC X0, X12
  681. AESENC X0, X13
  682. AESENC X0, X14
  683. AESENC X2, X7
  684. AESENC X2, X8
  685. AESENC X2, X9
  686. AESENC X2, X10
  687. AESENC X2, X11
  688. AESENC X2, X12
  689. AESENC X2, X13
  690. AESENC X2, X14
  691. AESENC X5, X7
  692. AESENC X5, X8
  693. AESENC X5, X9
  694. AESENC X5, X10
  695. AESENC X5, X11
  696. AESENC X5, X12
  697. AESENC X5, X13
  698. AESENC X5, X14
  699. PXOR 16(SP), X7
  700. PXOR 48(SP), X8
  701. PXOR 80(SP), X9
  702. PXOR 112(SP), X10
  703. PXOR 144(SP), X11
  704. PXOR 176(SP), X12
  705. PXOR 208(SP), X13
  706. PXOR 240(SP), X14
  707. MOVOU X7, 0(AX)
  708. MOVOU X8, 32(AX)
  709. MOVOU X9, 64(AX)
  710. MOVOU X10, 96(AX)
  711. MOVOU X11, 128(AX)
  712. MOVOU X12, 160(AX)
  713. MOVOU X13, 192(AX)
  714. MOVOU X14, 224(AX)
  // Rewrite X4 for the next iteration: byte-shuffle by consts[0:16], shift each
  // 32-bit lane left by one, XOR in the lane sign bits masked by consts[16:32]
  // and rotated across lanes (PSHUFL $147), then shuffle back.
  // NOTE(review): looks like a GF(2^128) doubling of the i value — confirm
  // against aez_amd64.py.
  715. MOVO 0(BP), X15
  716. PSHUFB X15, X4
  717. MOVO X4, X6
  718. PSRAL $31, X6
  719. PAND 16(BP), X6
  720. PSHUFL $147, X6, X6
  721. PSLLL $1, X4
  722. PXOR X6, X4
  723. PSHUFB X15, X4
  // X6 was used as a temporary above; rebuild s ^ j[16:32].
  724. MOVOU 0(R8), X6
  725. PXOR 16(CX), X6
  726. ADDQ $256, AX
  727. SUBQ $256, DI
  728. JCC vector_loop256_begin
  // Loop finished: wipe the scratch slots that only the 256-byte loop writes.
  // Slots 0, 32, 64 and 96 are also used by the 128-byte tail and are wiped at `out`.
  729. MOVO X5, 16(SP)
  730. MOVO X5, 48(SP)
  731. MOVO X5, 80(SP)
  732. MOVO X5, 112(SP)
  733. MOVO X5, 128(SP)
  734. MOVO X5, 144(SP)
  735. MOVO X5, 160(SP)
  736. MOVO X5, 176(SP)
  737. MOVO X5, 192(SP)
  738. MOVO X5, 208(SP)
  739. MOVO X5, 224(SP)
  740. MOVO X5, 240(SP)
  741. vector_loop256_end:
  // ---- 128-byte tail: four pairs, l entries 1..4 --------------------------
  // Undo the speculative subtraction, then test for 128 bytes.
  742. ADDQ $256, DI
  743. SUBQ $128, DI
  744. JCS process_64bytes
  745. MOVO X6, X7
  746. PXOR X4, X7
  747. MOVO X7, X8
  748. MOVO X7, X9
  749. MOVO X7, X10
  750. PXOR 16(DX), X7
  751. PXOR 32(DX), X8
  752. PXOR 48(DX), X9
  753. PXOR 64(DX), X10
  754. AESENC X1, X7
  755. AESENC X1, X8
  756. AESENC X1, X9
  757. AESENC X1, X10
  758. AESENC X0, X7
  759. AESENC X0, X8
  760. AESENC X0, X9
  761. AESENC X0, X10
  762. AESENC X2, X7
  763. AESENC X2, X8
  764. AESENC X2, X9
  765. AESENC X2, X10
  766. AESENC X5, X7
  767. AESENC X5, X8
  768. AESENC X5, X9
  769. AESENC X5, X10
  // First words ^ round output: folded into X3 and parked in scratch slots
  // 0, 32, 64, 96.
  770. MOVOU 0(AX), X11
  771. MOVOU 32(AX), X13
  772. MOVOU 64(AX), X12
  773. MOVOU 96(AX), X14
  774. PXOR X7, X11
  775. PXOR X8, X13
  776. PXOR X9, X12
  777. PXOR X10, X14
  778. PXOR X11, X3
  779. PXOR X13, X3
  780. PXOR X12, X3
  781. PXOR X14, X3
  782. MOVO X11, 0(SP)
  783. MOVO X13, 32(SP)
  784. MOVO X12, 64(SP)
  785. MOVO X14, 96(SP)
  786. MOVOU 16(AX), X12
  787. MOVOU 48(AX), X14
  788. MOVOU 80(AX), X11
  789. MOVOU 112(AX), X13
  790. PXOR X12, X7
  791. PXOR X14, X8
  792. PXOR X11, X9
  793. PXOR X13, X10
  // NOTE(review): these four stores are overwritten by the stores to the same
  // offsets after the second stage below, and nothing reads those offsets in
  // between — they look redundant; confirm against aez_amd64.py.
  794. MOVOU X7, 16(AX)
  795. MOVOU X8, 48(AX)
  796. MOVOU X9, 80(AX)
  797. MOVOU X10, 112(AX)
  // Keep the second-word intermediates in registers (X12, X14, X11, X13)
  // instead of scratch.
  798. MOVO X7, X12
  799. MOVO X8, X14
  800. MOVO X9, X11
  801. MOVO X10, X13
  // Second stage: XOR k[0:16], four rounds, XOR the scratch slots -> second
  // output word of each pair.
  802. PXOR X0, X7
  803. PXOR X0, X8
  804. PXOR X0, X9
  805. PXOR X0, X10
  806. AESENC X1, X7
  807. AESENC X1, X8
  808. AESENC X1, X9
  809. AESENC X1, X10
  810. AESENC X0, X7
  811. AESENC X0, X8
  812. AESENC X0, X9
  813. AESENC X0, X10
  814. AESENC X2, X7
  815. AESENC X2, X8
  816. AESENC X2, X9
  817. AESENC X2, X10
  818. AESENC X5, X7
  819. AESENC X5, X8
  820. AESENC X5, X9
  821. AESENC X5, X10
  822. PXOR 0(SP), X7
  823. PXOR 32(SP), X8
  824. PXOR 64(SP), X9
  825. PXOR 96(SP), X10
  826. MOVOU X7, 16(AX)
  827. MOVOU X8, 48(AX)
  828. MOVOU X9, 80(AX)
  829. MOVOU X10, 112(AX)
  // Third stage: XOR j[0:16], X4 and the l entry, four rounds, XOR the saved
  // intermediates -> first output word of each pair.
  830. PXOR 0(CX), X7
  831. PXOR 0(CX), X8
  832. PXOR 0(CX), X9
  833. PXOR 0(CX), X10
  834. PXOR X4, X7
  835. PXOR X4, X8
  836. PXOR X4, X9
  837. PXOR X4, X10
  838. PXOR 16(DX), X7
  839. PXOR 32(DX), X8
  840. PXOR 48(DX), X9
  841. PXOR 64(DX), X10
  842. AESENC X1, X7
  843. AESENC X1, X8
  844. AESENC X1, X9
  845. AESENC X1, X10
  846. AESENC X0, X7
  847. AESENC X0, X8
  848. AESENC X0, X9
  849. AESENC X0, X10
  850. AESENC X2, X7
  851. AESENC X2, X8
  852. AESENC X2, X9
  853. AESENC X2, X10
  854. AESENC X5, X7
  855. AESENC X5, X8
  856. AESENC X5, X9
  857. AESENC X5, X10
  858. PXOR X12, X7
  859. PXOR X14, X8
  860. PXOR X11, X9
  861. PXOR X13, X10
  862. MOVOU X7, 0(AX)
  863. MOVOU X8, 32(AX)
  864. MOVOU X9, 64(AX)
  865. MOVOU X10, 96(AX)
  // Four l entries consumed; the extra SUBQ pairs with the ADDQ at process_64bytes.
  866. ADDQ $128, AX
  867. ADDQ $4, SI
  868. SUBQ $128, DI
  869. process_64bytes:
  // ---- 64-byte tail: two pairs, l entries SI and SI+1; no stack scratch ----
  870. ADDQ $128, DI
  871. SUBQ $64, DI
  872. JCS process_32bytes
  // BP = l + 16*SI (consts is no longer needed from here on).
  873. MOVQ SI, BP
  874. SHLQ $4, BP
  875. ADDQ DX, BP
  876. MOVO X6, X7
  877. PXOR X4, X7
  878. MOVO X7, X8
  879. PXOR 0(BP), X7
  880. PXOR 16(BP), X8
  881. AESENC X1, X7
  882. AESENC X1, X8
  883. AESENC X0, X7
  884. AESENC X0, X8
  885. AESENC X2, X7
  886. AESENC X2, X8
  887. AESENC X5, X7
  888. AESENC X5, X8
  889. MOVOU 0(AX), X11
  890. MOVOU 16(AX), X12
  891. MOVOU 32(AX), X13
  892. MOVOU 48(AX), X14
  893. PXOR X7, X11
  894. PXOR X8, X13
  895. PXOR X11, X3
  896. PXOR X13, X3
  897. PXOR X12, X7
  898. PXOR X14, X8
  899. MOVO X7, X12
  900. MOVO X8, X14
  901. PXOR X0, X7
  902. PXOR X0, X8
  903. AESENC X1, X7
  904. AESENC X1, X8
  905. AESENC X0, X7
  906. AESENC X0, X8
  907. AESENC X2, X7
  908. AESENC X2, X8
  909. AESENC X5, X7
  910. AESENC X5, X8
  911. PXOR X11, X7
  912. PXOR X13, X8
  913. MOVO X7, X11
  914. MOVO X8, X13
  915. PXOR 0(CX), X7
  916. PXOR 0(CX), X8
  917. PXOR X4, X7
  918. PXOR X4, X8
  919. PXOR 0(BP), X7
  920. PXOR 16(BP), X8
  921. AESENC X1, X7
  922. AESENC X1, X8
  923. AESENC X0, X7
  924. AESENC X0, X8
  925. AESENC X2, X7
  926. AESENC X2, X8
  927. AESENC X5, X7
  928. AESENC X5, X8
  929. PXOR X7, X12
  930. PXOR X8, X14
  931. MOVOU X12, 0(AX)
  932. MOVOU X11, 16(AX)
  933. MOVOU X14, 32(AX)
  934. MOVOU X13, 48(AX)
  935. ADDQ $64, AX
  936. ADDQ $2, SI
  937. SUBQ $64, DI
  938. process_32bytes:
  // ---- 32-byte tail: one pair, l entry (SI & 7) ---------------------------
  939. ADDQ $64, DI
  940. SUBQ $32, DI
  941. JCS out
  // DX = l + 16*(SI & 7).
  942. ANDQ $7, SI
  943. SHLQ $4, SI
  944. ADDQ SI, DX
  945. MOVO X6, X7
  946. PXOR X4, X7
  947. PXOR 0(DX), X7
  948. AESENC X1, X7
  949. AESENC X0, X7
  950. AESENC X2, X7
  951. AESENC X5, X7
  952. MOVOU 0(AX), X11
  953. MOVOU 16(AX), X12
  954. PXOR X7, X11
  955. PXOR X11, X3
  956. PXOR X12, X7
  957. MOVO X7, X12
  958. PXOR X0, X7
  959. AESENC X1, X7
  960. AESENC X0, X7
  961. AESENC X2, X7
  962. AESENC X5, X7
  963. PXOR X11, X7
  964. MOVO X7, X11
  965. PXOR 0(CX), X7
  966. PXOR X4, X7
  967. PXOR 0(DX), X7
  968. AESENC X1, X7
  969. AESENC X0, X7
  970. AESENC X2, X7
  971. AESENC X5, X7
  972. PXOR X7, X12
  973. MOVOU X12, 0(AX)
  974. MOVOU X11, 16(AX)
  975. out:
  // Write the accumulator back to y, wipe the remaining scratch slots and
  // restore the caller's SP.
  976. MOVOU X3, 0(BX)
  977. MOVO X5, 0(SP)
  978. MOVO X5, 32(SP)
  979. MOVO X5, 64(SP)
  980. MOVO X5, 96(SP)
  981. MOVQ R9, SP
  982. RET