説明なし
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

xxHash3.AVX2.cs 5.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. #if !NET_DOTS
  2. using Unity.Burst.Intrinsics;
  3. using Unity.Collections.LowLevel.Unsafe;
  4. namespace Unity.Collections
  5. {
  [BurstCompatible]
  public static partial class xxHash3
  {
      /// <summary>
      /// AVX2 implementation of the long-input accumulation loop: folds <paramref name="length"/> bytes of
      /// <paramref name="input"/> into <paramref name="acc"/> and, when <paramref name="dest"/> is not null,
      /// also copies the input bytes to <paramref name="dest"/>.
      /// </summary>
      /// <remarks>
      /// Does nothing when AVX2 is not supported. The final stripe is read from
      /// <c>input + length - STRIPE_LEN</c>, so the caller is expected to pass at least STRIPE_LEN bytes
      /// (NOTE(review): not validated here - confirm against the callers).
      /// </remarks>
      /// <param name="acc">Accumulator, accessed as two 256-bit vectors.</param>
      /// <param name="input">Start of the data to hash.</param>
      /// <param name="dest">Optional destination receiving a copy of the input; may be null.</param>
      /// <param name="length">Number of bytes available at <paramref name="input"/>.</param>
      /// <param name="secret">Secret key material; offsets up to SECRET_KEY_SIZE are read from it.</param>
      /// <param name="isHash64">Forwarded to <see cref="Avx2Accumulate"/>.</param>
      internal static unsafe void Avx2HashLongInternalLoop(ulong* acc, byte* input, byte* dest, long length, byte* secret, int isHash64)
      {
          if (X86.Avx2.IsAvx2Supported)
          {
              // Full blocks: each one is NB_ROUNDS stripes followed by a scramble of the accumulator.
              // The counter is a long so that n * BLOCK_LEN is computed in 64 bits and cannot wrap
              // on very large inputs.
              var nbBlocks = (length - 1) / BLOCK_LEN;
              for (long n = 0; n < nbBlocks; n++)
              {
                  var blockOffset = n * BLOCK_LEN;
                  var blockDest = dest == null ? null : dest + blockOffset;
                  Avx2Accumulate(acc, input + blockOffset, blockDest, secret, NB_ROUNDS, isHash64);
                  Avx2ScrambleAcc(acc, secret + SECRET_KEY_SIZE - STRIPE_LEN);
              }

              // Partial block: the whole stripes that remain, always leaving at least one byte
              // for the final stripe below.
              var blocksLength = nbBlocks * BLOCK_LEN;
              var nbStripes = ((length - 1) - blocksLength) / STRIPE_LEN;
              var partialDest = dest == null ? null : dest + blocksLength;
              Avx2Accumulate(acc, input + blocksLength, partialDest, secret, nbStripes, isHash64);

              // Final stripe: the last STRIPE_LEN bytes of the input (it may overlap the previous stripe).
              // It is accumulated only, never copied, because it is not aligned with the stripes above.
              var lastStripe = input + length - STRIPE_LEN;
              Avx2Accumulate512(acc, lastStripe, null, secret + SECRET_KEY_SIZE - STRIPE_LEN - SECRET_LASTACC_START);

              if (dest != null)
              {
                  // Copy whatever the stripe loops did not write. This is measured from the end of the
                  // last stripe passed to Avx2Accumulate rather than with length % STRIPE_LEN: the modulo
                  // is 0 when length is an exact multiple of STRIPE_LEN, which left the last STRIPE_LEN
                  // bytes of dest unwritten.
                  var copied = blocksLength + nbStripes * STRIPE_LEN;
                  var remaining = length - copied;
                  if (remaining > 0)
                  {
                      UnsafeUtility.MemCpy(dest + copied, input + copied, remaining);
                  }
              }
          }
      }

      /// <summary>
      /// Scrambles the accumulator in place. For every 64-bit lane:
      /// the lane is xored with itself shifted right by 47 bits, xored with the matching secret lane,
      /// then multiplied by PRIME32_1 (keeping the low 64 bits of the result).
      /// </summary>
      /// <remarks>Does nothing when AVX2 is not supported.</remarks>
      /// <param name="acc">Accumulator, accessed as two 256-bit vectors.</param>
      /// <param name="secret">Secret to mix in; 64 bytes are read with unaligned loads.</param>
      internal static unsafe void Avx2ScrambleAcc(ulong* acc, byte* secret)
      {
          if (X86.Avx2.IsAvx2Supported)
          {
              var xAcc = (v256*) acc;
              var xSecret = (v256*) secret;
              var prime32 = X86.Avx.mm256_set1_epi32(unchecked((int) PRIME32_1));

              // First bank
              var acc_vec = xAcc[0];
              var shifted = X86.Avx2.mm256_srli_epi64(acc_vec, 47);
              var data_vec = X86.Avx2.mm256_xor_si256(acc_vec, shifted);
              var key_vec = X86.Avx.mm256_loadu_si256(xSecret + 0);
              var data_key = X86.Avx2.mm256_xor_si256(data_vec, key_vec);
              // Bring the high 32 bits of every 64-bit lane into the low half so mul_epu32 can use them.
              var data_key_hi = X86.Avx2.mm256_shuffle_epi32(data_key, X86.Sse.SHUFFLE(0, 3, 0, 1));
              var prod_lo = X86.Avx2.mm256_mul_epu32(data_key, prime32);
              var prod_hi = X86.Avx2.mm256_mul_epu32(data_key_hi, prime32);
              // lo * prime + ((hi * prime) << 32) is the 64-bit product of the lane and the prime.
              xAcc[0] = X86.Avx2.mm256_add_epi64(prod_lo, X86.Avx2.mm256_slli_epi64(prod_hi, 32));

              // Second bank
              acc_vec = xAcc[1];
              shifted = X86.Avx2.mm256_srli_epi64(acc_vec, 47);
              data_vec = X86.Avx2.mm256_xor_si256(acc_vec, shifted);
              key_vec = X86.Avx.mm256_loadu_si256(xSecret + 1);
              data_key = X86.Avx2.mm256_xor_si256(data_vec, key_vec);
              data_key_hi = X86.Avx2.mm256_shuffle_epi32(data_key, X86.Sse.SHUFFLE(0, 3, 0, 1));
              prod_lo = X86.Avx2.mm256_mul_epu32(data_key, prime32);
              prod_hi = X86.Avx2.mm256_mul_epu32(data_key_hi, prime32);
              xAcc[1] = X86.Avx2.mm256_add_epi64(prod_lo, X86.Avx2.mm256_slli_epi64(prod_hi, 32));
          }
      }

      /// <summary>
      /// Accumulates <paramref name="nbStripes"/> consecutive stripes of <paramref name="input"/> into
      /// <paramref name="acc"/>. Stripe n is read at <c>input + n * STRIPE_LEN</c> and uses the secret at
      /// <c>secret + n * SECRET_CONSUME_RATE</c>; when <paramref name="dest"/> is not null the stripe is
      /// also written to <c>dest + n * STRIPE_LEN</c>.
      /// </summary>
      /// <remarks>Does nothing when AVX2 is not supported or when <paramref name="nbStripes"/> is not positive.</remarks>
      /// <param name="acc">Accumulator, accessed as two 256-bit vectors.</param>
      /// <param name="input">First stripe to accumulate.</param>
      /// <param name="dest">Optional destination receiving a copy of each stripe; may be null.</param>
      /// <param name="secret">Secret used for the first stripe.</param>
      /// <param name="nbStripes">Number of stripes to process.</param>
      /// <param name="isHash64">Not used by this implementation.</param>
      internal static unsafe void Avx2Accumulate(ulong* acc, byte* input, byte* dest, byte* secret, long nbStripes,
          int isHash64)
      {
          if (X86.Avx2.IsAvx2Supported)
          {
              for (var n = 0; n < nbStripes; n++)
              {
                  var xInput = input + n * STRIPE_LEN;
                  Avx2Accumulate512(acc, xInput, dest == null ? null : dest + n * STRIPE_LEN,
                      secret + n * SECRET_CONSUME_RATE);
              }
          }
      }

      /// <summary>
      /// Accumulates one 64-byte stripe into <paramref name="acc"/>. For every 64-bit lane the input is
      /// xored with the secret, the low and high 32-bit halves of that value are multiplied together,
      /// and the product plus the input of the neighbouring lane is added to the accumulator.
      /// </summary>
      /// <remarks>Does nothing when AVX2 is not supported.</remarks>
      /// <param name="acc">Accumulator, accessed as two 256-bit vectors.</param>
      /// <param name="input">Stripe to accumulate; 64 bytes are read with unaligned loads.</param>
      /// <param name="dest">Optional destination receiving the 64 input bytes unchanged; may be null.</param>
      /// <param name="secret">Secret for this stripe; 64 bytes are read with unaligned loads.</param>
      internal static unsafe void Avx2Accumulate512(ulong* acc, byte* input, byte* dest, byte* secret)
      {
          if (X86.Avx2.IsAvx2Supported)
          {
              var xAcc = (v256*) acc;
              var xSecret = (v256*) secret;
              var xInput = (v256*) input;

              // First bank
              var data_vec = X86.Avx.mm256_loadu_si256(xInput + 0);
              var key_vec = X86.Avx.mm256_loadu_si256(xSecret + 0);
              var data_key = X86.Avx2.mm256_xor_si256(data_vec, key_vec);
              if (dest != null)
              {
                  X86.Avx.mm256_storeu_si256(dest, data_vec);
              }
              // Despite its name, this holds the high 32 bits of every lane moved into the low half.
              var data_key_lo = X86.Avx2.mm256_shuffle_epi32(data_key, X86.Sse.SHUFFLE(0, 3, 0, 1));
              var product = X86.Avx2.mm256_mul_epu32(data_key, data_key_lo);
              // Swap each pair of adjacent 64-bit lanes so a lane receives its neighbour's input.
              var data_swap = X86.Avx2.mm256_shuffle_epi32(data_vec, X86.Sse.SHUFFLE(1, 0, 3, 2));
              var sum = X86.Avx2.mm256_add_epi64(xAcc[0], data_swap);
              xAcc[0] = X86.Avx2.mm256_add_epi64(product, sum);

              // Second bank
              data_vec = X86.Avx.mm256_loadu_si256(xInput + 1);
              key_vec = X86.Avx.mm256_loadu_si256(xSecret + 1);
              data_key = X86.Avx2.mm256_xor_si256(data_vec, key_vec);
              if (dest != null)
              {
                  X86.Avx.mm256_storeu_si256(dest + 32, data_vec);
              }
              data_key_lo = X86.Avx2.mm256_shuffle_epi32(data_key, X86.Sse.SHUFFLE(0, 3, 0, 1));
              product = X86.Avx2.mm256_mul_epu32(data_key, data_key_lo);
              data_swap = X86.Avx2.mm256_shuffle_epi32(data_vec, X86.Sse.SHUFFLE(1, 0, 3, 2));
              sum = X86.Avx2.mm256_add_epi64(xAcc[1], data_swap);
              xAcc[1] = X86.Avx2.mm256_add_epi64(product, sum);
          }
      }
  }
  112. }
  113. #endif