Nessuna descrizione
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

xxHash3.AVX2.cs 5.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. using Unity.Burst.Intrinsics;
  2. using Unity.Collections.LowLevel.Unsafe;
  3. namespace Unity.Collections
  4. {
  5. [GenerateTestsForBurstCompatibility]
  6. public static partial class xxHash3
  7. {
  /// <summary>
  /// AVX2 main loop for long inputs: folds <paramref name="input"/> into the accumulators and,
  /// when <paramref name="dest"/> is non-null, mirrors every input byte into it.
  /// </summary>
  /// <param name="acc">Accumulator state, updated in place. The helpers reinterpret it as two v256 values, so it must reference at least 64 bytes.</param>
  /// <param name="input">Start of the data to consume.</param>
  /// <param name="dest">Optional copy target; pass null to disable copying.</param>
  /// <param name="length">Number of bytes available at <paramref name="input"/>. NOTE(review): the final stripe is read from input + length - STRIPE_LEN, so callers presumably guarantee length >= STRIPE_LEN; confirm against the caller.</param>
  /// <param name="secret">Secret key material; offsets up to SECRET_KEY_SIZE - STRIPE_LEN are used as stripe keys.</param>
  /// <param name="isHash64">Forwarded unchanged to Avx2Accumulate; not otherwise used here.</param>
  /// <remarks>The whole body is skipped when X86.Avx2.IsAvx2Supported is false.</remarks>
  internal static unsafe void Avx2HashLongInternalLoop(ulong* acc, byte* input, byte* dest, long length, byte* secret, int isHash64)
  {
      if (X86.Avx2.IsAvx2Supported)
      {
          // Consume every full block of BLOCK_LEN bytes, scrambling the accumulators after each one.
          // The block index is a long on purpose: length is a long, and a 32-bit index would make
          // "index * BLOCK_LEN" overflow once the byte offset no longer fits in an int.
          var nb_blocks = (length - 1) / BLOCK_LEN;
          for (long n = 0; n < nb_blocks; n++)
          {
              var blockOffset = n * BLOCK_LEN;
              Avx2Accumulate(acc, input + blockOffset, dest == null ? null : dest + blockOffset, secret, NB_ROUNDS, isHash64);
              Avx2ScrambleAcc(acc, secret + SECRET_KEY_SIZE - STRIPE_LEN);
          }

          // Whole stripes of the trailing, partial block. Counting from (length - 1) always leaves
          // at least one byte for the final stripe handled below.
          var tailOffset = nb_blocks * BLOCK_LEN;
          var nbStripes = ((length - 1) - tailOffset) / STRIPE_LEN;
          Avx2Accumulate(acc, input + tailOffset, dest == null ? null : dest + tailOffset, secret, nbStripes, isHash64);

          // Final stripe: the last STRIPE_LEN bytes of the input (may overlap the previous stripe),
          // accumulated with a dedicated secret offset. It is never mirrored here because it is
          // not stripe-aligned; the copy below takes care of the uncovered bytes.
          var p = input + length - STRIPE_LEN;
          Avx2Accumulate512(acc, p, null, secret + SECRET_KEY_SIZE - STRIPE_LEN - SECRET_LASTACC_START);

          if (dest != null)
          {
              // Copy exactly the bytes the stripe-wise mirroring above did not reach. Using
              // "length % STRIPE_LEN" here would yield 0 when length is an exact multiple of
              // STRIPE_LEN, although the last full stripe has not been written to dest yet.
              var copied = tailOffset + nbStripes * STRIPE_LEN;
              var remaining = length - copied;
              if (remaining > 0)
              {
                  UnsafeUtility.MemCpy(dest + copied, input + copied, remaining);
              }
          }
      }
  }
  /// <summary>
  /// Scrambles the accumulator state in place, keyed by 64 bytes read from <paramref name="secret"/>.
  /// </summary>
  /// <param name="acc">Accumulator state, treated as two consecutive v256 banks and overwritten.</param>
  /// <param name="secret">Key material; two unaligned 256-bit loads are performed from this address.</param>
  /// <remarks>The whole body is skipped when X86.Avx2.IsAvx2Supported is false.</remarks>
  internal static unsafe void Avx2ScrambleAcc(ulong* acc, byte* secret)
  {
      if (X86.Avx2.IsAvx2Supported)
      {
          var accBanks = (v256*) acc;
          var secretBanks = (v256*) secret;
          var primeBroadcast = X86.Avx.mm256_set1_epi32(unchecked((int) PRIME32_1));

          // Both 256-bit banks go through the exact same sequence.
          for (var bank = 0; bank < 2; bank++)
          {
              var current = accBanks[bank];

              // current ^ (current >> 47), then mixed with the secret for this bank.
              var folded = X86.Avx2.mm256_xor_si256(current, X86.Avx2.mm256_srli_epi64(current, 47));
              var keyed = X86.Avx2.mm256_xor_si256(folded, X86.Avx.mm256_loadu_si256(secretBanks + bank));

              // mm256_mul_epu32 only multiplies the low 32 bits of each 64-bit lane, so the upper
              // halves are shuffled down and multiplied separately.
              var keyedUpper = X86.Avx2.mm256_shuffle_epi32(keyed, X86.Sse.SHUFFLE(0, 3, 0, 1));
              var lowProduct = X86.Avx2.mm256_mul_epu32(keyed, primeBroadcast);
              var highProduct = X86.Avx2.mm256_mul_epu32(keyedUpper, primeBroadcast);

              // Recombine: low product plus the high product shifted back into the upper half.
              accBanks[bank] = X86.Avx2.mm256_add_epi64(lowProduct, X86.Avx2.mm256_slli_epi64(highProduct, 32));
          }
      }
  }
  /// <summary>
  /// Runs Avx2Accumulate512 over <paramref name="nbStripes"/> consecutive stripes of
  /// <paramref name="input"/>, advancing the secret by SECRET_CONSUME_RATE bytes per stripe.
  /// </summary>
  /// <param name="acc">Accumulator state, updated in place.</param>
  /// <param name="input">Address of the first stripe; stripe i starts at input + i * STRIPE_LEN.</param>
  /// <param name="dest">Optional copy target laid out like <paramref name="input"/>; null disables copying.</param>
  /// <param name="secret">Key material for the first stripe.</param>
  /// <param name="nbStripes">Number of stripes to process; zero or negative processes nothing.</param>
  /// <param name="isHash64">Accepted but not used by this method.</param>
  /// <remarks>The whole body is skipped when X86.Avx2.IsAvx2Supported is false.</remarks>
  internal static unsafe void Avx2Accumulate(ulong* acc, byte* input, byte* dest, byte* secret, long nbStripes,
      int isHash64)
  {
      if (X86.Avx2.IsAvx2Supported)
      {
          for (var stripe = 0; stripe < nbStripes; ++stripe)
          {
              var stripeOffset = stripe * STRIPE_LEN;

              // Only derive a destination address when the caller asked for a copy.
              byte* stripeDest = null;
              if (dest != null)
              {
                  stripeDest = dest + stripeOffset;
              }

              Avx2Accumulate512(acc, input + stripeOffset, stripeDest, secret + stripe * SECRET_CONSUME_RATE);
          }
      }
  }
  /// <summary>
  /// Folds one 64-byte stripe of <paramref name="input"/> into the accumulator state and,
  /// when <paramref name="dest"/> is non-null, stores the same 64 input bytes there.
  /// </summary>
  /// <param name="acc">Accumulator state, treated as two consecutive v256 banks and updated in place.</param>
  /// <param name="input">Stripe to consume; read with two unaligned 256-bit loads.</param>
  /// <param name="dest">Optional copy target for the stripe; null disables the stores.</param>
  /// <param name="secret">Key material for this stripe; read with two unaligned 256-bit loads.</param>
  /// <remarks>The whole body is skipped when X86.Avx2.IsAvx2Supported is false.</remarks>
  internal static unsafe void Avx2Accumulate512(ulong* acc, byte* input, byte* dest, byte* secret)
  {
      if (X86.Avx2.IsAvx2Supported)
      {
          var accBanks = (v256*) acc;
          var secretBanks = (v256*) secret;
          var inputBanks = (v256*) input;

          // Both 256-bit banks go through the exact same sequence.
          for (var bank = 0; bank < 2; bank++)
          {
              var data = X86.Avx.mm256_loadu_si256(inputBanks + bank);
              var keyed = X86.Avx2.mm256_xor_si256(data, X86.Avx.mm256_loadu_si256(secretBanks + bank));

              if (dest != null)
              {
                  // Mirror the raw input (not the keyed value); each bank is 32 bytes wide.
                  X86.Avx.mm256_storeu_si256(dest + bank * 32, data);
              }

              // Bring the upper 32 bits of every 64-bit lane down so that mm256_mul_epu32
              // multiplies the low half of each keyed lane by its high half.
              var keyedUpper = X86.Avx2.mm256_shuffle_epi32(keyed, X86.Sse.SHUFFLE(0, 3, 0, 1));
              var product = X86.Avx2.mm256_mul_epu32(keyed, keyedUpper);

              // Swap the two 64-bit halves of each 128-bit lane of the raw input before adding it.
              var swapped = X86.Avx2.mm256_shuffle_epi32(data, X86.Sse.SHUFFLE(1, 0, 3, 2));
              var partial = X86.Avx2.mm256_add_epi64(accBanks[bank], swapped);
              accBanks[bank] = X86.Avx2.mm256_add_epi64(product, partial);
          }
      }
  }
  110. }
  111. }