暫無描述
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624
  1. using System;
  2. using System.Diagnostics;
  3. using System.Runtime.InteropServices;
  4. namespace Unity.Burst.Intrinsics
  5. {
  6. public unsafe static partial class X86
  7. {
  8. /// <summary>
  9. /// FMA intrinsics
  10. /// </summary>
  11. public static class Fma
  12. {
  /// <summary>
  /// Reports whether the FMA intrinsics may be used; documented as evaluating to true at compile time when they are supported.
  ///
  /// The value is simply forwarded from <c>Avx2.IsAvx2Supported</c>: Burst ties FMA support to AVX2 support to keep the number of feature sets small.
  /// </summary>
  public static bool IsFmaSupported
  {
      get
      {
          return Avx2.IsAvx2Supported;
      }
  }
  // Scalar multiply-add used by the single-precision emulation: computes (a * b) + c.
  // The multiply and the add are carried out in double, and only the final sum is
  // narrowed back to float.
  [DebuggerStepThrough]
  private static float FmaHelper(float a, float b, float c)
  {
      double product = (double)a * b;
      double sum = product + c;
      return (float)sum;
  }
  // Overlays a float and a uint on the same four bytes (both fields sit at offset 0),
  // so a value written through one field can be read back through the other.
  // NOTE(review): no member of the Fma class visible here references this struct - confirm whether it is still needed.
  [StructLayout(LayoutKind.Explicit)]
  private struct Union
  {
      [FieldOffset(0)] public float f;
      [FieldOffset(0)] public uint u;
  }
  // Scalar negated multiply-add: computes -(a * b) + c by negating the first
  // factor and delegating to FmaHelper.
  [DebuggerStepThrough]
  private static float FnmaHelper(float a, float b, float c)
  {
      float negatedA = -a;
      return FmaHelper(negatedA, b, c);
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfmadd213pd xmm, xmm, xmm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v128 fmadd_pd(v128 a, v128 b, v128 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfmadd213pd ymm, ymm, ymm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v256 mm256_fmadd_pd(v256 a, v256 b, v256 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Computes (a * b) + c for each of the four packed single-precision (32-bit) floating-point elements and returns the results as a new vector.
  /// </summary>
  /// <remarks>
  /// **** vfmadd213ps xmm, xmm, xmm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, added to each product</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v128 fmadd_ps(v128 a, v128 b, v128 c)
  {
      // One scalar helper call per element, evaluated in ascending element order.
      float lane0 = FmaHelper(a.Float0, b.Float0, c.Float0);
      float lane1 = FmaHelper(a.Float1, b.Float1, c.Float1);
      float lane2 = FmaHelper(a.Float2, b.Float2, c.Float2);
      float lane3 = FmaHelper(a.Float3, b.Float3, c.Float3);
      return new v128(lane0, lane1, lane2, lane3);
  }
  /// <summary>
  /// Computes (a * b) + c for each of the eight packed single-precision (32-bit) floating-point elements and returns the results as a new vector.
  /// </summary>
  /// <remarks>
  /// **** vfmadd213ps ymm, ymm, ymm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, added to each product</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v256 mm256_fmadd_ps(v256 a, v256 b, v256 c)
  {
      // One scalar helper call per element, evaluated in ascending element order.
      float lane0 = FmaHelper(a.Float0, b.Float0, c.Float0);
      float lane1 = FmaHelper(a.Float1, b.Float1, c.Float1);
      float lane2 = FmaHelper(a.Float2, b.Float2, c.Float2);
      float lane3 = FmaHelper(a.Float3, b.Float3, c.Float3);
      float lane4 = FmaHelper(a.Float4, b.Float4, c.Float4);
      float lane5 = FmaHelper(a.Float5, b.Float5, c.Float5);
      float lane6 = FmaHelper(a.Float6, b.Float6, c.Float6);
      float lane7 = FmaHelper(a.Float7, b.Float7, c.Float7);
      return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
  }
  /// <summary>
  /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
  /// </summary>
  /// <remarks>
  /// **** vfmadd213sd xmm, xmm, xmm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v128 fmadd_sd(v128 a, v128 b, v128 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Computes (a * b) + c on the lowest single-precision (32-bit) floating-point element only. The result is a copy of a whose lowest element is replaced by that value; the upper 3 elements come from a unchanged.
  /// </summary>
  /// <remarks>
  /// **** vfmadd213ss xmm, xmm, xmm
  /// </remarks>
  /// <param name="a">Vector a, supplies the first factor and the upper elements of the result</param>
  /// <param name="b">Vector b, supplies the second factor</param>
  /// <param name="c">Vector c, supplies the addend</param>
  /// <returns>Vector with the computed lowest element and the upper elements of a</returns>
  [DebuggerStepThrough]
  public static v128 fmadd_ss(v128 a, v128 b, v128 c)
  {
      float low = FmaHelper(a.Float0, b.Float0, c.Float0);

      // Start from a so the upper elements carry over, then overwrite element 0.
      v128 dst = a;
      dst.Float0 = low;
      return dst;
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfmaddsub213pd xmm, xmm, xmm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v128 fmaddsub_pd(v128 a, v128 b, v128 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfmaddsub213pd ymm, ymm, ymm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v256 mm256_fmaddsub_pd(v256 a, v256 b, v256 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Multiplies the four packed single-precision (32-bit) floating-point elements of a and b, then alternates between subtracting and adding c: elements 0 and 2 yield (a * b) - c, elements 1 and 3 yield (a * b) + c.
  /// </summary>
  /// <remarks>
  /// **** vfmaddsub213ps xmm, xmm, xmm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, alternately subtracted from and added to the products</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v128 fmaddsub_ps(v128 a, v128 b, v128 c)
  {
      // Subtraction is expressed as adding the negated c element.
      float lane0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
      float lane1 = FmaHelper(a.Float1, b.Float1, c.Float1);
      float lane2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
      float lane3 = FmaHelper(a.Float3, b.Float3, c.Float3);
      return new v128(lane0, lane1, lane2, lane3);
  }
  /// <summary>
  /// Multiplies the eight packed single-precision (32-bit) floating-point elements of a and b, then alternates between subtracting and adding c: even-numbered elements yield (a * b) - c, odd-numbered elements yield (a * b) + c.
  /// </summary>
  /// <remarks>
  /// **** vfmaddsub213ps ymm, ymm, ymm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, alternately subtracted from and added to the products</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v256 mm256_fmaddsub_ps(v256 a, v256 b, v256 c)
  {
      // Subtraction is expressed as adding the negated c element.
      float lane0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
      float lane1 = FmaHelper(a.Float1, b.Float1, c.Float1);
      float lane2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
      float lane3 = FmaHelper(a.Float3, b.Float3, c.Float3);
      float lane4 = FmaHelper(a.Float4, b.Float4, -c.Float4);
      float lane5 = FmaHelper(a.Float5, b.Float5, c.Float5);
      float lane6 = FmaHelper(a.Float6, b.Float6, -c.Float6);
      float lane7 = FmaHelper(a.Float7, b.Float7, c.Float7);
      return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfmsub213pd xmm, xmm, xmm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v128 fmsub_pd(v128 a, v128 b, v128 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfmsub213pd ymm, ymm, ymm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v256 mm256_fmsub_pd(v256 a, v256 b, v256 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Computes (a * b) - c for each of the four packed single-precision (32-bit) floating-point elements and returns the results as a new vector.
  /// </summary>
  /// <remarks>
  /// **** vfmsub213ps xmm, xmm, xmm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, subtracted from each product</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v128 fmsub_ps(v128 a, v128 b, v128 c)
  {
      // Subtraction is expressed as adding the negated c element.
      float lane0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
      float lane1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
      float lane2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
      float lane3 = FmaHelper(a.Float3, b.Float3, -c.Float3);
      return new v128(lane0, lane1, lane2, lane3);
  }
  /// <summary>
  /// Computes (a * b) - c for each of the eight packed single-precision (32-bit) floating-point elements and returns the results as a new vector.
  /// </summary>
  /// <remarks>
  /// **** vfmsub213ps ymm, ymm, ymm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, subtracted from each product</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v256 mm256_fmsub_ps(v256 a, v256 b, v256 c)
  {
      // Subtraction is expressed as adding the negated c element.
      float lane0 = FmaHelper(a.Float0, b.Float0, -c.Float0);
      float lane1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
      float lane2 = FmaHelper(a.Float2, b.Float2, -c.Float2);
      float lane3 = FmaHelper(a.Float3, b.Float3, -c.Float3);
      float lane4 = FmaHelper(a.Float4, b.Float4, -c.Float4);
      float lane5 = FmaHelper(a.Float5, b.Float5, -c.Float5);
      float lane6 = FmaHelper(a.Float6, b.Float6, -c.Float6);
      float lane7 = FmaHelper(a.Float7, b.Float7, -c.Float7);
      return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
  }
  /// <summary>
  /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
  /// </summary>
  /// <remarks>
  /// **** vfmsub213sd xmm, xmm, xmm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v128 fmsub_sd(v128 a, v128 b, v128 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Computes (a * b) - c on the lowest single-precision (32-bit) floating-point element only. The result is a copy of a whose lowest element is replaced by that value; the upper 3 elements come from a unchanged.
  /// </summary>
  /// <remarks>
  /// **** vfmsub213ss xmm, xmm, xmm
  /// </remarks>
  /// <param name="a">Vector a, supplies the first factor and the upper elements of the result</param>
  /// <param name="b">Vector b, supplies the second factor</param>
  /// <param name="c">Vector c, supplies the value subtracted from the product</param>
  /// <returns>Vector with the computed lowest element and the upper elements of a</returns>
  [DebuggerStepThrough]
  public static v128 fmsub_ss(v128 a, v128 b, v128 c)
  {
      // Subtraction is expressed as adding the negated c element.
      float low = FmaHelper(a.Float0, b.Float0, -c.Float0);

      // Start from a so the upper elements carry over, then overwrite element 0.
      v128 dst = a;
      dst.Float0 = low;
      return dst;
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c to/from the intermediate result, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfmsubadd213pd xmm, xmm, xmm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v128 fmsubadd_pd(v128 a, v128 b, v128 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c to/from the intermediate result, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfmsubadd213pd ymm, ymm, ymm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v256 mm256_fmsubadd_pd(v256 a, v256 b, v256 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Multiplies the four packed single-precision (32-bit) floating-point elements of a and b, then alternates between adding and subtracting c: elements 0 and 2 yield (a * b) + c, elements 1 and 3 yield (a * b) - c.
  /// </summary>
  /// <remarks>
  /// **** vfmsubadd213ps xmm, xmm, xmm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, alternately added to and subtracted from the products</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v128 fmsubadd_ps(v128 a, v128 b, v128 c)
  {
      // Subtraction is expressed as adding the negated c element.
      float lane0 = FmaHelper(a.Float0, b.Float0, c.Float0);
      float lane1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
      float lane2 = FmaHelper(a.Float2, b.Float2, c.Float2);
      float lane3 = FmaHelper(a.Float3, b.Float3, -c.Float3);
      return new v128(lane0, lane1, lane2, lane3);
  }
  /// <summary>
  /// Multiplies the eight packed single-precision (32-bit) floating-point elements of a and b, then alternates between adding and subtracting c: even-numbered elements yield (a * b) + c, odd-numbered elements yield (a * b) - c.
  /// </summary>
  /// <remarks>
  /// **** vfmsubadd213ps ymm, ymm, ymm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, alternately added to and subtracted from the products</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v256 mm256_fmsubadd_ps(v256 a, v256 b, v256 c)
  {
      // Subtraction is expressed as adding the negated c element.
      float lane0 = FmaHelper(a.Float0, b.Float0, c.Float0);
      float lane1 = FmaHelper(a.Float1, b.Float1, -c.Float1);
      float lane2 = FmaHelper(a.Float2, b.Float2, c.Float2);
      float lane3 = FmaHelper(a.Float3, b.Float3, -c.Float3);
      float lane4 = FmaHelper(a.Float4, b.Float4, c.Float4);
      float lane5 = FmaHelper(a.Float5, b.Float5, -c.Float5);
      float lane6 = FmaHelper(a.Float6, b.Float6, c.Float6);
      float lane7 = FmaHelper(a.Float7, b.Float7, -c.Float7);
      return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfnmadd213pd xmm, xmm, xmm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v128 fnmadd_pd(v128 a, v128 b, v128 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfnmadd213pd ymm, ymm, ymm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v256 mm256_fnmadd_pd(v256 a, v256 b, v256 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Computes -(a * b) + c for each of the four packed single-precision (32-bit) floating-point elements and returns the results as a new vector.
  /// </summary>
  /// <remarks>
  /// **** vfnmadd213ps xmm, xmm, xmm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, added to each negated product</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v128 fnmadd_ps(v128 a, v128 b, v128 c)
  {
      // One scalar helper call per element, evaluated in ascending element order.
      float lane0 = FnmaHelper(a.Float0, b.Float0, c.Float0);
      float lane1 = FnmaHelper(a.Float1, b.Float1, c.Float1);
      float lane2 = FnmaHelper(a.Float2, b.Float2, c.Float2);
      float lane3 = FnmaHelper(a.Float3, b.Float3, c.Float3);
      return new v128(lane0, lane1, lane2, lane3);
  }
  /// <summary>
  /// Computes -(a * b) + c for each of the eight packed single-precision (32-bit) floating-point elements and returns the results as a new vector.
  /// </summary>
  /// <remarks>
  /// **** vfnmadd213ps ymm, ymm, ymm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, added to each negated product</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v256 mm256_fnmadd_ps(v256 a, v256 b, v256 c)
  {
      // One scalar helper call per element, evaluated in ascending element order.
      float lane0 = FnmaHelper(a.Float0, b.Float0, c.Float0);
      float lane1 = FnmaHelper(a.Float1, b.Float1, c.Float1);
      float lane2 = FnmaHelper(a.Float2, b.Float2, c.Float2);
      float lane3 = FnmaHelper(a.Float3, b.Float3, c.Float3);
      float lane4 = FnmaHelper(a.Float4, b.Float4, c.Float4);
      float lane5 = FnmaHelper(a.Float5, b.Float5, c.Float5);
      float lane6 = FnmaHelper(a.Float6, b.Float6, c.Float6);
      float lane7 = FnmaHelper(a.Float7, b.Float7, c.Float7);
      return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
  }
  /// <summary>
  /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
  /// </summary>
  /// <remarks>
  /// **** vfnmadd213sd xmm, xmm, xmm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v128 fnmadd_sd(v128 a, v128 b, v128 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Computes -(a * b) + c on the lowest single-precision (32-bit) floating-point element only. The result is a copy of a whose lowest element is replaced by that value; the upper 3 elements come from a unchanged.
  /// </summary>
  /// <remarks>
  /// **** vfnmadd213ss xmm, xmm, xmm
  /// </remarks>
  /// <param name="a">Vector a, supplies the first factor and the upper elements of the result</param>
  /// <param name="b">Vector b, supplies the second factor</param>
  /// <param name="c">Vector c, supplies the value added to the negated product</param>
  /// <returns>Vector with the computed lowest element and the upper elements of a</returns>
  [DebuggerStepThrough]
  public static v128 fnmadd_ss(v128 a, v128 b, v128 c)
  {
      float low = FnmaHelper(a.Float0, b.Float0, c.Float0);

      // Start from a so the upper elements carry over, then overwrite element 0.
      v128 dst = a;
      dst.Float0 = low;
      return dst;
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfnmsub213pd xmm, xmm, xmm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v128 fnmsub_pd(v128 a, v128 b, v128 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
  /// </summary>
  /// <remarks>
  /// **** vfnmsub213pd ymm, ymm, ymm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v256 mm256_fnmsub_pd(v256 a, v256 b, v256 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Computes -(a * b) - c for each of the four packed single-precision (32-bit) floating-point elements and returns the results as a new vector.
  /// </summary>
  /// <remarks>
  /// **** vfnmsub213ps xmm, xmm, xmm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, subtracted from each negated product</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v128 fnmsub_ps(v128 a, v128 b, v128 c)
  {
      // Subtraction is expressed as adding the negated c element.
      float lane0 = FnmaHelper(a.Float0, b.Float0, -c.Float0);
      float lane1 = FnmaHelper(a.Float1, b.Float1, -c.Float1);
      float lane2 = FnmaHelper(a.Float2, b.Float2, -c.Float2);
      float lane3 = FnmaHelper(a.Float3, b.Float3, -c.Float3);
      return new v128(lane0, lane1, lane2, lane3);
  }
  /// <summary>
  /// Computes -(a * b) - c for each of the eight packed single-precision (32-bit) floating-point elements and returns the results as a new vector.
  /// </summary>
  /// <remarks>
  /// **** vfnmsub213ps ymm, ymm, ymm
  /// </remarks>
  /// <param name="a">Vector a, first factor of each product</param>
  /// <param name="b">Vector b, second factor of each product</param>
  /// <param name="c">Vector c, subtracted from each negated product</param>
  /// <returns>Vector holding the per-element results</returns>
  [DebuggerStepThrough]
  public static v256 mm256_fnmsub_ps(v256 a, v256 b, v256 c)
  {
      // Subtraction is expressed as adding the negated c element.
      float lane0 = FnmaHelper(a.Float0, b.Float0, -c.Float0);
      float lane1 = FnmaHelper(a.Float1, b.Float1, -c.Float1);
      float lane2 = FnmaHelper(a.Float2, b.Float2, -c.Float2);
      float lane3 = FnmaHelper(a.Float3, b.Float3, -c.Float3);
      float lane4 = FnmaHelper(a.Float4, b.Float4, -c.Float4);
      float lane5 = FnmaHelper(a.Float5, b.Float5, -c.Float5);
      float lane6 = FnmaHelper(a.Float6, b.Float6, -c.Float6);
      float lane7 = FnmaHelper(a.Float7, b.Float7, -c.Float7);
      return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
  }
  /// <summary>
  /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
  /// </summary>
  /// <remarks>
  /// **** vfnmsub213sd xmm, xmm, xmm
  /// <para>This C# implementation does not emulate double-precision FMA and always throws.</para>
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <param name="c">Vector c</param>
  /// <returns>Vector (this C# implementation never returns normally)</returns>
  /// <exception cref="NotSupportedException">Always thrown by this C# implementation.</exception>
  [DebuggerStepThrough]
  public static v128 fnmsub_sd(v128 a, v128 b, v128 c)
  {
      // NotSupportedException names the failure precisely; it still derives from
      // Exception, so existing catch (Exception) handlers keep working.
      throw new NotSupportedException("Double-precision FMA not emulated in C#");
  }
  /// <summary>
  /// Computes -(a * b) - c on the lowest single-precision (32-bit) floating-point element only. The result is a copy of a whose lowest element is replaced by that value; the upper 3 elements come from a unchanged.
  /// </summary>
  /// <remarks>
  /// **** vfnmsub213ss xmm, xmm, xmm
  /// </remarks>
  /// <param name="a">Vector a, supplies the first factor and the upper elements of the result</param>
  /// <param name="b">Vector b, supplies the second factor</param>
  /// <param name="c">Vector c, supplies the value subtracted from the negated product</param>
  /// <returns>Vector with the computed lowest element and the upper elements of a</returns>
  [DebuggerStepThrough]
  public static v128 fnmsub_ss(v128 a, v128 b, v128 c)
  {
      // Subtraction is expressed as adding the negated c element.
      float low = FnmaHelper(a.Float0, b.Float0, -c.Float0);

      // Start from a so the upper elements carry over, then overwrite element 0.
      v128 dst = a;
      dst.Float0 = low;
      return dst;
  }
  585. }
  586. }
  587. }