24 #if !defined(SIMDE__MMX_H) 25 #if !defined(SIMDE__MMX_H) 30 #if defined(SIMDE_MMX_FORCE_NATIVE) 31 #define SIMDE_MMX_NATIVE 32 #elif defined(__MMX__) && !defined(SIMDE_MMX_NO_NATIVE) && \ 33 !defined(SIMDE_NO_NATIVE) 34 #define SIMDE_MMX_NATIVE 35 #elif defined(__ARM_NEON) && !defined(SIMDE_MMX_NO_NEON) && \ 36 !defined(SIMDE_NO_NEON) 37 #define SIMDE_MMX_NEON 40 #if defined(SIMDE_MMX_NATIVE) 43 #if defined(SIMDE_MMX_NEON) 55 #if defined(SIMDE__ENABLE_GCC_VEC_EXT) 56 int8_t i8 __attribute__((__vector_size__(8), __may_alias__));
57 int16_t i16 __attribute__((__vector_size__(8), __may_alias__));
58 int32_t i32 __attribute__((__vector_size__(8), __may_alias__));
59 int64_t i64 __attribute__((__vector_size__(8), __may_alias__));
60 uint8_t u8 __attribute__((__vector_size__(8), __may_alias__));
61 uint16_t u16 __attribute__((__vector_size__(8), __may_alias__));
62 uint32_t u32 __attribute__((__vector_size__(8), __may_alias__));
63 uint64_t u64 __attribute__((__vector_size__(8), __may_alias__));
64 simde_float32 f32 __attribute__((__vector_size__(8), __may_alias__));
77 #if defined(SIMDE_MMX_NATIVE) 79 #elif defined(SIMDE_MMX_NEON) 92 #if defined(SIMDE_MMX_NATIVE) 94 "__m64 size doesn't match simde__m64 size");
101 #elif defined(SIMDE_MMX_NEON) 102 #define SIMDE__M64_NEON_C(T, expr) \ 103 (simde__m64) { .neon_##T = (expr) } 110 #if defined(SIMDE_MMX_NATIVE) 111 return SIMDE__M64_C(_mm_add_pi8(a.n, b.n));
115 for (
size_t i = 0; i < 8; i++) {
116 r.
i8[i] = a.
i8[i] + b.
i8[i];
121 #define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) 126 #if defined(SIMDE_MMX_NATIVE) 127 return SIMDE__M64_C(_mm_add_pi16(a.n, b.n));
131 for (
size_t i = 0; i < (8 /
sizeof(int16_t)); i++) {
137 #define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) 142 #if defined(SIMDE_MMX_NATIVE) 143 return SIMDE__M64_C(_mm_add_pi32(a.n, b.n));
147 for (
size_t i = 0; i < (8 /
sizeof(int32_t)); i++) {
153 #define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) 158 #if defined(SIMDE_MMX_NATIVE) 159 return SIMDE__M64_C(_mm_adds_pi8(a.n, b.n));
163 for (
int i = 0; i < 8; i++) {
164 if ((((b.
i8[i]) > 0) && ((a.
i8[i]) > (INT8_MAX - (b.
i8[i]))))) {
166 }
else if ((((b.
i8[i]) < 0) &&
167 ((a.
i8[i]) < (INT8_MIN - (b.
i8[i]))))) {
170 r.
i8[i] = (a.
i8[i]) + (b.
i8[i]);
176 #define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) 181 #if defined(SIMDE_MMX_NATIVE) 182 return SIMDE__M64_C(_mm_adds_pu8(a.n, b.n));
186 for (
size_t i = 0; i < 8; i++) {
187 const int32_t x = a.
u8[i] + b.
u8[i];
190 else if (x > UINT8_MAX)
193 r.
u8[i] = (uint8_t)x;
198 #define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) 203 #if defined(SIMDE_MMX_NATIVE) 204 return SIMDE__M64_C(_mm_adds_pi16(a.n, b.n));
208 for (
int i = 0; i < 4; i++) {
209 if ((((b.
i16[i]) > 0) &&
210 ((a.
i16[i]) > (INT16_MAX - (b.
i16[i]))))) {
211 r.
i16[i] = INT16_MAX;
212 }
else if ((((b.
i16[i]) < 0) &&
213 ((a.
i16[i]) < (SHRT_MIN - (b.
i16[i]))))) {
222 #define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) 227 #if defined(SIMDE_MMX_NATIVE) 228 return SIMDE__M64_C(_mm_adds_pu16(a.n, b.n));
232 for (
size_t i = 0; i < (8 /
sizeof(int16_t)); i++) {
233 const uint32_t x = a.
u16[i] + b.
u16[i];
235 r.
u16[i] = UINT16_MAX;
237 r.
u16[i] = (uint16_t)x;
242 #define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) 247 #if defined(SIMDE_MMX_NATIVE) 248 return SIMDE__M64_C(_mm_and_si64(a.n, b.n));
255 #define simde_m_pand(a, b) simde_mm_and_si64(a, b) 260 #if defined(SIMDE_MMX_NATIVE) 261 return SIMDE__M64_C(_mm_andnot_si64(a.n, b.n));
268 #define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) 273 #if defined(SIMDE_MMX_NATIVE) 274 return SIMDE__M64_C(_mm_cmpeq_pi8(a.n, b.n));
278 for (
int i = 0; i < 8; i++) {
279 r.
i8[i] = (a.
i8[i] == b.
i8[i]) * 0xff;
284 #define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) 289 #if defined(SIMDE_MMX_NATIVE) 290 return SIMDE__M64_C(_mm_cmpeq_pi16(a.n, b.n));
294 for (
int i = 0; i < 4; i++) {
295 r.
i16[i] = (a.
i16[i] == b.
i16[i]) * 0xffff;
300 #define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) 305 #if defined(SIMDE_MMX_NATIVE) 306 return SIMDE__M64_C(_mm_cmpeq_pi32(a.n, b.n));
310 for (
int i = 0; i < 2; i++) {
311 r.
i32[i] = (a.
i32[i] == b.
i32[i]) * 0xffffffff;
316 #define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) 321 #if defined(SIMDE_MMX_NATIVE) 322 return SIMDE__M64_C(_mm_cmpgt_pi8(a.n, b.n));
326 for (
int i = 0; i < 8; i++) {
327 r.
i8[i] = (a.
i8[i] > b.
i8[i]) * 0xff;
332 #define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) 337 #if defined(SIMDE_MMX_NATIVE) 338 return SIMDE__M64_C(_mm_cmpgt_pi16(a.n, b.n));
342 for (
int i = 0; i < 4; i++) {
343 r.
i16[i] = (a.
i16[i] > b.
i16[i]) * 0xffff;
348 #define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) 353 #if defined(SIMDE_MMX_NATIVE) 354 return SIMDE__M64_C(_mm_cmpgt_pi32(a.n, b.n));
358 for (
int i = 0; i < 2; i++) {
359 r.
i32[i] = (a.
i32[i] > b.
i32[i]) * 0xffffffff;
364 #define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) 369 #if defined(SIMDE_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) 370 return _mm_cvtm64_si64(a.n);
375 #define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) 380 #if defined(SIMDE_MMX_NATIVE) 381 return SIMDE__M64_C(_mm_cvtsi32_si64(a));
389 #define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) 394 #if defined(SIMDE_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) 395 return SIMDE__M64_C(_mm_cvtsi64_m64(a));
402 #define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) 407 #if defined(SIMDE_MMX_NATIVE) 408 return _mm_cvtsi64_si32(a.n);
417 #if defined(SIMDE_MMX_NATIVE) 422 #define simde_m_empty() simde_mm_empty() 427 #if defined(SIMDE_MMX_NATIVE) 428 return SIMDE__M64_C(_mm_madd_pi16(a.n, b.n));
432 for (
int i = 0; i < 4; i += 2) {
439 #define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) 444 #if defined(SIMDE_MMX_NATIVE) 445 return SIMDE__M64_C(_mm_mulhi_pi16(a.n, b.n));
449 for (
int i = 0; i < 4; i++) {
450 r.
i16[i] = (int16_t)((a.
i16[i] * b.
i16[i]) >> 16);
455 #define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) 460 #if defined(SIMDE_MMX_NATIVE) 461 return SIMDE__M64_C(_mm_mullo_pi16(a.n, b.n));
465 for (
int i = 0; i < 4; i++) {
466 r.
i16[i] = (int16_t)((a.
i16[i] * b.
i16[i]) & 0xffff);
471 #define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) 476 #if defined(SIMDE_MMX_NATIVE) 477 return SIMDE__M64_C(_mm_or_si64(a.n, b.n));
484 #define simde_m_por(a, b) simde_mm_or_si64(a, b) 489 #if defined(SIMDE_MMX_NATIVE) 490 return SIMDE__M64_C(_mm_packs_pi16(a.n, b.n));
495 for (
size_t i = 0; i < (8 /
sizeof(int16_t)); i++) {
496 if (a.
i16[i] < INT8_MIN) {
498 }
else if (a.
i16[i] > INT8_MAX) {
501 r.
i8[i] = (int8_t)a.
i16[i];
506 for (
size_t i = 0; i < (8 /
sizeof(int16_t)); i++) {
507 if (b.
i16[i] < INT8_MIN) {
508 r.
i8[i + 4] = INT8_MIN;
509 }
else if (b.
i16[i] > INT8_MAX) {
510 r.
i8[i + 4] = INT8_MAX;
512 r.
i8[i + 4] = (int8_t)b.
i16[i];
519 #define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) 524 #if defined(SIMDE_MMX_NATIVE) 525 return SIMDE__M64_C(_mm_packs_pi32(a.n, b.n));
530 for (
size_t i = 0; i < (8 /
sizeof(a.
i32[0])); i++) {
531 if (a.
i32[i] < SHRT_MIN) {
533 }
else if (a.
i32[i] > INT16_MAX) {
534 r.
i16[i] = INT16_MAX;
536 r.
i16[i] = (int16_t)a.
i32[i];
541 for (
size_t i = 0; i < (8 /
sizeof(b.
i32[0])); i++) {
542 if (b.
i32[i] < SHRT_MIN) {
543 r.
i16[i + 2] = SHRT_MIN;
544 }
else if (b.
i32[i] > INT16_MAX) {
545 r.
i16[i + 2] = INT16_MAX;
547 r.
i16[i + 2] = (int16_t)b.
i32[i];
554 #define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) 559 #if defined(SIMDE_MMX_NATIVE) 560 return SIMDE__M64_C(_mm_packs_pu16(a.n, b.n));
565 for (
size_t i = 0; i < (8 /
sizeof(int16_t)); i++) {
566 if (a.
i16[i] > UINT8_MAX) {
568 }
else if (a.
i16[i] < 0) {
571 r.
u8[i] = (int8_t)a.
i16[i];
576 for (
size_t i = 0; i < (8 /
sizeof(int16_t)); i++) {
577 if (b.
i16[i] > UINT8_MAX) {
578 r.
u8[i + 4] = UINT8_MAX;
579 }
else if (b.
i16[i] < 0) {
582 r.
u8[i + 4] = (int8_t)b.
i16[i];
589 #define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) 593 int8_t e3, int8_t e2, int8_t e1, int8_t e0)
595 #if defined(SIMDE_MMX_NATIVE) 596 return SIMDE__M64_C(_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0));
613 uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0)
615 #if defined(SIMDE_MMX_NATIVE) 616 return SIMDE__M64_C(_mm_set_pi8((int8_t)e7, (int8_t)e6, (int8_t)e5,
617 (int8_t)e4, (int8_t)e3, (int8_t)e2,
618 (int8_t)e1, (int8_t)e0));
636 #if defined(SIMDE_MMX_NATIVE) 637 return SIMDE__M64_C(_mm_set_pi16(e3, e2, e1, e0));
652 #if defined(SIMDE_MMX_NATIVE) 653 return SIMDE__M64_C(_mm_set_pi16((int16_t)e3, (int16_t)e2, (int16_t)e1,
668 #if defined(SIMDE_MMX_NATIVE) 669 return SIMDE__M64_C(_mm_set_pi32((int32_t)e1, (int32_t)e0));
681 #if defined(SIMDE_MMX_NATIVE) 682 return SIMDE__M64_C(_mm_set_pi32(e1, e0));
694 #if defined(SIMDE_MMX_NATIVE) 695 return SIMDE__M64_C(_mm_set1_pi8(a));
704 #if defined(SIMDE_MMX_NATIVE) 705 return SIMDE__M64_C(_mm_set1_pi16(a));
714 #if defined(SIMDE_MMX_NATIVE) 715 return SIMDE__M64_C(_mm_set1_pi32(a));
723 int8_t e3, int8_t e2, int8_t e1, int8_t e0)
725 #if defined(SIMDE_MMX_NATIVE) 726 return SIMDE__M64_C(_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0));
735 #if defined(SIMDE_MMX_NATIVE) 736 return SIMDE__M64_C(_mm_setr_pi16(e3, e2, e1, e0));
745 #if defined(SIMDE_MMX_NATIVE) 746 return SIMDE__M64_C(_mm_setr_pi32(e1, e0));
755 #if defined(SIMDE_MMX_NATIVE) 756 return SIMDE__M64_C(_mm_setzero_si64());
765 #if defined(SIMDE_MMX_NATIVE) 766 return SIMDE__M64_C(_mm_sll_pi16(a.n, count.n));
771 memset(&r, 0,
sizeof(r));
776 for (
size_t i = 0; i < (
sizeof(r.
u16) /
sizeof(r.
u16[0])); i++) {
782 #define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) 787 #if defined(SIMDE_MMX_NATIVE) 788 return SIMDE__M64_C(_mm_sll_pi32(a.n, count.n));
793 memset(&r, 0,
sizeof(r));
798 for (
size_t i = 0; i < (
sizeof(r.
u32) /
sizeof(r.
u32[0])); i++) {
804 #define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) 809 #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI) 810 return SIMDE__M64_C(_mm_slli_pi16(a.n, count));
815 for (
size_t i = 0; i < (
sizeof(r.
u16) /
sizeof(r.
u16[0])); i++) {
816 r.
u16[i] = a.
u16[i] << count;
822 #define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) 827 #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI) 828 return SIMDE__M64_C(_mm_slli_pi32(a.n, count));
833 for (
size_t i = 0; i < (8 /
sizeof(int)); i++) {
834 r.
u32[i] = a.
u32[i] << count;
840 #define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) 845 #if defined(SIMDE_MMX_NATIVE) 846 return SIMDE__M64_C(_mm_slli_si64(a.n, count));
849 r.
u64[0] = a.
u64[0] << count;
853 #define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) 858 #if defined(SIMDE_MMX_NATIVE) 859 return SIMDE__M64_C(_mm_sll_si64(a.n, count.n));
864 memset(&r, 0,
sizeof(r));
873 #define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) 878 #if defined(SIMDE_MMX_NATIVE) 879 return SIMDE__M64_C(_mm_srl_pi16(a.n, count.n));
884 memset(&r, 0,
sizeof(r));
889 for (
size_t i = 0; i <
sizeof(r.
u16) /
sizeof(r.
u16[0]); i++) {
895 #define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) 900 #if defined(SIMDE_MMX_NATIVE) 901 return SIMDE__M64_C(_mm_srl_pi32(a.n, count.n));
906 memset(&r, 0,
sizeof(r));
911 for (
size_t i = 0; i <
sizeof(r.
u32) /
sizeof(r.
u32[0]); i++) {
917 #define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) 922 #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI) 923 return SIMDE__M64_C(_mm_srli_pi16(a.n, count));
928 for (
size_t i = 0; i < (8 /
sizeof(uint16_t)); i++) {
929 r.
u16[i] = a.
u16[i] >> count;
935 #define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) 940 #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI) 941 return SIMDE__M64_C(_mm_srli_pi32(a.n, count));
946 for (
size_t i = 0; i < (8 /
sizeof(int)); i++) {
947 r.
u32[i] = a.
u32[i] >> count;
953 #define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) 958 #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI) 959 return SIMDE__M64_C(_mm_srli_si64(a.n, count));
962 r.
u64[0] = a.
u64[0] >> count;
966 #define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) 971 #if defined(SIMDE_MMX_NATIVE) 972 return SIMDE__M64_C(_mm_srl_si64(a.n, count.n));
977 memset(&r, 0,
sizeof(r));
985 #define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) 990 #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI) 991 return SIMDE__M64_C(_mm_srai_pi16(a.n, count));
996 (uint16_t)((~0U) << ((
sizeof(int16_t) * CHAR_BIT) - count));
999 for (
size_t i = 0; i < (8 /
sizeof(int16_t)); i++) {
1000 const uint16_t is_neg = ((uint16_t)(
1001 ((a.
u16[i]) >> ((
sizeof(int16_t) * CHAR_BIT) - 1))));
1002 r.
u16[i] = (a.
u16[i] >> count) | (m * is_neg);
1008 #define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) 1013 #if defined(SIMDE_MMX_NATIVE) && !defined(__PGI) 1014 return SIMDE__M64_C(_mm_srai_pi32(a.n, count));
1019 (uint32_t)((~0U) << ((
sizeof(int) * CHAR_BIT) - count));
1021 for (
size_t i = 0; i < (8 /
sizeof(int)); i++) {
1022 const uint32_t is_neg = ((uint32_t)(
1023 ((a.
u32[i]) >> ((
sizeof(int) * CHAR_BIT) - 1))));
1024 r.
u32[i] = (a.
u32[i] >> count) | (m * is_neg);
1030 #define simde_m_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 1035 #if defined(SIMDE_MMX_NATIVE) 1036 return SIMDE__M64_C(_mm_sra_pi16(a.n, count.n));
1039 int cnt = (int)count.
i64[0];
1041 if (cnt > 15 || cnt < 0) {
1042 for (
size_t i = 0; i < (
sizeof(r.
i16) /
sizeof(r.
i16[0]));
1044 r.
u16[i] = (a.
i16[i] < 0) ? 0xffff : 0x0000;
1047 const uint16_t m = (uint16_t)(
1048 (~0U) << ((
sizeof(int16_t) * CHAR_BIT) - cnt));
1049 for (
size_t i = 0; i < (
sizeof(r.
i16) /
sizeof(r.
i16[0]));
1051 const uint16_t is_neg = a.
i16[i] < 0;
1052 r.
u16[i] = (a.
u16[i] >> cnt) | (m * is_neg);
1059 #define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) 1064 #if defined(SIMDE_MMX_NATIVE) 1065 return SIMDE__M64_C(_mm_sra_pi32(a.n, count.n));
1068 const uint64_t cnt = count.
u64[0];
1071 for (
size_t i = 0; i < (
sizeof(r.
i32) /
sizeof(r.
i32[0]));
1073 r.
u32[i] = (a.
i32[i] < 0) ? UINT32_MAX : 0;
1075 }
else if (cnt == 0) {
1076 memcpy(&r, &a,
sizeof(r));
1078 const uint32_t m = (uint32_t)(
1079 (~0U) << ((
sizeof(int32_t) * CHAR_BIT) - cnt));
1080 for (
size_t i = 0; i < (
sizeof(r.
i32) /
sizeof(r.
i32[0]));
1082 const uint32_t is_neg = a.
i32[i] < 0;
1083 r.
u32[i] = (a.
u32[i] >> cnt) | (m * is_neg);
1090 #define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) 1095 #if defined(SIMDE_MMX_NATIVE) 1096 return SIMDE__M64_C(_mm_sub_pi8(a.n, b.n));
1100 for (
size_t i = 0; i < 8; i++) {
1101 r.
i8[i] = a.
i8[i] - b.
i8[i];
1106 #define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) 1111 #if defined(SIMDE_MMX_NATIVE) 1112 return SIMDE__M64_C(_mm_sub_pi16(a.n, b.n));
1116 for (
size_t i = 0; i < (8 /
sizeof(int16_t)); i++) {
1122 #define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) 1127 #if defined(SIMDE_MMX_NATIVE) 1128 return SIMDE__M64_C(_mm_sub_pi32(a.n, b.n));
1132 for (
size_t i = 0; i < (8 /
sizeof(int)); i++) {
1138 #define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) 1143 #if defined(SIMDE_MMX_NATIVE) 1144 return SIMDE__M64_C(_mm_subs_pi8(a.n, b.n));
1148 for (
size_t i = 0; i < (8); i++) {
1149 if (((b.
i8[i]) > 0 && (a.
i8[i]) < INT8_MIN + (b.
i8[i]))) {
1151 }
else if ((b.
i8[i]) < 0 && (a.
i8[i]) > INT8_MAX + (b.
i8[i])) {
1154 r.
i8[i] = (a.
i8[i]) - (b.
i8[i]);
1160 #define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) 1165 #if defined(SIMDE_MMX_NATIVE) 1166 return SIMDE__M64_C(_mm_subs_pu8(a.n, b.n));
1170 for (
size_t i = 0; i < (8); i++) {
1171 const int32_t x = a.
u8[i] - b.
u8[i];
1174 }
else if (x > UINT8_MAX) {
1175 r.
u8[i] = UINT8_MAX;
1177 r.
u8[i] = (uint8_t)x;
1183 #define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) 1188 #if defined(SIMDE_MMX_NATIVE) 1189 return SIMDE__M64_C(_mm_subs_pi16(a.n, b.n));
1193 for (
size_t i = 0; i < (8 /
sizeof(int16_t)); i++) {
1194 if (((b.
i16[i]) > 0 && (a.
i16[i]) < SHRT_MIN + (b.
i16[i]))) {
1195 r.
i16[i] = SHRT_MIN;
1196 }
else if ((b.
i16[i]) < 0 &&
1197 (a.
i16[i]) > INT16_MAX + (b.
i16[i])) {
1198 r.
i16[i] = INT16_MAX;
1206 #define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) 1211 #if defined(SIMDE_MMX_NATIVE) 1212 return SIMDE__M64_C(_mm_subs_pu16(a.n, b.n));
1216 for (
size_t i = 0; i < (8 /
sizeof(uint16_t)); i++) {
1217 const int x = a.
u16[i] - b.
u16[i];
1220 }
else if (x > UINT16_MAX) {
1221 r.
u16[i] = UINT16_MAX;
1223 r.
u16[i] = (uint16_t)x;
1229 #define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) 1234 #if defined(SIMDE_MMX_NATIVE) 1235 return SIMDE__M64_C(_mm_unpackhi_pi8(a.n, b.n));
1249 #define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) 1254 #if defined(SIMDE_MMX_NATIVE) 1255 return SIMDE__M64_C(_mm_unpackhi_pi16(a.n, b.n));
1265 #define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) 1270 #if defined(SIMDE_MMX_NATIVE) 1271 return SIMDE__M64_C(_mm_unpackhi_pi32(a.n, b.n));
1279 #define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) 1284 #if defined(SIMDE_MMX_NATIVE) 1285 return SIMDE__M64_C(_mm_unpacklo_pi8(a.n, b.n));
1299 #define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) 1304 #if defined(SIMDE_MMX_NATIVE) 1305 return SIMDE__M64_C(_mm_unpacklo_pi16(a.n, b.n));
1315 #define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) 1320 #if defined(SIMDE_MMX_NATIVE) 1321 return SIMDE__M64_C(_mm_unpacklo_pi32(a.n, b.n));
1329 #define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) 1334 #if defined(SIMDE_MMX_NATIVE) 1335 return SIMDE__M64_C(_mm_xor_si64(a.n, b.n));
1342 #define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) 1347 #if defined(SIMDE_MMX_NATIVE) 1348 return _m_to_int(a.n);
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pu16(simde__m64 a, simde__m64 b)
Definition: mmx.h:225
SIMDE_FLOAT32_TYPE simde_float32
Definition: simde-common.h:150
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b)
Definition: mmx.h:140
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_andnot_si64(simde__m64 a, simde__m64 b)
Definition: mmx.h:258
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpackhi_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:1252
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_set_pi32(int32_t e1, int32_t e0)
Definition: mmx.h:679
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pu8(simde__m64 a, simde__m64 b)
Definition: mmx.h:179
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pu16(uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0)
Definition: mmx.h:649
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_and_si64(simde__m64 a, simde__m64 b)
Definition: mmx.h:245
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_pi8(simde__m64 a, simde__m64 b)
Definition: mmx.h:1093
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:487
#define HEDLEY_UNLIKELY(expr)
Definition: hedley.h:1066
#define SIMDE__END_DECLS
Definition: simde-common.h:131
int16_t i16[4]
Definition: mmx.h:67
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpgt_pi8(simde__m64 a, simde__m64 b)
Definition: mmx.h:319
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpeq_pi32(simde__m64 a, simde__m64 b)
Definition: mmx.h:303
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_madd_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:425
int64_t i64[1]
Definition: mmx.h:69
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_setr_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0)
Definition: mmx.h:722
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_srl_si64(simde__m64 a, simde__m64 count)
Definition: mmx.h:969
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpackhi_pi8(simde__m64 a, simde__m64 b)
Definition: mmx.h:1232
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:201
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_srli_pi16(simde__m64 a, int count)
Definition: mmx.h:920
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_sll_pi32(simde__m64 a, simde__m64 count)
Definition: mmx.h:785
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_set1_pi16(int16_t a)
Definition: mmx.h:702
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_sra_pi32(simde__m64 a, simde__m64 count)
Definition: mmx.h:1062
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_setzero_si64(void)
Definition: mmx.h:753
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_srai_pi32(simde__m64 a, int count)
Definition: mmx.h:1011
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_sll_si64(simde__m64 a, simde__m64 count)
Definition: mmx.h:856
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpgt_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:335
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_set_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0)
Definition: mmx.h:592
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pu8(uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0)
Definition: mmx.h:612
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_srli_si64(simde__m64 a, int count)
Definition: mmx.h:956
SIMDE__FUNCTION_ATTRIBUTES void simde_mm_empty(void)
Definition: mmx.h:415
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_slli_pi32(simde__m64 a, int count)
Definition: mmx.h:825
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpacklo_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:1302
SIMDE__FUNCTION_ATTRIBUTES int64_t simde_mm_cvtm64_si64(simde__m64 a)
Definition: mmx.h:367
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtsi32_si64(int32_t a)
Definition: mmx.h:378
SIMDE__FUNCTION_ATTRIBUTES int32_t simde_mm_cvtsi64_si32(simde__m64 a)
Definition: mmx.h:405
uint32_t u32[2]
Definition: mmx.h:72
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_srai_pi16(simde__m64 a, int count)
Definition: mmx.h:988
int32_t i32[2]
Definition: mmx.h:68
#define SIMDE__BEGIN_DECLS
Definition: simde-common.h:130
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_sll_pi16(simde__m64 a, simde__m64 count)
Definition: mmx.h:763
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b)
Definition: mmx.h:557
#define SIMDE__FUNCTION_ATTRIBUTES
Definition: simde-common.h:121
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pu32(uint32_t e1, uint32_t e0)
Definition: mmx.h:666
uint8_t u8[8]
Definition: mmx.h:70
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:124
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_setr_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
Definition: mmx.h:733
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_set_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0)
Definition: mmx.h:634
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpeq_pi8(simde__m64 a, simde__m64 b)
Definition: mmx.h:271
HEDLEY_STATIC_ASSERT(8==sizeof(simde__m64), "__m64 size incorrect")
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b)
Definition: mmx.h:156
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtsi64_m64(int64_t a)
Definition: mmx.h:392
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_srl_pi16(simde__m64 a, simde__m64 count)
Definition: mmx.h:876
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_set1_pi8(int8_t a)
Definition: mmx.h:692
SIMDE__FUNCTION_ATTRIBUTES int32_t simde_m_to_int(simde__m64 a)
Definition: mmx.h:1345
uint16_t u16[4]
Definition: mmx.h:71
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_pi32(simde__m64 a, simde__m64 b)
Definition: mmx.h:1125
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_mulhi_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:442
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_set1_pi32(int32_t a)
Definition: mmx.h:712
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pu8(simde__m64 a, simde__m64 b)
Definition: mmx.h:1163
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_mullo_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:458
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpeq_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:287
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpackhi_pi32(simde__m64 a, simde__m64 b)
Definition: mmx.h:1268
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpacklo_pi8(simde__m64 a, simde__m64 b)
Definition: mmx.h:1282
uint64_t u64[1]
Definition: mmx.h:73
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_sra_pi16(simde__m64 a, simde__m64 count)
Definition: mmx.h:1033
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b)
Definition: mmx.h:108
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_slli_si64(simde__m64 a, int count)
Definition: mmx.h:843
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_or_si64(simde__m64 a, simde__m64 b)
Definition: mmx.h:474
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_setr_pi32(int32_t e1, int32_t e0)
Definition: mmx.h:743
#define SIMDE__VECTORIZE
Definition: simde-common.h:98
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_slli_pi16(simde__m64 a, int count)
Definition: mmx.h:807
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pu16(simde__m64 a, simde__m64 b)
Definition: mmx.h:1209
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_srl_pi32(simde__m64 a, simde__m64 count)
Definition: mmx.h:898
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pi8(simde__m64 a, simde__m64 b)
Definition: mmx.h:1141
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_packs_pi32(simde__m64 a, simde__m64 b)
Definition: mmx.h:522
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpgt_pi32(simde__m64 a, simde__m64 b)
Definition: mmx.h:351
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpacklo_pi32(simde__m64 a, simde__m64 b)
Definition: mmx.h:1318
int8_t i8[8]
Definition: mmx.h:66
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_xor_si64(simde__m64 a, simde__m64 b)
Definition: mmx.h:1332
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:1109
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pi16(simde__m64 a, simde__m64 b)
Definition: mmx.h:1186
SIMDE__FUNCTION_ATTRIBUTES simde__m64 simde_mm_srli_pi32(simde__m64 a, int count)
Definition: mmx.h:938