@@ -1311,6 +1311,17 @@ let x86_VPADDW = new_definition
13111311 let res:(128 )word = simd8 word_add (word_zx x) (word_zx y) in
13121312 (dest := (word_zx res):N word) s`;;
13131313
(* VPADDD dest, src1, src2: lane-wise word_add of two vector operands.       *)
(*                                                                           *)
(* Both sources are read from state s as N-bit words. When dimindex(:N) is   *)
(* 256 the operands are converted with word_zx to 256-bit words and combined *)
(* with simd8 word_add; for every other N the 128-bit path using             *)
(* simd4 word_add is taken, so N is expected to be instantiated only at 256  *)
(* or 128 (the x86_execute dispatch on operand_size passes exactly those).   *)
(* The result is converted back to an N-bit word and assigned to dest.       *)
(*                                                                           *)
(* NOTE(review): simd8 on 256 bits / simd4 on 128 bits presumably yields     *)
(* 32-bit lanes, matching the packed-doubleword add of the Intel SDM;        *)
(* confirm against the simd8 / simd4 definitions.                            *)

let x86_VPADDD = new_definition
 `x86_VPADDD dest src1 src2 (s:x86state) =
    let (x:N word) = read src1 s
    and (y:N word) = read src2 s in
    if dimindex(:N) = 256 then
      let res:(256)word = simd8 word_add (word_zx x) (word_zx y) in
      (dest := (word_zx res):N word) s
    else
      let res:(128)word = simd4 word_add (word_zx x) (word_zx y) in
      (dest := (word_zx res):N word) s`;;
1324+
13141325let x86_VPMULHW = new_definition
13151326 `x86_VPMULHW dest src1 src2 (s:x86state) =
13161327 let (x:N word) = read src1 s
@@ -1345,6 +1356,17 @@ let x86_VPSLLQ = new_definition
13451356 let res:(128 )word = usimd2 (\z. word_shl z count) (word_zx x) in
13461357 (dest := (word_zx res):N word) s`;;
13471358
(* VPSUBD dest, src1, src2: lane-wise word_sub of two vector operands,       *)
(* computing src1 - src2 in each lane (x is read from src1, y from src2).    *)
(*                                                                           *)
(* Both sources are read from state s as N-bit words. When dimindex(:N) is   *)
(* 256 the operands are converted with word_zx to 256-bit words and combined *)
(* with simd8 word_sub; for every other N the 128-bit path using             *)
(* simd4 word_sub is taken, so N is expected to be instantiated only at 256  *)
(* or 128 (the x86_execute dispatch on operand_size passes exactly those).   *)
(* The result is converted back to an N-bit word and assigned to dest.       *)
(*                                                                           *)
(* NOTE(review): simd8 on 256 bits / simd4 on 128 bits presumably yields     *)
(* 32-bit lanes, matching the packed-doubleword subtract of the Intel SDM;   *)
(* confirm against the simd8 / simd4 definitions.                            *)

let x86_VPSUBD = new_definition
 `x86_VPSUBD dest src1 src2 (s:x86state) =
    let (x:N word) = read src1 s
    and (y:N word) = read src2 s in
    if dimindex(:N) = 256 then
      let res:(256)word = simd8 word_sub (word_zx x) (word_zx y) in
      (dest := (word_zx res):N word) s
    else
      let res:(128)word = simd4 word_sub (word_zx x) (word_zx y) in
      (dest := (word_zx res):N word) s`;;
1369+
13481370let x86_VPSUBW = new_definition
13491371 `x86_VPSUBW dest src1 src2 (s:x86state) =
13501372 let (x:N word) = read src1 s
@@ -2073,6 +2095,10 @@ let x86_execute = define
20732095 (match operand_size dest with
20742096 256 -> x86_VPADDW (OPERAND256 dest s) (OPERAND256 src1 s) (OPERAND256 src2 s)
20752097 | 128 -> x86_VPADDW (OPERAND128 dest s) (OPERAND128 src1 s) (OPERAND128 src2 s)) s
2098+ | VPADDD dest src1 src2 ->
2099+ (match operand_size dest with
2100+ 256 -> x86_VPADDD (OPERAND256 dest s) (OPERAND256 src1 s) (OPERAND256 src2 s)
2101+ | 128 -> x86_VPADDD (OPERAND128 dest s) (OPERAND128 src1 s) (OPERAND128 src2 s)) s
20762102 | VPMULHW dest src1 src2 ->
20772103 (match operand_size dest with
20782104 256 -> x86_VPMULHW (OPERAND256 dest s) (OPERAND256 src1 s) (OPERAND256 src2 s)
@@ -2085,6 +2111,10 @@ let x86_execute = define
20852111 (match operand_size dest with
20862112 256 -> x86_VPSLLQ (OPERAND256 dest s) (OPERAND256 src s) (OPERAND8 imm8 s)
20872113 | 128 -> x86_VPSLLQ (OPERAND128 dest s) (OPERAND128 src s) (OPERAND8 imm8 s)) s
2114+ | VPSUBD dest src1 src2 ->
2115+ (match operand_size dest with
2116+ 256 -> x86_VPSUBD (OPERAND256 dest s) (OPERAND256 src1 s) (OPERAND256 src2 s)
2117+ | 128 -> x86_VPSUBD (OPERAND128 dest s) (OPERAND128 src1 s) (OPERAND128 src2 s)) s
20882118 | VPSUBW dest src1 src2 ->
20892119 (match operand_size dest with
20902120 256 -> x86_VPSUBW (OPERAND256 dest s) (OPERAND256 src1 s) (OPERAND256 src2 s)
@@ -2827,10 +2857,12 @@ let x86_PADDQ_ALT = EXPAND_SIMD_RULE x86_PADDQ;;
(* Each x86_<INSN>_ALT theorem is obtained by applying EXPAND_SIMD_RULE to   *)
(* the matching x86_<INSN> definition.                                       *)
(* NOTE(review): EXPAND_SIMD_RULE presumably unfolds the simdN / usimdN lane *)
(* combinators into explicit per-lane form; confirm against its definition.  *)
let x86_PCMPGTD_ALT = EXPAND_SIMD_RULE x86_PCMPGTD;;
let x86_PSHUFD_ALT = EXPAND_SIMD_RULE x86_PSHUFD;;
let x86_PSRAD_ALT = EXPAND_SIMD_RULE x86_PSRAD;;
let x86_VPADDD_ALT = EXPAND_SIMD_RULE x86_VPADDD;;
let x86_VPADDW_ALT = EXPAND_SIMD_RULE x86_VPADDW;;
let x86_VPMULHW_ALT = EXPAND_SIMD_RULE x86_VPMULHW;;
let x86_VPMULLW_ALT = EXPAND_SIMD_RULE x86_VPMULLW;;
let x86_VPSLLQ_ALT = EXPAND_SIMD_RULE x86_VPSLLQ;;
let x86_VPSUBD_ALT = EXPAND_SIMD_RULE x86_VPSUBD;;
let x86_VPSUBW_ALT = EXPAND_SIMD_RULE x86_VPSUBW;;
let x86_VPSRAD_ALT = EXPAND_SIMD_RULE x86_VPSRAD;;
let x86_VPSRAW_ALT = EXPAND_SIMD_RULE x86_VPSRAW;;
@@ -2853,8 +2885,8 @@ let X86_OPERATION_CLAUSES =
28532885 x86_SAR; x86_SBB_ALT; x86_SET; x86_SHL; x86_SHLD; x86_SHR; x86_SHRD;
28542886 x86_STC; x86_SUB_ALT; x86_TEST; x86_TZCNT; x86_XCHG; x86_XOR;
28552887 (* ** AVX2 instructions ***)
2856- x86_VPADDW_ALT; x86_VPMULHW_ALT; x86_VPMULLW_ALT; x86_VPSUBW_ALT ;
2857- x86_VPXOR; x86_VPAND; x86_VPSRAD_ALT; x86_VPSRAW_ALT; x86_VPSRLW_ALT;
2888+ x86_VPADDD_ALT; x86_VPADDW_ALT; x86_VPMULHW_ALT; x86_VPMULLW_ALT; x86_VPSUBD_ALT ;
2889+ x86_VPSUBW_ALT; x86_VPXOR; x86_VPAND; x86_VPSRAD_ALT; x86_VPSRAW_ALT; x86_VPSRLW_ALT;
28582890 x86_VPSLLQ_ALT;
28592891 (* ** 32-bit backups since the ALT forms are 64-bit only ***)
28602892 INST_TYPE[`:32 `,`:N`] x86_ADC;
0 commit comments