Skip to content

Commit e802c15

Browse files
authored
Merge branch 'main' into add-avx2-vpsllq
2 parents b72e387 + e022984 commit e802c15

File tree

8 files changed

+202
-22
lines changed

8 files changed

+202
-22
lines changed

.github/workflows/ci.yml

Lines changed: 125 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,16 @@ on:
44
pull_request:
55
branches: [ main ]
66

7+
env:
8+
HOLLIGHT_COMMIT: "157c99b7bb3a485116dc2bfc4ef3ad00912d883b"
9+
710
jobs:
8-
s2n-bignum-arm-tests:
9-
runs-on: ubuntu-24.04-arm
11+
s2n-bignum-tests:
12+
strategy:
13+
matrix:
14+
arch: [arm, x86]
15+
16+
runs-on: ${{ matrix.arch == 'arm' && 'ubuntu-24.04-arm' || matrix.arch == 'x86' && 'ubuntu-24.04' }}
1017

1118
steps:
1219
- name: Checkout code
@@ -19,28 +26,131 @@ jobs:
1926
2027
- name: Run tests
2128
run: |
22-
cd arm
29+
cd ${{ matrix.arch }}
2330
make
31+
${{ matrix.arch == 'x86' && 'cd ../x86_att && make clobber && make && git diff --exit-code .' || true}}
32+
echo "Make test"
2433
cd ../tests
25-
make complete
34+
CC=gcc make complete
2635
make ctCheck
36+
echo "Make test with valgrind"
37+
make clean
38+
CC=gcc VALGRIND="valgrind --" make complete || echo "(incomplete test, but proceeding)"
39+
echo "Make test (clang, build only)"
40+
make clean
41+
CC=clang make
42+
echo "Make benchmark (build only, for both of those)"
2743
cd ../benchmarks
28-
make
44+
CC=gcc make
45+
make clean
46+
CC=clang make
2947
30-
s2n-bignum-x86-tests:
31-
runs-on: ubuntu-24.04
48+
s2n-bignum-aws-lc-integration:
49+
strategy:
50+
matrix:
51+
arch: [arm, x86]
52+
53+
runs-on: ${{ matrix.arch == 'arm' && 'ubuntu-24.04-arm' || matrix.arch == 'x86' && 'ubuntu-24.04' }}
3254

3355
steps:
3456
- name: Checkout code
3557
uses: actions/checkout@v4
3658

59+
- name: Install dependent packages
60+
run: |
61+
sudo apt update
62+
sudo apt install valgrind cmake golang ninja-build
63+
which ninja
64+
sudo ln -s /usr/bin/ninja /usr/bin/ninja-build
65+
which cmake
66+
ln -s /usr/local/bin/cmake /usr/local/bin/cmake3
67+
3768
- name: Run tests
3869
run: |
39-
cd x86
40-
make
41-
cd ../x86_att && make clobber && make && git diff --exit-code .
42-
cd ../tests
43-
make complete
44-
make ctCheck
45-
cd ../benchmarks
46-
make
70+
echo "Run AWS-LC integration test (GITHUB_REPOSITORY: ${{ github.event.pull_request.head.repo.full_name }}, GITHUB_TARGET: $GITHUB_HEAD_REF)"
71+
git clone https://github.com/aws/aws-lc.git --depth=1
72+
cd aws-lc/third_party/s2n-bignum
73+
rm -rf ./s2n-bignum-imported
74+
GITHUB_REPOSITORY=${{ github.event.pull_request.head.repo.full_name }}.git GITHUB_TARGET=$GITHUB_HEAD_REF ./import.sh
75+
cd ../../../
76+
mkdir aws-lc-build && cd aws-lc-build
77+
cmake3 -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DFIPS=On ../aws-lc
78+
ninja-build run_tests
79+
cmake3 -GNinja -DCMAKE_BUILD_TYPE=RelWithDebInfo -DFIPS=Off ../aws-lc
80+
ninja-build run_tests
81+
82+
s2n-bignum-sematest:
83+
strategy:
84+
matrix:
85+
arch: [arm, x86]
86+
87+
runs-on: ${{ matrix.arch == 'arm' && 'ubuntu-24.04-arm' || matrix.arch == 'x86' && 'ubuntu-24.04' }}
88+
89+
steps:
90+
- name: Checkout code
91+
uses: actions/checkout@v4
92+
93+
- name: Install dependent packages
94+
run: |
95+
sudo apt update
96+
sudo apt install libpcre2-dev ocaml libstring-shellquote-perl libgmp-dev xdot
97+
wget https://raw.githubusercontent.com/ocaml/opam/master/shell/install.sh
98+
chmod +x install.sh
99+
echo "/usr/local/bin" | sh ./install.sh
100+
opam init --disable-sandboxing
101+
102+
- name: Install HOL Light
103+
run: |
104+
git clone https://github.com/jrh13/hol-light.git
105+
cd hol-light
106+
git checkout ${{ env.HOLLIGHT_COMMIT }}
107+
make switch-5
108+
eval $(opam env)
109+
echo $(ocamlc -version)
110+
echo $(camlp5 -v)
111+
HOLLIGHT_USE_MODULE=1 make
112+
cd ..
113+
114+
- name: Run tests
115+
run: |
116+
export HOLDIR=`pwd`/hol-light
117+
cd ${{ matrix.arch }}
118+
make sematest
119+
120+
s2n-bignum-tutorial:
121+
strategy:
122+
matrix:
123+
arch: [arm, x86]
124+
125+
runs-on: ${{ matrix.arch == 'arm' && 'ubuntu-24.04-arm' || matrix.arch == 'x86' && 'ubuntu-24.04' }}
126+
127+
steps:
128+
- name: Checkout code
129+
uses: actions/checkout@v4
130+
131+
- name: Install dependent packages
132+
run: |
133+
sudo apt update
134+
sudo apt install libpcre2-dev ocaml libstring-shellquote-perl libgmp-dev xdot
135+
wget https://raw.githubusercontent.com/ocaml/opam/master/shell/install.sh
136+
chmod +x install.sh
137+
echo "/usr/local/bin" | sh ./install.sh
138+
opam init --disable-sandboxing
139+
140+
- name: Install HOL Light
141+
run: |
142+
git clone https://github.com/jrh13/hol-light.git
143+
cd hol-light
144+
git checkout ${{ env.HOLLIGHT_COMMIT }}
145+
make switch-5
146+
eval $(opam env)
147+
echo $(ocamlc -version)
148+
echo $(camlp5 -v)
149+
HOLLIGHT_USE_MODULE=1 make
150+
cd ..
151+
152+
- name: Run tutorial
153+
run: |
154+
export HOLDIR=`pwd`/hol-light
155+
cd ${{ matrix.arch }}
156+
make tutorial

benchmarks/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ endif
2828

2929
# Benchmarking progfram
3030

31-
benchmark: benchmark.c ../include/s2n-bignum.h ../$(ARCH)/libs2nbignum.a; gcc -O3 -o benchmark benchmark.c -L../$(ARCH) -ls2nbignum -lm
31+
benchmark: benchmark.c ../include/s2n-bignum.h ../$(ARCH)/libs2nbignum.a; $(CC) -O3 -o benchmark benchmark.c -L../$(ARCH) -ls2nbignum -lm
3232

3333
go: benchmark ; ./benchmark
3434

tests/Makefile

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,12 @@
1111
# setting: ARCH=x86 or ARCH=arm as appropriate
1212

1313
ARCH=unknown_architecture
14-
VALGRIND=valgrind --
14+
VALGRIND?=
1515

1616
UNAME_RESULT=$(shell uname -m)
1717

1818
ifeq ($(UNAME_RESULT),x86_64)
1919
ARCH=x86
20-
# Enabling valgrind makes test unable to detect BMI/ADX
21-
VALGRIND=
2220
endif
2321

2422
ifeq ($(UNAME_RESULT),aarch64)
@@ -47,7 +45,7 @@ go: test; ./test +100 | grep -v OK
4745

4846
complete: test ctCheck; $(VALGRIND) ./test +500
4947

50-
test: test.c known_value_tests_p384.h tweetnacl_excerpt.c ../include/s2n-bignum.h ../$(ARCH)/libs2nbignum.a; gcc -Wall -std=gnu99 -o test test.c -L../$(ARCH) -ls2nbignum
48+
test: test.c known_value_tests_p384.h tweetnacl_excerpt.c ../include/s2n-bignum.h ../$(ARCH)/libs2nbignum.a; $(CC) -Wall -std=gnu99 -o test test.c -L../$(ARCH) -ls2nbignum -lm
5149

5250
clean: ;
5351
rm -f test

x86/allowed_asm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,13 @@
120120
: testq$
121121
: vpxor$
122122
: vpaddw$
123+
: vpaddd$
123124
: vpand$
124125
: vpmulhw$
125126
: vpmullw$
126127
: vpsllq$
127128
: vpsrad$
129+
: vpsubd$
128130
: vpsubw$
129131
: vpsraw$
130132
: vpsrlw$

x86/proofs/decode.ml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,10 +610,18 @@ let decode_aux = new_definition `!pfxs rex l. decode_aux pfxs rex l =
610610
let sz = vexL_size L in
611611
(read_ModRM rex l >>= \((reg,rm),l).
612612
SOME (VPSUBW (mmreg reg sz) (mmreg v sz) (simd_of_RM sz rm),l))
613+
| [0xfa:8] ->
614+
let sz = vexL_size L in
615+
(read_ModRM rex l >>= \((reg,rm),l).
616+
SOME (VPSUBD (mmreg reg sz) (mmreg v sz) (simd_of_RM sz rm),l))
613617
| [0xfd:8] ->
614618
let sz = vexL_size L in
615619
(read_ModRM rex l >>= \((reg,rm),l).
616620
SOME (VPADDW (mmreg reg sz) (mmreg v sz) (simd_of_RM sz rm),l))
621+
| [0xfe:8] ->
622+
let sz = vexL_size L in
623+
(read_ModRM rex l >>= \((reg,rm),l).
624+
SOME (VPADDD (mmreg reg sz) (mmreg v sz) (simd_of_RM sz rm),l))
617625
| [0x71:8] ->
618626
let sz = vexL_size L in
619627
read_ModRM rex l >>= (\((reg,rm),l).

x86/proofs/instruction.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,13 +305,15 @@ let instruction_INDUCTION,instruction_RECURSION = define_type
305305
| TEST operand operand
306306
| TZCNT operand operand
307307
| VPADDW operand operand operand
308+
| VPADDD operand operand operand
308309
| VPAND operand operand operand
309310
| VPMULHW operand operand operand
310311
| VPMULLW operand operand operand
311312
| VPSLLQ operand operand operand
312313
| VPSRAD operand operand operand
313314
| VPSRAW operand operand operand
314315
| VPSRLW operand operand operand
316+
| VPSUBD operand operand operand
315317
| VPSUBW operand operand operand
316318
| VPXOR operand operand operand
317319
| XCHG operand operand

x86/proofs/simulator.ml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,34 @@ let iclasses = iclasses @
292292
[0x66; 0x0f; 0x1f; 0x84; 0x00; 0x00; 0x00; 0x00; 0x00]; (* NOP_N (Memop Word (%%%% (rax,0,rax))) *)
293293
[0x66; 0x2e; 0x0f; 0x1f; 0x84; 0x00; 0x00; 0x00; 0x00; 0x00]; (* NOP_N (Memop Word (%%%% (rax,0,rax))) *)
294294
[0x66; 0x66; 0x2e; 0x0f; 0x1f; 0x84; 0x00; 0x00; 0x00; 0x00; 0x00]; (* NOP_N (Memop Word (%%%% (rax,0,rax))) *)
295+
[0xc5; 0xfd; 0xfe; 0xc1]; (* VPADDD ymm0, ymm0, ymm1 *)
296+
[0xc5; 0xf5; 0xfe; 0xca]; (* VPADDD ymm1, ymm1, ymm2 *)
297+
[0xc5; 0xed; 0xfe; 0xd3]; (* VPADDD ymm2, ymm2, ymm3 *)
298+
[0xc5; 0xe5; 0xfe; 0xdc]; (* VPADDD ymm3, ymm3, ymm4 *)
299+
[0xc5; 0xe9; 0xfe; 0xcb]; (* VPADDD (%_% xmm1) (%_% xmm2) (%_% xmm3) *)
300+
[0xc5; 0xf9; 0xfe; 0xff]; (* VPADDD (%_% xmm7) (%_% xmm0) (%_% xmm7) *)
301+
[0xc4; 0x41; 0x7d; 0xfe; 0xc1]; (* VPADDD ymm8, ymm8, ymm9 *)
302+
[0xc4; 0x41; 0x6d; 0xfe; 0xd3]; (* VPADDD ymm10, ymm10, ymm11 *)
303+
[0xc4; 0x41; 0x5d; 0xfe; 0xe5]; (* VPADDD ymm12, ymm12, ymm13 *)
304+
[0xc4; 0x41; 0x4d; 0xfe; 0xf7]; (* VPADDD ymm14, ymm14, ymm15 *)
305+
[0xc4; 0x41; 0x19; 0xfe; 0xdd]; (* VPADDD (%_% xmm11) (%_% xmm12) (%_% xmm13) *)
306+
[0xc4; 0x41; 0x11; 0xfe; 0xfc]; (* VPADDD (%_% xmm15) (%_% xmm13) (%_% xmm12) *)
307+
[0xc5; 0xfd; 0xfa; 0xc1]; (* VPSUBD ymm0, ymm0, ymm1 *)
308+
[0xc5; 0xf5; 0xfa; 0xca]; (* VPSUBD ymm1, ymm1, ymm2 *)
309+
[0xc5; 0xed; 0xfa; 0xd3]; (* VPSUBD ymm2, ymm2, ymm3 *)
310+
[0xc5; 0xe5; 0xfa; 0xdc]; (* VPSUBD ymm3, ymm3, ymm4 *)
311+
[0xc5; 0xf9; 0xfa; 0xc1]; (* VPSUBD xmm0, xmm0, xmm1 *)
312+
[0xc5; 0xf1; 0xfa; 0xca]; (* VPSUBD xmm1, xmm1, xmm2 *)
313+
[0xc5; 0xe9; 0xfa; 0xd3]; (* VPSUBD xmm2, xmm2, xmm3 *)
314+
[0xc5; 0xe1; 0xfa; 0xdc]; (* VPSUBD xmm3, xmm3, xmm4 *)
315+
[0xc4; 0x41; 0x7d; 0xfa; 0xc1]; (* VPSUBD ymm8, ymm8, ymm9 *)
316+
[0xc4; 0x41; 0x6d; 0xfa; 0xd3]; (* VPSUBD ymm10, ymm10, ymm11 *)
317+
[0xc4; 0x41; 0x5d; 0xfa; 0xe5]; (* VPSUBD ymm12, ymm12, ymm13 *)
318+
[0xc4; 0x41; 0x4d; 0xfa; 0xf7]; (* VPSUBD ymm14, ymm14, ymm15 *)
319+
[0xc4; 0x41; 0x39; 0xfa; 0xc1]; (* VPSUBD xmm8, xmm8, xmm9 *)
320+
[0xc4; 0x41; 0x29; 0xfa; 0xd3]; (* VPSUBD xmm10, xmm10, xmm11 *)
321+
[0xc4; 0x41; 0x19; 0xfa; 0xe5]; (* VPSUBD xmm12, xmm12, xmm13 *)
322+
[0xc4; 0x41; 0x09; 0xfa; 0xf7]; (* VPSUBD xmm14, xmm14, xmm15 *)
295323
[0xc5; 0x81; 0xf9; 0xf6]; (* VPSUBW (%_% xmm6) (%_% xmm15) (%_% xmm6) *)
296324
[0xc5; 0x85; 0xf9; 0xf6]; (* VPSUBW (%_% ymm6) (%_% ymm15) (%_% ymm6) *)
297325
[0xc5; 0x45; 0xfd; 0xd9]; (* VPADDW (%_% ymm11) (%_% ymm7) (%_% ymm11) *)

x86/proofs/x86.ml

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1311,6 +1311,17 @@ let x86_VPADDW = new_definition
13111311
let res:(128)word = simd8 word_add (word_zx x) (word_zx y) in
13121312
(dest := (word_zx res):N word) s`;;
13131313

1314+
let x86_VPADDD = new_definition
1315+
`x86_VPADDD dest src1 src2 (s:x86state) =
1316+
let (x:N word) = read src1 s
1317+
and (y:N word) = read src2 s in
1318+
if dimindex(:N) = 256 then
1319+
let res:(256)word = simd8 word_add (word_zx x) (word_zx y) in
1320+
(dest := (word_zx res):N word) s
1321+
else
1322+
let res:(128)word = simd4 word_add (word_zx x) (word_zx y) in
1323+
(dest := (word_zx res):N word) s`;;
1324+
13141325
let x86_VPMULHW = new_definition
13151326
`x86_VPMULHW dest src1 src2 (s:x86state) =
13161327
let (x:N word) = read src1 s
@@ -1345,6 +1356,17 @@ let x86_VPSLLQ = new_definition
13451356
let res:(128)word = usimd2 (\z. word_shl z count) (word_zx x) in
13461357
(dest := (word_zx res):N word) s`;;
13471358

1359+
let x86_VPSUBD = new_definition
1360+
`x86_VPSUBD dest src1 src2 (s:x86state) =
1361+
let (x:N word) = read src1 s
1362+
and (y:N word) = read src2 s in
1363+
if dimindex(:N) = 256 then
1364+
let res:(256)word = simd8 word_sub (word_zx x) (word_zx y) in
1365+
(dest := (word_zx res):N word) s
1366+
else
1367+
let res:(128)word = simd4 word_sub (word_zx x) (word_zx y) in
1368+
(dest := (word_zx res):N word) s`;;
1369+
13481370
let x86_VPSUBW = new_definition
13491371
`x86_VPSUBW dest src1 src2 (s:x86state) =
13501372
let (x:N word) = read src1 s
@@ -2073,6 +2095,10 @@ let x86_execute = define
20732095
(match operand_size dest with
20742096
256 -> x86_VPADDW (OPERAND256 dest s) (OPERAND256 src1 s) (OPERAND256 src2 s)
20752097
| 128 -> x86_VPADDW (OPERAND128 dest s) (OPERAND128 src1 s) (OPERAND128 src2 s)) s
2098+
| VPADDD dest src1 src2 ->
2099+
(match operand_size dest with
2100+
256 -> x86_VPADDD (OPERAND256 dest s) (OPERAND256 src1 s) (OPERAND256 src2 s)
2101+
| 128 -> x86_VPADDD (OPERAND128 dest s) (OPERAND128 src1 s) (OPERAND128 src2 s)) s
20762102
| VPMULHW dest src1 src2 ->
20772103
(match operand_size dest with
20782104
256 -> x86_VPMULHW (OPERAND256 dest s) (OPERAND256 src1 s) (OPERAND256 src2 s)
@@ -2085,6 +2111,10 @@ let x86_execute = define
20852111
(match operand_size dest with
20862112
256 -> x86_VPSLLQ (OPERAND256 dest s) (OPERAND256 src s) (OPERAND8 imm8 s)
20872113
| 128 -> x86_VPSLLQ (OPERAND128 dest s) (OPERAND128 src s) (OPERAND8 imm8 s)) s
2114+
| VPSUBD dest src1 src2 ->
2115+
(match operand_size dest with
2116+
256 -> x86_VPSUBD (OPERAND256 dest s) (OPERAND256 src1 s) (OPERAND256 src2 s)
2117+
| 128 -> x86_VPSUBD (OPERAND128 dest s) (OPERAND128 src1 s) (OPERAND128 src2 s)) s
20882118
| VPSUBW dest src1 src2 ->
20892119
(match operand_size dest with
20902120
256 -> x86_VPSUBW (OPERAND256 dest s) (OPERAND256 src1 s) (OPERAND256 src2 s)
@@ -2827,10 +2857,12 @@ let x86_PADDQ_ALT = EXPAND_SIMD_RULE x86_PADDQ;;
28272857
let x86_PCMPGTD_ALT = EXPAND_SIMD_RULE x86_PCMPGTD;;
28282858
let x86_PSHUFD_ALT = EXPAND_SIMD_RULE x86_PSHUFD;;
28292859
let x86_PSRAD_ALT = EXPAND_SIMD_RULE x86_PSRAD;;
2860+
let x86_VPADDD_ALT = EXPAND_SIMD_RULE x86_VPADDD;;
28302861
let x86_VPADDW_ALT = EXPAND_SIMD_RULE x86_VPADDW;;
28312862
let x86_VPMULHW_ALT = EXPAND_SIMD_RULE x86_VPMULHW;;
28322863
let x86_VPMULLW_ALT = EXPAND_SIMD_RULE x86_VPMULLW;;
28332864
let x86_VPSLLQ_ALT = EXPAND_SIMD_RULE x86_VPSLLQ;;
2865+
let x86_VPSUBD_ALT = EXPAND_SIMD_RULE x86_VPSUBD;;
28342866
let x86_VPSUBW_ALT = EXPAND_SIMD_RULE x86_VPSUBW;;
28352867
let x86_VPSRAD_ALT = EXPAND_SIMD_RULE x86_VPSRAD;;
28362868
let x86_VPSRAW_ALT = EXPAND_SIMD_RULE x86_VPSRAW;;
@@ -2853,8 +2885,8 @@ let X86_OPERATION_CLAUSES =
28532885
x86_SAR; x86_SBB_ALT; x86_SET; x86_SHL; x86_SHLD; x86_SHR; x86_SHRD;
28542886
x86_STC; x86_SUB_ALT; x86_TEST; x86_TZCNT; x86_XCHG; x86_XOR;
28552887
(*** AVX2 instructions ***)
2856-
x86_VPADDW_ALT; x86_VPMULHW_ALT; x86_VPMULLW_ALT; x86_VPSUBW_ALT;
2857-
x86_VPXOR; x86_VPAND; x86_VPSRAD_ALT; x86_VPSRAW_ALT; x86_VPSRLW_ALT;
2888+
x86_VPADDD_ALT; x86_VPADDW_ALT; x86_VPMULHW_ALT; x86_VPMULLW_ALT; x86_VPSUBD_ALT;
2889+
x86_VPSUBW_ALT; x86_VPXOR; x86_VPAND; x86_VPSRAD_ALT; x86_VPSRAW_ALT; x86_VPSRLW_ALT;
28582890
x86_VPSLLQ_ALT;
28592891
(*** 32-bit backups since the ALT forms are 64-bit only ***)
28602892
INST_TYPE[`:32`,`:N`] x86_ADC;

0 commit comments

Comments
 (0)