[AArch64] Add intrinsics for 9.6 crypto instructions #165545
base: main
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-clang

Author: None (Lukacma)

Changes

This patch adds intrinsics for the crypto instructions defined in the ARM-software/acle#411 ACLE proposal.

Patch is 34.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165545.diff

7 Files Affected:

```diff
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index d2b7b78b9970f..605cf93d196ff 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1841,6 +1841,23 @@ def SVPMULLT_PAIR_U64 : SInst<"svpmullt_pair[_{d}]", "ddd", "Ul", MergeNone,
 def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>;
 }
+let SVETargetGuard = "sve-aes2", SMETargetGuard = "sve-aes2,ssve-aes" in {
+def SVAESD_X2 : SInst<"svaesd_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesd_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESIMC_X2 : SInst<"svaesdimc_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesdimc_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESE_X2 : SInst<"svaese_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aese_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESeMC_X2 : SInst<"svaesemc_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesemc_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVAESD_X4 : SInst<"svaesd_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesd_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESIMC_X4 : SInst<"svaesdimc_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesdimc_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESE_X4 : SInst<"svaese_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aese_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESeMC_X4 : SInst<"svaesemc_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesemc_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVPMULL_U64 : SInst<"svpmull[_u64]", "2dd", "Ul", MergeNone, "aarch64_sve_pmull", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVPMULL_N_U64 : SInst<"svpmull[_n_u64]", "2da", "Ul", MergeNone, "aarch64_sve_pmull", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVPMLAL_U64 : SInst<"svpmlal[_u64]", "22dd", "Ul", MergeNone, "aarch64_sve_pmlal", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVPMLAL_N_U64 : SInst<"svpmlal[_n_u64]", "22da", "Ul", MergeNone, "aarch64_sve_pmlal", [IsOverloadNone, VerifyRuntimeMode]>;
+}
+
 let SVETargetGuard = "sve-sha3", SMETargetGuard = "sme2p1,sve-sha3" in {
 def SVRAX1 : SInst<"svrax1[_{d}]", "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone, VerifyRuntimeMode]>;
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c
new file mode 100644
index 0000000000000..bbe71d9db92cf
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c
@@ -0,0 +1,215 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: @test_svaesd_u8_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svaesd_u8_x211svuint8x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x2_t test_svaesd_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesd_laneq,_u8_x2,,)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaesdimc_u8_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svaesdimc_u8_x211svuint8x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x2_t test_svaesdimc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x2,,)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaese_u8_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svaese_u8_x211svuint8x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x2_t test_svaese_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaese_laneq,_u8_x2,,)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaesemc_u8_x2(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svaesemc_u8_x211svuint8x2_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x2_t test_svaesemc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x2,,)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaesd_u8_x4(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svaesd_u8_x411svuint8x4_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svaesd_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesd_laneq,_u8_x4,,)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaesdimc_u8_x4(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svaesdimc_u8_x411svuint8x4_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svaesdimc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x4,,)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaese_u8_x4(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svaese_u8_x411svuint8x4_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svaese_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaese_laneq,_u8_x4,,)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svaesemc_u8_x4(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svaesemc_u8_x411svuint8x4_tu11__SVUint8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svaesemc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x4,,)(op1, op2, 0);
+}
+
+// CHECK-LABEL: @test_svpmull_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svpmull_u64u12__SVUint64_tS_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+svuint64x2_t test_svpmull_u64(svuint64_t op1, svuint64_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svpmull,_u64,,)(op1, op2);
+}
+
+// CHECK-LABEL: @test_svpmull_n_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svpmull_n_u64u12__SVUint64_tm(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
+// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+svuint64x2_t test_svpmull_n_u64(svuint64_t op1, uint64_t op2) STREAMING
+{
+  return SVE_ACLE_FUNC(svpmull,_n_u64,,)(op1, op2);
+}
+
+// CHECK-LABEL: @test_svpmlal_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svpmlal_u6412svuint64x2_tu12__SVUint64_tS0_(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+svuint64x2_t test_svpmlal_u64(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) STREAMING
+{
+  return SVE_ACLE_FUNC(svpmlal,_u64,,)(op1, op2, op3);
+}
+
+// CHECK-LABEL: @test_svpmlal_n_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svpmlal_n_u6412svuint64x2_tu12__SVUint64_tm(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0
+// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
+//
+svuint64x2_t test_svpmlal_n_u64(svuint64x2_t op1, svuint64_t op2, uint64_t op3) STREAMING
+{
+  return SVE_ACLE_FUNC(svpmlal,_n_u64,,)(op1, op2, op3);
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b1e94e0ce0975..bbeed4402da1b 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -272,9 +272,16 @@ class Intrinsic {
           Proto[I] == 'R' || Proto[I] == '@' || Proto[I] == '!')
         break;
 
+      if (Proto[I] == '2')
+        Param += 1;
+      if (Proto[I] == '4')
+        Param += 3;
+
       // Multivector modifier can be skipped
-      if (Proto[I] == '.')
+      if (Proto[I] == '.') {
+        Param -= 1; // Adjust for the increment at the top of the loop
         I += 2;
+      }
     }
     assert(I != Proto.size() && "Prototype has no splat operand");
     return Param;
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index b81edc385cd43..71949c93f83b0 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -4194,4 +4194,31 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
   def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
   def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
+
+  // AES2
+  class SVE2_Crypto_LANE_X2_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty],
+                            [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
+                            [ImmArg<ArgIndex<3>>, IntrNoMem]>;
+  class SVE2_Crypto_LANE_X4_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
+                            [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
+                             llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
+                            [ImmArg<ArgIndex<5>>, IntrNoMem]>;
+
+  def int_aarch64_sve_aesd_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
+  def int_aarch64_sve_aesdimc_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic;
+  def int_aarch64_s...
```
[truncated]
This patch adds intrinsics for the crypto instructions defined in the ARM-software/acle#411 ACLE proposal.
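For context, here is a minimal sketch of how the new ACLE intrinsics are called, mirroring the signatures exercised in the patch's test file. The semantics noted in the comments follow the linked ACLE proposal; the `-march` spelling is an assumption (the tests enable the features via `-target-feature +sve-aes2`, plus `+ssve-aes` for streaming mode):

```c
// Usage sketch for the intrinsics added by this patch. Signatures are
// taken from the test file above; build with a compiler carrying this
// patch and a flag such as -march=armv9.6-a+sve-aes2 (flag spelling is
// an assumption, not confirmed by the patch).
#include <arm_sve.h>

// One AES decrypt round across a two-vector tuple; the immediate selects
// which 128-bit segment of the key vector supplies the round key and
// must be a compile-time constant.
svuint8x2_t decrypt_pair(svuint8x2_t data, svuint8_t keys) {
  return svaesd_laneq_u8_x2(data, keys, 0);
}

// Carry-less (polynomial) multiply of 64-bit elements; the result tuple
// carries the widened products (see the ACLE proposal for exact lane
// placement).
svuint64x2_t clmul(svuint64_t a, svuint64_t b) {
  return svpmull_u64(a, b);
}

// Polynomial multiply-accumulate into a two-vector accumulator; the _n
// form splats the scalar operand, matching the pmlal_n codegen test.
svuint64x2_t clmul_acc(svuint64x2_t acc, svuint64_t a, uint64_t b) {
  return svpmlal_n_u64(acc, a, b);
}
```

In streaming-SVE code the same calls apply inside `__arm_streaming` functions, gated on the `ssve-aes` feature, as the `STREAMING` macro in the test file shows.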