Skip to content

Commit 660e87d

Browse files
Milad Fa authored and
Commit Bot committed
S390 [simd]: optimize vector multiply extend on codegen
The implementation now uses a combination of multiply even and multiply odd followed by a vector merge low or high. Vector merge instructions are also added to the simulator. Change-Id: I144c5d07e5e6bd978788a70aacabd61463f93289 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2815562 Commit-Queue: Milad Fa <mfarazma@redhat.com> Reviewed-by: Junliang Yan <junyan@redhat.com> Cr-Commit-Position: refs/heads/master@{#73868}
1 parent a6a2773 commit 660e87d

2 files changed

Lines changed: 74 additions & 40 deletions

File tree

src/compiler/backend/s390/code-generator-s390.cc

Lines changed: 25 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3672,80 +3672,65 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
36723672
Condition(0), Condition(0), Condition(2));
36733673
break;
36743674
}
3675-
#define ASSEMBLE_SIMD_I64X2_EXT_MUL(UNPACK_INSTR) \
3676-
__ UNPACK_INSTR(kScratchDoubleReg, i.InputSimd128Register(0), Condition(0), \
3677-
Condition(0), Condition(2)); \
3678-
__ UNPACK_INSTR(i.OutputSimd128Register(), i.InputSimd128Register(1), \
3679-
Condition(0), Condition(0), Condition(2)); \
3680-
Register scratch_0 = r0; \
3681-
Register scratch_1 = r1; \
3682-
for (int lane = 0; lane < 2; lane++) { \
3683-
__ vlgv(scratch_0, kScratchDoubleReg, MemOperand(r0, lane), Condition(3)); \
3684-
__ vlgv(scratch_1, i.OutputSimd128Register(), MemOperand(r0, lane), \
3685-
Condition(3)); \
3686-
__ MulS64(scratch_0, scratch_1); \
3687-
scratch_0 = r1; \
3688-
scratch_1 = ip; \
3689-
} \
3690-
__ vlvgp(i.OutputSimd128Register(), r0, r1);
3675+
#define EXT_MUL(mul_even, mul_odd, merge, mode) \
3676+
Simd128Register dst = i.OutputSimd128Register(), \
3677+
src0 = i.InputSimd128Register(0), \
3678+
src1 = i.InputSimd128Register(1); \
3679+
__ mul_even(dst, src0, src1, Condition(0), Condition(0), Condition(mode)); \
3680+
__ mul_odd(kScratchDoubleReg, src0, src1, Condition(0), Condition(0), \
3681+
Condition(mode)); \
3682+
__ merge(dst, dst, kScratchDoubleReg, Condition(0), Condition(0), \
3683+
Condition(mode + 1));
36913684
case kS390_I64x2ExtMulLowI32x4S: {
3692-
ASSEMBLE_SIMD_I64X2_EXT_MUL(vupl)
3685+
EXT_MUL(vme, vmo, vmrl, 2)
36933686
break;
36943687
}
36953688
case kS390_I64x2ExtMulHighI32x4S: {
3696-
ASSEMBLE_SIMD_I64X2_EXT_MUL(vuph)
3689+
EXT_MUL(vme, vmo, vmrh, 2)
36973690
break;
36983691
}
36993692
case kS390_I64x2ExtMulLowI32x4U: {
3700-
ASSEMBLE_SIMD_I64X2_EXT_MUL(vupll)
3693+
EXT_MUL(vmle, vmlo, vmrl, 2)
37013694
break;
37023695
}
37033696
case kS390_I64x2ExtMulHighI32x4U: {
3704-
ASSEMBLE_SIMD_I64X2_EXT_MUL(vuplh)
3705-
break;
3706-
}
3707-
#undef ASSEMBLE_SIMD_I64X2_EXT_MUL
3708-
#define ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL(UNPACK_INSTR, MODE) \
3709-
__ UNPACK_INSTR(kScratchDoubleReg, i.InputSimd128Register(0), Condition(0), \
3710-
Condition(0), Condition(MODE)); \
3711-
__ UNPACK_INSTR(i.OutputSimd128Register(), i.InputSimd128Register(1), \
3712-
Condition(0), Condition(0), Condition(MODE)); \
3713-
__ vml(i.OutputSimd128Register(), kScratchDoubleReg, \
3714-
i.OutputSimd128Register(), Condition(0), Condition(0), \
3715-
Condition(MODE + 1));
3697+
EXT_MUL(vmle, vmlo, vmrh, 2)
3698+
break;
3699+
}
37163700
case kS390_I32x4ExtMulLowI16x8S: {
3717-
ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL(vupl, 1)
3701+
EXT_MUL(vme, vmo, vmrl, 1)
37183702
break;
37193703
}
37203704
case kS390_I32x4ExtMulHighI16x8S: {
3721-
ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL(vuph, 1)
3705+
EXT_MUL(vme, vmo, vmrh, 1)
37223706
break;
37233707
}
37243708
case kS390_I32x4ExtMulLowI16x8U: {
3725-
ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL(vupll, 1)
3709+
EXT_MUL(vmle, vmlo, vmrl, 1)
37263710
break;
37273711
}
37283712
case kS390_I32x4ExtMulHighI16x8U: {
3729-
ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL(vuplh, 1)
3713+
EXT_MUL(vmle, vmlo, vmrh, 1)
37303714
break;
37313715
}
3716+
37323717
case kS390_I16x8ExtMulLowI8x16S: {
3733-
ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL(vupl, 0)
3718+
EXT_MUL(vme, vmo, vmrl, 0)
37343719
break;
37353720
}
37363721
case kS390_I16x8ExtMulHighI8x16S: {
3737-
ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL(vuph, 0)
3722+
EXT_MUL(vme, vmo, vmrh, 0)
37383723
break;
37393724
}
37403725
case kS390_I16x8ExtMulLowI8x16U: {
3741-
ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL(vupll, 0)
3726+
EXT_MUL(vmle, vmlo, vmrl, 0)
37423727
break;
37433728
}
37443729
case kS390_I16x8ExtMulHighI8x16U: {
3745-
ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL(vuplh, 0)
3730+
EXT_MUL(vmle, vmlo, vmrh, 0)
37463731
break;
37473732
}
3748-
#undef ASSEMBLE_SIMD_I32X4_I16X8_EXT_MUL
3733+
#undef EXT_MUL
37493734
#define EXT_ADD_PAIRWISE(lane_size, mul_even, mul_odd) \
37503735
Simd128Register src = i.InputSimd128Register(0); \
37513736
Simd128Register dst = i.OutputSimd128Register(); \

src/execution/s390/simulator-s390.cc

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,8 @@ void Simulator::EvalTableInit() {
772772
V(vsum, VSUM, 0xE764) /* type = VRR_C VECTOR SUM ACROSS WORD */ \
773773
V(vsumg, VSUMG, 0xE765) /* type = VRR_C VECTOR SUM ACROSS DOUBLEWORD */ \
774774
V(vpk, VPK, 0xE794) /* type = VRR_C VECTOR PACK */ \
775+
V(vmrl, VMRL, 0xE760) /* type = VRR_C VECTOR MERGE LOW */ \
776+
V(vmrh, VMRH, 0xE761) /* type = VRR_C VECTOR MERGE HIGH */ \
775777
V(vpks, VPKS, 0xE797) /* type = VRR_B VECTOR PACK SATURATE */ \
776778
V(vpkls, VPKLS, 0xE795) /* type = VRR_B VECTOR PACK LOGICAL SATURATE */ \
777779
V(vupll, VUPLL, 0xE7D4) /* type = VRR_A VECTOR UNPACK LOGICAL LOW */ \
@@ -3397,6 +3399,53 @@ EVALUATE(VSUMG) {
33973399
}
33983400
#undef CASE
33993401

3402+
// Interleaves one half of the lanes of r2 and r3 into r1: result lane 2*i
// comes from r2 and result lane 2*i+1 from the same-numbered lane of r3.
//
//   type         - lane type; fixes the lane width and therefore the number
//                  of lanes per half (index_limit).
//   is_low_side  - when true the source lanes start at index_limit (the
//                  second half of the lane indices), otherwise at lane 0.
//
// Expects r1, r2 and r3 to be in scope at the expansion site and must be
// expanded inside its own braces because it declares locals.
//
// All result lanes are gathered into a temporary before r1 is written: r1 may
// be the same register as r2 or r3 (e.g. `merge(dst, dst, scratch)`), and
// writing r1 lane by lane would overwrite source lanes that are still to be
// read, most visibly on the merge-high path.
#define VECTOR_MERGE(type, is_low_side)                                       \
  constexpr size_t index_limit = (kSimd128Size / sizeof(type)) / 2;           \
  /* Snapshot of the complete result, indexed by destination lane. */         \
  type merged_lanes[2 * index_limit];                                         \
  for (size_t i = 0, source_index = is_low_side ? i + index_limit : i;        \
       i < index_limit; i++, source_index++) {                                \
    merged_lanes[2 * i] = get_simd_register_by_lane<type>(r2, source_index);  \
    merged_lanes[(2 * i) + 1] =                                               \
        get_simd_register_by_lane<type>(r3, source_index);                    \
  }                                                                           \
  /* Only now touch the destination, so aliasing with a source is safe. */    \
  for (size_t i = 0; i < 2 * index_limit; i++) {                              \
    set_simd_register_by_lane<type>(r1, i, merged_lanes[i]);                  \
  }
3411+
// Emits one `case i:` arm for a switch over the element-size field: the arm
// expands VECTOR_MERGE for the given lane type and side inside its own braces
// and then breaks out of the switch.
#define CASE(i, type, is_low_side) \
3412+
case i: { \
3413+
VECTOR_MERGE(type, is_low_side) \
3414+
} break;
3415+
// Simulator handler for VMRL (VECTOR MERGE LOW, VRR_C format).
// Decodes the operands r1, r2, r3 and the fields m6, m5, m4. Only m4 is used:
// it selects the lane type (0: int8_t, 1: int16_t, 2: int32_t, 3: int64_t);
// any other value hits UNREACHABLE(). Each arm expands VECTOR_MERGE with
// is_low_side = true, i.e. source lanes are read starting at index_limit.
// Returns `length`, which is not declared in this block (presumably provided
// by the decode macro - confirm).
EVALUATE(VMRL) {
3416+
DCHECK_OPCODE(VMRL);
3417+
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
3418+
// m6 and m5 are decoded but not consulted by this handler.
USE(m6);
3419+
USE(m5);
3420+
switch (m4) {
3421+
CASE(0, int8_t, true);
3422+
CASE(1, int16_t, true);
3423+
CASE(2, int32_t, true);
3424+
CASE(3, int64_t, true);
3425+
default:
3426+
UNREACHABLE();
3427+
}
3428+
return length;
3429+
}
3430+
3431+
// Simulator handler for VMRH (VECTOR MERGE HIGH, VRR_C format).
// Decodes the operands r1, r2, r3 and the fields m6, m5, m4. Only m4 is used:
// it selects the lane type (0: int8_t, 1: int16_t, 2: int32_t, 3: int64_t);
// any other value hits UNREACHABLE(). Each arm expands VECTOR_MERGE with
// is_low_side = false, i.e. source lanes are read starting at lane 0.
// Returns `length`, which is not declared in this block (presumably provided
// by the decode macro - confirm).
EVALUATE(VMRH) {
3432+
DCHECK_OPCODE(VMRH);
3433+
DECODE_VRR_C_INSTRUCTION(r1, r2, r3, m6, m5, m4);
3434+
// m6 and m5 are decoded but not consulted by this handler.
USE(m6);
3435+
USE(m5);
3436+
switch (m4) {
3437+
CASE(0, int8_t, false);
3438+
CASE(1, int16_t, false);
3439+
CASE(2, int32_t, false);
3440+
CASE(3, int64_t, false);
3441+
default:
3442+
UNREACHABLE();
3443+
}
3444+
return length;
3445+
}
3446+
#undef CASE
3447+
#undef VECTOR_MERGE
3448+
34003449
template <class S, class D>
34013450
void VectorPack(Simulator* sim, int dst, int src1, int src2, bool saturate,
34023451
const D& max = 0, const D& min = 0) {

0 commit comments

Comments
 (0)