// Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. // Copyright (c) 2020, 2022, Arm Limited. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License version 2 only, as // published by the Free Software Foundation. // // This code is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // version 2 for more details (a copy is included in the LICENSE file that // accompanied this code). // // You should have received a copy of the GNU General Public License version // 2 along with this work; if not, write to the Free Software Foundation, // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. // // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA // or visit www.oracle.com if you need additional information or have any // questions. // // dnl Generate the warning // This file is automatically generated by running "m4 aarch64_neon_ad.m4". 
Do not edit ----
dnl
dnl NOTE: line structure is significant in this file. dnl discards everything
dnl up to and including the next newline, a # met while macro output is
dnl rescanned hides the rest of that line from further expansion, and the
dnl emitted // comments end at the newline. Keep one directive per line.

// AArch64 NEON Architecture Description File

dnl
dnl ORL2I(type): expands to orL2I when type is I and to nothing otherwise.
define(`ORL2I', `ifelse($1,I,orL2I)')dnl
dnl
dnl error(arg): expands to a diagnostic naming the invalid argument and then
dnl calls m4exit with status 1.
define(`error', `__program__:__file__:__line__: Invalid argument ``$1''m4exit(`1')')dnl
dnl
dnl iTYPE2SIMD(type): maps an integral element type letter to a SIMD
dnl arrangement letter (B to B, S to H, I to S, L to D); anything else is an error.
define(`iTYPE2SIMD',
`ifelse($1, `B', `B',
        $1, `S', `H',
        $1, `I', `S',
        $1, `L', `D',
        `error($1)')')dnl
dnl
dnl fTYPE2SIMD(type): maps F to S and D to D; anything else is an error.
define(`fTYPE2SIMD',
`ifelse($1, `F', `S',
        $1, `D', `D',
        `error($1)')')dnl
dnl
dnl TYPE2DATATYPE(type): maps an element type letter to the suffix used after
dnl T_ in predicates (BYTE, SHORT, INT, LONG, FLOAT, DOUBLE).
define(`TYPE2DATATYPE',
`ifelse($1, `B', `BYTE',
        $1, `S', `SHORT',
        $1, `I', `INT',
        $1, `L', `LONG',
        $1, `F', `FLOAT',
        $1, `D', `DOUBLE',
        `error($1)')')dnl
dnl
// ====================VECTOR INSTRUCTIONS==================================

// ------------------------------ Load/store/reinterpret -----------------------
dnl VLoadStore(mnemonic, enc_suffix, load|store, bytes, vec_class, bits, operand, extra_predicate)
dnl Emits one vector load or store rule. The rule is guarded by memory_size()
dnl being equal to the byte count, optionally prefixed by the extra predicate
dnl text in the last argument. The pipe class is chosen from bytes * 8 (128 or 64).
define(`VLoadStore', `
// ifelse(load, $3, Load, Store) Vector ($6 bits)
instruct $3V$4`'(vec$5 $7, vmem$4 mem)
%{
  predicate($8`n->as_'ifelse(load, $3, Load, Store)Vector()->memory_size() == $4);
  match(Set ifelse(load, $3, dst (LoadVector mem), mem (StoreVector mem src)));
  ins_cost(4 * INSN_COST);
  format %{ "$1 ifelse(load, $3, `$dst,$mem', `$mem,$src')\t# vector ($6 bits)" %}
  ins_encode( `aarch64_enc_'ifelse(load, $3, ldr, str)v$2($7, mem) );
  ins_pipe(v$3`_reg_mem'ifelse(eval($4 * 8), 128, 128, 64));
%}')dnl
dnl        $1    $2 $3     $4  $5 $6   $7   $8
VLoadStore(ldrh, H, load,  2,  D, 16,  dst, UseSVE == 0 && )
VLoadStore(ldrs, S, load,  4,  D, 32,  dst, UseSVE == 0 && )
VLoadStore(ldrd, D, load,  8,  D, 64,  dst, UseSVE == 0 && )
VLoadStore(ldrq, Q, load,  16, X, 128, dst, UseSVE == 0 && )
VLoadStore(strh, H, store, 2,  D, 16,  src, )
VLoadStore(strs, S, store, 4,  D, 32,  src, )
VLoadStore(strd, D, store, 8,  D, 64,  src, )
VLoadStore(strq, Q, store, 16, X, 128, src, )
dnl
dnl REINTERPRET(vec_class, bytes)
dnl Emits a zero-cost in-place reinterpret rule for the case where the result
dnl and the input both have the given length in bytes; nothing is encoded.
define(`REINTERPRET', `
instruct reinterpret$1`'(vec$1 dst)
%{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == $2 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $2);
  match(Set dst (VectorReinterpret dst));
  ins_cost(0);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(pipe_class_empty);
%}')dnl
dnl         $1 $2
REINTERPRET(D, 8)
REINTERPRET(X, 16)
dnl
dnl REINTERPRET_DX(src_class, dst_class, dst_bytes, src_bytes)
dnl Emits a reinterpret rule between the D and X register classes. The encoding
dnl is an orr of src with itself using the T8B arrangement.
define(`REINTERPRET_DX', `
instruct reinterpret$1`'2$2`'(vec$2 dst, vec$1 src)
%{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == $3 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $4);
  match(Set dst (VectorReinterpret src));
  ins_cost(INSN_COST);
  format %{ " # reinterpret $dst,$src\t# $1 to $2" %}
  ins_encode %{
    // The higher 64-bits of the "dst" register must be cleared to zero.
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
  %}
  ins_pipe(vlogical64);
%}')dnl
dnl            $1 $2 $3  $4
REINTERPRET_DX(D, X, 16, 8)
REINTERPRET_DX(X, D, 8,  16)
dnl
dnl REINTERPRET_SX(src_name, dst_name, dst_class, src_class, dst_bytes, src_bytes)
dnl Emits a reinterpret rule where one side is 4 bytes long. The encoding is a
dnl dup with the S variant from src into dst.
define(`REINTERPRET_SX', `
instruct reinterpret$1`'2$2`'(vec$3 dst, vec$4 src)
%{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == $5 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $6);
  match(Set dst (VectorReinterpret src));
  ins_cost(INSN_COST);
  format %{ " # reinterpret $dst,$src\t# $1 to $2" %}
  ins_encode %{
    // The higher bits of the "dst" register must be cleared to zero.
    __ dup(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl            $1 $2 $3 $4 $5  $6
REINTERPRET_SX(S, X, X, D, 16, 4)
REINTERPRET_SX(X, S, D, X, 4,  16)
REINTERPRET_SX(S, D, D, D, 8,  4)
REINTERPRET_SX(D, S, D, D, 4,  8)
dnl

// ------------------------------ Vector cast -------------------------------
dnl
dnl VECTOR_CAST_I2I(lanes, from, to, dst_class, src_class, insn, src_arr, dst_arr)
dnl Emits a single-instruction integral cast rule; insn is called with the
dnl destination arrangement first and the source arrangement last.
define(`VECTOR_CAST_I2I', `
instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src)
%{
  predicate(n->as_Vector()->length() == $1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
  match(Set dst (VectorCast$2`'2X src));
  format %{ "$6 $dst, T$8, $src, T$7\t# convert $1$2 to $1$3 vector" %}
  ins_encode %{
    __ $6(as_FloatRegister($dst$$reg), __ T$8, as_FloatRegister($src$$reg), __ T$7);
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl             $1 $2 $3 $4 $5 $6    $7  $8
VECTOR_CAST_I2I(8, B, S, X, D, sxtl, 8B, 8H)
VECTOR_CAST_I2I(4, B, S, D, D, sxtl, 8B, 8H)
VECTOR_CAST_I2I(8, S, B, D, X, xtn,  8H, 8B)
VECTOR_CAST_I2I(4, S, B, D, D, xtn,  8H, 8B)
VECTOR_CAST_I2I(4, S, I, X, D, sxtl, 4H, 4S)
VECTOR_CAST_I2I(4, I, S, D, X, xtn,  4S, 4H)
VECTOR_CAST_I2I(2, I, L, X, D, sxtl, 2S, 2D)
VECTOR_CAST_I2I(2, L, I, D, X, xtn,  2D, 2S)
dnl
dnl VECTOR_CAST_I2I_L(lanes, from, to, dst_class, src_class, insn, arr1_src, arr1_dst, arr2_src, arr2_dst)
dnl Same as VECTOR_CAST_I2I but applies insn twice: first from src into dst,
dnl then from dst into dst with the second pair of arrangements.
dnl The tenth argument is written $10, which relies on GNU m4.
define(`VECTOR_CAST_I2I_L', `
instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src)
%{
  predicate(n->as_Vector()->length() == $1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
  match(Set dst (VectorCast$2`'2X src));
  format %{ "$6 $dst, T$8, $src, T$7\n\t"
            "$6 $dst, T$10, $dst, T$9\t# convert $1$2 to $1$3 vector"
  %}
  ins_encode %{
    __ $6(as_FloatRegister($dst$$reg), __ T$8, as_FloatRegister($src$$reg), __ T$7);
    __ $6(as_FloatRegister($dst$$reg), __ T$10, as_FloatRegister($dst$$reg), __ T$9);
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl               $1 $2 $3 $4 $5 $6    $7  $8  $9  $10
VECTOR_CAST_I2I_L(4, I, B, D, X, xtn,  4S, 4H, 8H, 8B)
VECTOR_CAST_I2I_L(4, B, I, X, D, sxtl, 8B, 8H, 4H, 4S)
dnl
dnl vcvt2Lto2F: converts each long lane separately through rscratch1; lane 1
dnl is converted into tmp and then inserted into lane 1 of dst.

instruct vcvt2Lto2F(vecD dst, vecX src, vRegF tmp)
%{
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastL2X src));
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov rscratch1, $src, D, 0\n\t"
            "scvtfs $dst, rscratch1\n\t"
            "umov rscratch1, $src, D, 1\n\t"
            "scvtfs $tmp, rscratch1\n\t"
            "ins $dst, S, $tmp, 1, 0\t# convert 2L to 2F vector"
  %}
  ins_encode %{
    __ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 0);
    __ scvtfs(as_FloatRegister($dst$$reg), rscratch1);
    __ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 1);
    __ scvtfs(as_FloatRegister($tmp$$reg), rscratch1);
    __ ins(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($tmp$$reg), 1, 0);
  %}
  ins_pipe(pipe_slow);
%}
dnl
dnl VECTOR_CAST_I2F(lanes, from, to, vec_class, arrangement)
dnl Emits a same-width integral to floating point cast using one scvtfv.
define(`VECTOR_CAST_I2F', `
instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$4 src)
%{
  predicate(n->as_Vector()->length() == $1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
  match(Set dst (VectorCast$2`'2X src));
  format %{ "scvtfv T$5, $dst, $src\t# convert $1$2 to $1$3 vector" %}
  ins_encode %{
    __ scvtfv(__ T$5, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl             $1 $2 $3 $4 $5
VECTOR_CAST_I2F(2, I, F, D, 2S)
VECTOR_CAST_I2F(4, I, F, X, 4S)
VECTOR_CAST_I2F(2, L, D, X, 2D)
dnl
dnl VECTOR_CAST_I2F_L(lanes, from, to, dst_class, src_class, src_arr, dst_arr)
dnl Emits a widening integral to floating point cast: sxtl into dst, then
dnl scvtfv in place on dst.
define(`VECTOR_CAST_I2F_L', `
instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src)
%{
  predicate(n->as_Vector()->length() == $1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
  match(Set dst (VectorCast$2`'2X src));
  format %{ "sxtl $dst, T$7, $src, T$6\n\t"
            "scvtfv T$7, $dst, $dst\t# convert $1$2 to $1$3 vector"
  %}
  ins_encode %{
    __ sxtl(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6);
    __ scvtfv(__ T$7, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl               $1 $2 $3 $4 $5 $6  $7
VECTOR_CAST_I2F_L(4, S, F, X, D, 4H, 4S)
VECTOR_CAST_I2F_L(2, I, D, X, D, 2S, 2D)
dnl
dnl vcvt4Bto4F: two sxtl widening steps (8B to 8H, 4H to 4S) followed by scvtfv.

instruct vcvt4Bto4F(vecX dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastB2X src));
  format %{ "sxtl $dst, T8H, $src, T8B\n\t"
            "sxtl $dst, T4S, $dst, T4H\n\t"
            "scvtfv T4S, $dst, $dst\t# convert 4B to 4F vector"
  %}
  ins_encode %{
    __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
    __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
    __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vcvt2Fto2L(vecX dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastF2X src));
  format %{ "fcvtl $dst, T2D, $src, T2S\n\t"
            "fcvtzs $dst, T2D, $dst\t# convert 2F to 2L vector"
  %}
  ins_encode %{
    __ fcvtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
    __ fcvtzs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_slow);
%}
dnl
dnl VECTOR_CAST_F2I(lanes, from, to, vec_class, arrangement)
dnl Emits a same-width floating point to integral cast using one fcvtzs.
define(`VECTOR_CAST_F2I', `
instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$4 src)
%{
  predicate(n->as_Vector()->length() == $1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
  match(Set dst (VectorCast$2`'2X src));
  format %{ "fcvtzs $dst, T$5, $src\t# convert $1$2 to $1$3 vector" %}
  ins_encode %{
    __ fcvtzs(as_FloatRegister($dst$$reg), __ T$5, as_FloatRegister($src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl             $1 $2 $3 $4 $5
VECTOR_CAST_F2I(2, F, I, D, 2S)
VECTOR_CAST_F2I(4, F, I, X, 4S)
VECTOR_CAST_F2I(2, D, L, X, 2D)

instruct vcvt4Fto4S(vecD dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastF2X src));
  format %{ "fcvtzs $dst, T4S, $src\n\t"
            "xtn $dst, T4H, $dst, T4S\t# convert 4F to 4S vector"
  %}
  ins_encode %{
    __ fcvtzs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
    __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
  %}
  ins_pipe(pipe_slow);
%}
dnl
dnl vcvt2Dto2I: copies lane 1 of src into lane 0 of dst, converts both doubles
dnl with the scalar fcvtzdw into rscratch1 and rscratch2, then assembles the
dnl two ints in dst. The in-code note gives the reason for not using the
dnl vector form.

instruct vcvt2Dto2I(vecD dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastD2X src));
  effect(TEMP_DEF dst);
  format %{ "ins $dst, D, $src, 0, 1\n\t"
            "fcvtzdw rscratch1, $src\n\t"
            "fcvtzdw rscratch2, $dst\n\t"
            "fmovs $dst, rscratch1\n\t"
            "mov $dst, S, 1, rscratch2\t#convert 2D to 2I vector"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), 0, 1);
    // We can't use fcvtzs(vector, integer) instruction here because we need
    // saturation arithmetic. See JDK-8276151.
    __ fcvtzdw(rscratch1, as_FloatRegister($src$$reg));
    __ fcvtzdw(rscratch2, as_FloatRegister($dst$$reg));
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
    __ mov(as_FloatRegister($dst$$reg), __ S, 1, rscratch2);
  %}
  ins_pipe(pipe_slow);
%}

instruct vcvt4Fto4B(vecD dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorCastF2X src));
  format %{ "fcvtzs $dst, T4S, $src\n\t"
            "xtn $dst, T4H, $dst, T4S\n\t"
            "xtn $dst, T8B, $dst, T8H\t# convert 4F to 4B vector"
  %}
  ins_encode %{
    __ fcvtzs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
    __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
    __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
  %}
  ins_pipe(pipe_slow);
%}
dnl
dnl VECTOR_CAST_F2F(from, to, dst_class, src_class, insn, src_arr, dst_arr)
dnl Emits a two-lane cast between float and double using a single insn.
define(`VECTOR_CAST_F2F', `
instruct vcvt2$1to2$2`'(vec$3 dst, vec$4 src)
%{
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (VectorCast$1`'2X src));
  format %{ "$5 $dst, T$7, $src, T$6\t# convert 2$1 to 2$2 vector" %}
  ins_encode %{
    __ $5(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6);
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl             $1 $2 $3 $4 $5     $6  $7
VECTOR_CAST_F2F(F, D, X, D, fcvtl, 2S, 2D)
VECTOR_CAST_F2F(D, F, D, X, fcvtn, 2D, 2S)
dnl
dnl VECTOR_JAVA_FROUND(fp_type, src_name, dst_type, arrangement, lanes, dst_basic_type, vec_operand)
dnl Emits a RoundV rule, only when UseSVE == 0, that delegates to
dnl vector_round_neon with three temporary vector registers.
define(`VECTOR_JAVA_FROUND', `
instruct vround$7$2to$5$3($7 dst, $7 src, $7 tmp1, $7 tmp2, $7 tmp3)
%{
  predicate(UseSVE == 0 &&
            n->as_Vector()->length() == $5 && n->bottom_type()->is_vect()->element_basic_type() == T_$6);
  match(Set dst (RoundV$1 src));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
  format %{ "vround $dst, $4, $src\t# round $7 $2 to $5$3 vector" %}
  ins_encode %{
    __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
                         as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
                         as_FloatRegister($tmp3$$reg), __ $4);
  %}
  ins_pipe(pipe_class_default);
%}')dnl $1 $2 $3 $4 $5 $6 $7
VECTOR_JAVA_FROUND(F, 2F, I, T2S, 2, INT, vecD)
VECTOR_JAVA_FROUND(F, 4F, I, T4S, 4, INT, vecX)
VECTOR_JAVA_FROUND(D, 2D, L, T2D, 2, LONG, vecX)

// ------------------------------ Reduction -------------------------------
dnl
dnl REDUCE_ADD_BORS(lanes, type, vec_class, sxt_suffix)
dnl Emits an add reduction for byte or short lanes: addv across the vector,
dnl smov of lane 0 into dst, addw with the scalar input, then sign extension
dnl with sxtb or sxth.
define(`REDUCE_ADD_BORS', `
instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, vec$3 tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  match(Set dst (AddReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "addv $tmp, T$1`'iTYPE2SIMD($2), $vsrc\n\t"
            "smov $dst, $tmp, iTYPE2SIMD($2), 0\n\t"
            "addw $dst, $dst, $isrc\n\t"
            "sxt$4 $dst, $dst\t# add reduction$1$2"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T$1`'iTYPE2SIMD($2), as_FloatRegister($vsrc$$reg));
    __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($2), 0);
    __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
    __ sxt$4($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl             $1  $2 $3 $4
REDUCE_ADD_BORS(8,  B, D, b)
REDUCE_ADD_BORS(16, B, X, b)
REDUCE_ADD_BORS(4,  S, D, h)
REDUCE_ADD_BORS(8,  S, X, h)
dnl

instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp)
%{
  match(Set dst (AddReductionVL isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "addpd $tmp, $vsrc\n\t"
            "umov $dst, $tmp, D, 0\n\t"
            "add $dst, $isrc, $dst\t# add reduction2L"
  %}
  ins_encode %{
    __ addpd(as_FloatRegister($tmp$$reg), as_FloatRegister($vsrc$$reg));
    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
    __ add($dst$$Register, $isrc$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
dnl
dnl The reduce_mul rules below repeatedly move the upper part of the running
dnl product down with ins and multiply it with the lower part using mulv, then
dnl finish the last two lanes in general registers with mulw, sign-extending
dnl after each scalar multiply for byte and short lanes.

instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MulReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
  format %{ "ins $vtmp1, S, $vsrc, 0, 1\n\t"
            "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t"
            "ins $vtmp2, H, $vtmp1, 0, 1\n\t"
            "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
            "umov $itmp, $vtmp2, B, 0\n\t"
            "mulw $dst, $itmp, $isrc\n\t"
            "sxtb $dst, $dst\n\t"
            "umov $itmp, $vtmp2, B, 1\n\t"
            "mulw $dst, $itmp, $dst\n\t"
            "sxtb $dst, $dst\t# mul reduction8B"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($vtmp1$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
            as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($vtmp2$$reg), __ H,
           as_FloatRegister($vtmp1$$reg), 0, 1);
    __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
            as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
    __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
    __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
    __ sxtb($dst$$Register, $dst$$Register);
    __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
    __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
    __ sxtb($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MulReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
  format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t"
            "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t"
            "ins $vtmp2, S, $vtmp1, 0, 1\n\t"
            "mulv $vtmp1, T8B, $vtmp2, $vtmp1\n\t"
            "ins $vtmp2, H, $vtmp1, 0, 1\n\t"
            "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
            "umov $itmp, $vtmp2, B, 0\n\t"
            "mulw $dst, $itmp, $isrc\n\t"
            "sxtb $dst, $dst\n\t"
            "umov $itmp, $vtmp2, B, 1\n\t"
            "mulw $dst, $itmp, $dst\n\t"
            "sxtb $dst, $dst\t# mul reduction16B"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($vtmp1$$reg), __ D,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
            as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($vtmp2$$reg), __ S,
           as_FloatRegister($vtmp1$$reg), 0, 1);
    __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
            as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
    __ ins(as_FloatRegister($vtmp2$$reg), __ H,
           as_FloatRegister($vtmp1$$reg), 0, 1);
    __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
            as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
    __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
    __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
    __ sxtb($dst$$Register, $dst$$Register);
    __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
    __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
    __ sxtb($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MulReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp);
  format %{ "ins $vtmp, S, $vsrc, 0, 1\n\t"
            "mulv $vtmp, T4H, $vtmp, $vsrc\n\t"
            "umov $itmp, $vtmp, H, 0\n\t"
            "mulw $dst, $itmp, $isrc\n\t"
            "sxth $dst, $dst\n\t"
            "umov $itmp, $vtmp, H, 1\n\t"
            "mulw $dst, $itmp, $dst\n\t"
            "sxth $dst, $dst\t# mul reduction4S"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($vtmp$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ mulv(as_FloatRegister($vtmp$$reg), __ T4H,
            as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
    __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 0);
    __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
    __ sxth($dst$$Register, $dst$$Register);
    __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 1);
    __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
    __ sxth($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MulReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
  format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t"
            "mulv $vtmp1, T4H, $vtmp1, $vsrc\n\t"
            "ins $vtmp2, S, $vtmp1, 0, 1\n\t"
            "mulv $vtmp2, T4H, $vtmp2, $vtmp1\n\t"
            "umov $itmp, $vtmp2, H, 0\n\t"
            "mulw $dst, $itmp, $isrc\n\t"
            "sxth $dst, $dst\n\t"
            "umov $itmp, $vtmp2, H, 1\n\t"
            "mulw $dst, $itmp, $dst\n\t"
            "sxth $dst, $dst\t# mul reduction8S"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($vtmp1$$reg), __ D,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ mulv(as_FloatRegister($vtmp1$$reg), __ T4H,
            as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($vtmp2$$reg), __ S,
           as_FloatRegister($vtmp1$$reg), 0, 1);
    __ mulv(as_FloatRegister($vtmp2$$reg), __ T4H,
            as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
    __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 0);
    __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
    __ sxth($dst$$Register, $dst$$Register);
    __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 1);
    __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
    __ sxth($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
%{
  match(Set dst (MulReductionVL isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov $tmp, $vsrc, D, 0\n\t"
            "mul $dst, $isrc, $tmp\n\t"
            "umov $tmp, $vsrc, D, 1\n\t"
            "mul $dst, $dst, $tmp\t# mul reduction2L"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
    __ mul($dst$$Register, $isrc$$Register, $tmp$$Register);
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
    __ mul($dst$$Register, $dst$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
dnl
dnl REDUCE_MAX_MIN_INT(max|min, lanes, type, vec_class, Max|Min, mov_prefix, condition)
dnl Emits an integral max or min reduction: across-lanes smaxv or sminv, move
dnl of lane 0 into dst with smov or umov, then cmpw and cselw against the
dnl scalar input. The flags register is declared as killed.
define(`REDUCE_MAX_MIN_INT', `
instruct reduce_$1$2$3`'(iRegINoSp dst, iRegIorL2I isrc, vec$4 vsrc, vec$4 tmp, rFlagsReg cr)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
  match(Set dst ($5ReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "s$1v $tmp, T$2`'iTYPE2SIMD($3), $vsrc\n\t"
            "$6mov $dst, $tmp, iTYPE2SIMD($3), 0\n\t"
            "cmpw $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc $7\t# $1 reduction$2$3"
  %}
  ins_encode %{
    __ s$1v(as_FloatRegister($tmp$$reg), __ T$2`'iTYPE2SIMD($3), as_FloatRegister($vsrc$$reg));
    __ $6mov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($3), 0);
    __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
    __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$7);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                $1   $2  $3 $4 $5   $6 $7
REDUCE_MAX_MIN_INT(max, 8,  B, D, Max, s, GT)
REDUCE_MAX_MIN_INT(max, 16, B, X, Max, s, GT)
REDUCE_MAX_MIN_INT(max, 4,  S, D, Max, s, GT)
REDUCE_MAX_MIN_INT(max, 8,  S, X, Max, s, GT)
REDUCE_MAX_MIN_INT(max, 4,  I, X, Max, u, GT)
REDUCE_MAX_MIN_INT(min, 8,  B, D, Min, s, LT)
REDUCE_MAX_MIN_INT(min, 16, B, X, Min, s, LT)
REDUCE_MAX_MIN_INT(min, 4,  S, D, Min, s, LT)
REDUCE_MAX_MIN_INT(min, 8,  S, X, Min, s, LT)
REDUCE_MAX_MIN_INT(min, 4,  I, X, Min, u, LT)
dnl
dnl REDUCE_MAX_MIN_2I(max|min, Max|Min, condition)
dnl Two-int variant: pairwise smaxp or sminp of vsrc with itself, umov of
dnl lane 0, then cmpw and cselw against the scalar input.
define(`REDUCE_MAX_MIN_2I', `
instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst ($2ReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "s$1p $tmp, T2S, $vsrc, $vsrc\n\t"
            "umov $dst, $tmp, S, 0\n\t"
            "cmpw $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc $3\t# $1 reduction2I"
  %}
  ins_encode %{
    __ s$1p(as_FloatRegister($tmp$$reg), __ T2S, as_FloatRegister($vsrc$$reg), as_FloatRegister($vsrc$$reg));
    __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
    __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
    __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$3);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl               $1   $2   $3
REDUCE_MAX_MIN_2I(max, Max, GT)
REDUCE_MAX_MIN_2I(min, Min, LT)
dnl
dnl REDUCE_MAX_MIN_2L(max|min, Max|Min, condition)
dnl Two-long variant done entirely in general registers: each lane is moved
dnl out with umov and folded in with cmp and csel.
define(`REDUCE_MAX_MIN_2L', `
instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst ($2ReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "umov $tmp, $vsrc, D, 0\n\t"
            "cmp $isrc,$tmp\n\t"
            "csel $dst, $isrc, $tmp $3\n\t"
            "umov $tmp, $vsrc, D, 1\n\t"
            "cmp $dst, $tmp\n\t"
            "csel $dst, $dst, $tmp $3\t# $1 reduction2L"
  %}
  ins_encode %{
    __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 0);
    __ cmp(as_Register($isrc$$reg), as_Register($tmp$$reg));
    __ csel(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg), Assembler::$3);
    __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 1);
    __ cmp(as_Register($dst$$reg), as_Register($tmp$$reg));
    __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg), Assembler::$3);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl               $1   $2   $3
REDUCE_MAX_MIN_2L(max, Max, GT)
REDUCE_MAX_MIN_2L(min, Min, LT)
dnl
dnl REDUCE_MINMAX_FORD(max|min, vector_insn, scalar_insn, lanes, type, src_prefix, vec_class)
dnl Emits a float or double max or min reduction: one vector reduction insn
dnl into dst followed by the scalar insn against the scalar input. The
dnl argument list of the vector insn depends on the lane count and type.
define(`REDUCE_MINMAX_FORD', `
instruct reduce_$1$4$5(vReg$5 dst, vReg$5 $6src, vec$7 vsrc) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'ifelse($5, F, FLOAT, DOUBLE));
  match(Set dst (ifelse($1, max, Max, Min)ReductionV $6src vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "$2 $dst, ifelse($4, 2, $vsrc`, 'ifelse($5, F, S, D), ` T4S, $vsrc')\n\t"
            "$3 $dst, $dst, $$6src\t# $1 reduction$4$5" %}
  ins_encode %{
    __ $2(as_FloatRegister($dst$$reg), ifelse($4, 4, `__ T4S, as_FloatRegister($vsrc$$reg))',
                                              $4$5, 2F, `as_FloatRegister($vsrc$$reg), __ S)',
                                              $4$5, 2D, `as_FloatRegister($vsrc$$reg), __ D)');
    __ $3(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($$6src$$reg));
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl                $1   $2     $3     $4 $5 $6 $7
REDUCE_MINMAX_FORD(max, fmaxp, fmaxs, 2, F, f, D)
REDUCE_MINMAX_FORD(max, fmaxv, fmaxs, 4, F, f, X)
REDUCE_MINMAX_FORD(max, fmaxp, fmaxd, 2, D, d, X)
REDUCE_MINMAX_FORD(min, fminp, fmins, 2, F, f, D)
REDUCE_MINMAX_FORD(min, fminv, fmins, 4, F, f, X)
REDUCE_MINMAX_FORD(min, fminp, fmind, 2, D, d, X)
dnl
dnl The REDUCE_LOGIC_OP_* macros below take the 32-bit insn stem (and, orr,
dnl eor), the node name prefix (And, Or, Xor) and, for 128-bit inputs, the
dnl 64-bit insn name. They move the vector contents into general registers
dnl with umov, fold the value onto itself with LSR-shifted operands, combine
dnl it with the scalar input, and sign-extend for byte and short lanes.
define(`REDUCE_LOGIC_OP_8B', `
instruct reduce_$1`'8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst ($2ReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov $tmp, $vsrc, S, 0\n\t"
            "umov $dst, $vsrc, S, 1\n\t"
            "$1w $dst, $dst, $tmp\n\t"
            "$1w $dst, $dst, $dst, LSR #16\n\t"
            "$1w $dst, $dst, $dst, LSR #8\n\t"
            "$1w $dst, $isrc, $dst\n\t"
            "sxtb $dst, $dst\t# $1 reduction8B"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
    __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
    __ $1w($dst$$Register, $dst$$Register, $tmp$$Register);
    __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
    __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
    __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
    __ sxtb($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                $1   $2
REDUCE_LOGIC_OP_8B(and, And)
REDUCE_LOGIC_OP_8B(orr, Or)
REDUCE_LOGIC_OP_8B(eor, Xor)
define(`REDUCE_LOGIC_OP_16B', `
instruct reduce_$1`'16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst ($2ReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov $tmp, $vsrc, D, 0\n\t"
            "umov $dst, $vsrc, D, 1\n\t"
            "$3 $dst, $dst, $tmp\n\t"
            "$3 $dst, $dst, $dst, LSR #32\n\t"
            "$1w $dst, $dst, $dst, LSR #16\n\t"
            "$1w $dst, $dst, $dst, LSR #8\n\t"
            "$1w $dst, $isrc, $dst\n\t"
            "sxtb $dst, $dst\t# $1 reduction16B"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
    __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
    __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
    __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
    __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
    __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
    __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
    __ sxtb($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                 $1   $2   $3
REDUCE_LOGIC_OP_16B(and, And, andr)
REDUCE_LOGIC_OP_16B(orr, Or,  orr )
REDUCE_LOGIC_OP_16B(eor, Xor, eor )
dnl
define(`REDUCE_LOGIC_OP_4S', `
instruct reduce_$1`'4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst ($2ReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov $tmp, $vsrc, S, 0\n\t"
            "umov $dst, $vsrc, S, 1\n\t"
            "$1w $dst, $dst, $tmp\n\t"
            "$1w $dst, $dst, $dst, LSR #16\n\t"
            "$1w $dst, $isrc, $dst\n\t"
            "sxth $dst, $dst\t# $1 reduction4S"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
    __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
    __ $1w($dst$$Register, $dst$$Register, $tmp$$Register);
    __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
    __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
    __ sxth($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                $1   $2
REDUCE_LOGIC_OP_4S(and, And)
REDUCE_LOGIC_OP_4S(orr, Or)
REDUCE_LOGIC_OP_4S(eor, Xor)
dnl
define(`REDUCE_LOGIC_OP_8S', `
instruct reduce_$1`'8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst ($2ReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov $tmp, $vsrc, D, 0\n\t"
            "umov $dst, $vsrc, D, 1\n\t"
            "$3 $dst, $dst, $tmp\n\t"
            "$3 $dst, $dst, $dst, LSR #32\n\t"
            "$1w $dst, $dst, $dst, LSR #16\n\t"
            "$1w $dst, $isrc, $dst\n\t"
            "sxth $dst, $dst\t# $1 reduction8S"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
    __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
    __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
    __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
    __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
    __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
    __ sxth($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                $1   $2   $3
REDUCE_LOGIC_OP_8S(and, And, andr)
REDUCE_LOGIC_OP_8S(orr, Or,  orr )
REDUCE_LOGIC_OP_8S(eor, Xor, eor )
dnl
define(`REDUCE_LOGIC_OP_2I', `
instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst ($2ReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov $tmp, $vsrc, S, 0\n\t"
            "$1w $dst, $tmp, $isrc\n\t"
            "umov $tmp, $vsrc, S, 1\n\t"
            "$1w $dst, $tmp, $dst\t# $1 reduction2I"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
    __ $1w($dst$$Register, $tmp$$Register, $isrc$$Register);
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
    __ $1w($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                $1   $2
REDUCE_LOGIC_OP_2I(and, And)
REDUCE_LOGIC_OP_2I(orr, Or)
REDUCE_LOGIC_OP_2I(eor, Xor)
dnl
define(`REDUCE_LOGIC_OP_4I', `
instruct reduce_$1`'4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst ($2ReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov $tmp, $vsrc, D, 0\n\t"
            "umov $dst, $vsrc, D, 1\n\t"
            "$3 $dst, $dst, $tmp\n\t"
            "$3 $dst, $dst, $dst, LSR #32\n\t"
            "$1w $dst, $isrc, $dst\t# $1 reduction4I"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
    __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
    __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
    __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
    __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                $1   $2   $3
REDUCE_LOGIC_OP_4I(and, And, andr)
REDUCE_LOGIC_OP_4I(orr, Or,  orr )
REDUCE_LOGIC_OP_4I(eor, Xor, eor )
dnl
define(`REDUCE_LOGIC_OP_2L', `
instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst ($2ReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov $tmp, $vsrc, D, 0\n\t"
            "$3 $dst, $isrc, $tmp\n\t"
            "umov $tmp, $vsrc, D, 1\n\t"
            "$3 $dst, $dst, $tmp\t# $1 reduction2L"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
    __ $3($dst$$Register, $isrc$$Register, $tmp$$Register);
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
    __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                $1   $2   $3
REDUCE_LOGIC_OP_2L(and, And, andr)
REDUCE_LOGIC_OP_2L(orr, Or,  orr )
REDUCE_LOGIC_OP_2L(eor, Xor, eor )
dnl

// ------------------------------ Vector insert ---------------------------------
dnl VECTOR_INSERT_I($1, $2, $3, $4, $5)
dnl VECTOR_INSERT_I(rule_name, vector_length_in_bytes, reg_variant, vreg, ireg)
dnl Emits an integral lane insert: src is copied to dst with orr unless both
dnl are the same register, then val is moved into lane idx. When reg_variant
dnl is D the predicate requires long elements; otherwise it accepts byte,
dnl short or int elements and the lane variant is derived at encode time.
define(`VECTOR_INSERT_I', `
instruct $1($4 dst, $4 src, $5 val, immI idx)
%{
  predicate(ifelse($3, D, n->bottom_type()->is_vect()->element_basic_type() == T_LONG,
                   (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
                    n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
                    n->bottom_type()->is_vect()->element_basic_type() == T_INT)));
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(2 * INSN_COST);
  format %{ "orr $dst, T$2B, $src, $src\n\t"
            "mov $dst, $3, $idx, $val\t`#' insert into vector ($3)" %}
  ins_encode %{
    if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
      __ orr(as_FloatRegister($dst$$reg), __ T$2B,
             as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
    }
    __ mov(as_FloatRegister($dst$$reg), __ ifelse($3, D, D, elemType_to_regVariant(Matcher::vector_element_basic_type(this))),
           $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl             $1        $2  $3     $4    $5
VECTOR_INSERT_I(insertID, 8,  B/H/S, vecD, iRegIorL2I)
VECTOR_INSERT_I(insertIX, 16, B/H/S, vecX, iRegIorL2I)
VECTOR_INSERT_I(insert2L, 16, D,     vecX, iRegL)
dnl
dnl VECTOR_INSERT_F(type, vec_class, name_suffix)
dnl Emits a float or double lane insert: src is always copied to dst with orr,
dnl then lane 0 of val is inserted at lane idx with ins.
define(`VECTOR_INSERT_F', `
instruct insert$3`'(vec$2 dst, vec$2 src, vReg$1 val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($1));
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(2 * INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "orr $dst, ifelse($2, D, T8B, T16B), $src, $src\n\t"
            "ins $dst, ifelse($1, F, S, D), $val, $idx, 0\t# insert into vector($3)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ ifelse($2, D, T8B, T16B),
           as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
    __ ins(as_FloatRegister($dst$$reg), __ ifelse($1, F, S, D),
           as_FloatRegister($val$$reg), $idx$$constant, 0);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl             $1 $2 $3
VECTOR_INSERT_F(F, D, 2F)
VECTOR_INSERT_F(F, X, 4F)
VECTOR_INSERT_F(D, X, 2D)
dnl

// ------------------------------ Vector extract ---------------------------------
dnl VECTOR_EXTRACT_I(lanes, type, ireg_type, vec_class, mov_prefix, lane_variant)
dnl Emits an integral lane extract using smov or umov at lane idx.
define(`VECTOR_EXTRACT_I', `
instruct extract$1$2`'(iReg$3NoSp dst, vec$4 src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == $1);
  match(Set dst (Extract$2 src idx));
  ins_cost(INSN_COST);
  format %{ "$5mov $dst, $src, $6, $idx\t# extract from vector($1$2)" %}
  ins_encode %{
    __ $5mov($dst$$Register, as_FloatRegister($src$$reg), __ $6, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl              $1  $2 $3 $4 $5 $6
VECTOR_EXTRACT_I(8,  B, I, D, s, B)
VECTOR_EXTRACT_I(16, B, I, X, s, B)
VECTOR_EXTRACT_I(4,  S, I, D, s, H)
VECTOR_EXTRACT_I(8,  S, I, X, s, H)
VECTOR_EXTRACT_I(2,  I, I, D, u, S)
VECTOR_EXTRACT_I(4,  I, I, X, u, S)
VECTOR_EXTRACT_I(2,  L, L, X, u, D)
dnl
dnl VECTOR_EXTRACT_F(lanes, type, vec_class, lane_variant)
dnl Emits a float or double lane extract. Nothing is emitted when idx is 0 and
dnl dst is the same register as src; idx 0 with distinct registers uses fmovs
dnl or fmovd; any other idx uses ins into lane 0 of dst.
define(`VECTOR_EXTRACT_F', `
instruct extract$1$2`'(vReg$2 dst, vec$3 src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == $1);
  match(Set dst (Extract$2 src idx));
  ins_cost(INSN_COST);
  format %{ "ins $dst, $4, $src, 0, $idx\t# extract from vector($1$2)" %}
  ins_encode %{
    if ((0 == $idx$$constant) &&
        (as_FloatRegister($dst$$reg) == as_FloatRegister($src$$reg))) {
      /* empty */
    } else if ($idx$$constant == 0) {
      __ ifelse($2, F, fmovs, fmovd)(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
    } else {
      __ ins(as_FloatRegister($dst$$reg), __ $4,
             as_FloatRegister($src$$reg), 0, $idx$$constant);
    }
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl              $1 $2 $3 $4
VECTOR_EXTRACT_F(2, F, D, S)
VECTOR_EXTRACT_F(4, F, X, S)
VECTOR_EXTRACT_F(2, D, X, D)
dnl

// ------------------------------ Vector comparison ---------------------------------

instruct vcmpD(vecD dst, vecD src1, vecD src2, immI cond) %{ predicate(n->as_Vector()->length_in_bytes() == 8); match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); format %{ "vcmpD $dst, 
$src1, $src2\t# vector compare " %} ins_cost(INSN_COST); ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); assert(type2aelembytes(bt) != 8, "not supported"); __ neon_compare(as_FloatRegister($dst$$reg), bt, as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), (int)$cond$$constant, /*isQ*/ false); %} ins_pipe(vdop64); %} instruct vcmpX(vecX dst, vecX src1, vecX src2, immI cond) %{ predicate(n->as_Vector()->length_in_bytes() == 16); match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); format %{ "vcmpX $dst, $src1, $src2\t# vector compare " %} ins_cost(INSN_COST); ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); __ neon_compare(as_FloatRegister($dst$$reg), bt, as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), (int)$cond$$constant, /*isQ*/ true); %} ins_pipe(vdop128); %} // ------------------------------ Vector mul ----------------------------------- instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (MulVL src1 src2)); ins_cost(INSN_COST); effect(TEMP tmp1, TEMP tmp2); format %{ "umov $tmp1, $src1, D, 0\n\t" "umov $tmp2, $src2, D, 0\n\t" "mul $tmp2, $tmp2, $tmp1\n\t" "mov $dst, T2D, 0, $tmp2\t# insert into vector(2L)\n\t" "umov $tmp1, $src1, D, 1\n\t" "umov $tmp2, $src2, D, 1\n\t" "mul $tmp2, $tmp2, $tmp1\n\t" "mov $dst, T2D, 1, $tmp2\t# insert into vector(2L)" %} ins_encode %{ __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 0); __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 0); __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg)); __ mov(as_FloatRegister($dst$$reg), __ D, 0, $tmp2$$Register); __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 1); __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 1); __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg)); __ mov(as_FloatRegister($dst$$reg), __ D, 1, $tmp2$$Register); 
%} ins_pipe(pipe_slow); %} // --------------------------------- Vector not -------------------------------- dnl define(`MATCH_RULE', `ifelse($1, I, `match(Set dst (XorV src (ReplicateB m1))); match(Set dst (XorV src (ReplicateS m1))); match(Set dst (XorV src (ReplicateI m1)));', `match(Set dst (XorV src (ReplicateL m1)));')')dnl dnl define(`VECTOR_NOT', ` instruct vnot$1$2`'(vec$3 dst, vec$3 src, imm$2_M1 m1) %{ predicate(n->as_Vector()->length_in_bytes() == $4); MATCH_RULE($2) ins_cost(INSN_COST); format %{ "not $dst, T$5, $src\t# vector ($5)" %} ins_encode %{ __ notr(as_FloatRegister($dst$$reg), __ T$5, as_FloatRegister($src$$reg)); %} ins_pipe(pipe_class_default); %}')dnl dnl $1 $2 $3 $4 $5 VECTOR_NOT(2, I, D, 8, 8B) VECTOR_NOT(4, I, X, 16, 16B) VECTOR_NOT(2, L, X, 16, 16B) undefine(MATCH_RULE) // ------------------------------ Vector and_not ------------------------------- dnl define(`MATCH_RULE', `ifelse($1, I, `match(Set dst (AndV src1 (XorV src2 (ReplicateB m1)))); match(Set dst (AndV src1 (XorV src2 (ReplicateS m1)))); match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));', `match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));')')dnl dnl define(`VECTOR_AND_NOT', ` instruct vand_not$1$2`'(vec$3 dst, vec$3 src1, vec$3 src2, imm$2_M1 m1) %{ predicate(n->as_Vector()->length_in_bytes() == $4); MATCH_RULE($2) ins_cost(INSN_COST); format %{ "bic $dst, T$5, $src1, $src2\t# vector ($5)" %} ins_encode %{ __ bic(as_FloatRegister($dst$$reg), __ T$5, as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); %} ins_pipe(pipe_class_default); %}')dnl dnl $1 $2 $3 $4 $5 VECTOR_AND_NOT(2, I, D, 8, 8B) VECTOR_AND_NOT(4, I, X, 16, 16B) VECTOR_AND_NOT(2, L, X, 16, 16B) undefine(MATCH_RULE) dnl // ------------------------------ Vector max/min ------------------------------- dnl define(`PREDICATE', `ifelse($1, 8B, `predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);', 
`predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_$3);')')dnl dnl define(`VECTOR_MAX_MIN_INT', ` instruct v$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2) %{ PREDICATE(`$2$3', $2, TYPE2DATATYPE($3)) match(Set dst ($5V src1 src2)); ins_cost(INSN_COST); format %{ "$1v $dst, T$2`'iTYPE2SIMD($3), $src1, $src2\t# vector ($2$3)" %} ins_encode %{ __ $1v(as_FloatRegister($dst$$reg), __ T$2`'iTYPE2SIMD($3), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); %} ins_pipe(vdop$6); %}')dnl dnl $1 $2 $3 $4 $5 $6 VECTOR_MAX_MIN_INT(max, 8, B, D, Max, 64) VECTOR_MAX_MIN_INT(max, 16, B, X, Max, 128) VECTOR_MAX_MIN_INT(max, 4, S, D, Max, 64) VECTOR_MAX_MIN_INT(max, 8, S, X, Max, 128) VECTOR_MAX_MIN_INT(max, 2, I, D, Max, 64) VECTOR_MAX_MIN_INT(max, 4, I, X, Max, 128) VECTOR_MAX_MIN_INT(min, 8, B, D, Min, 64) VECTOR_MAX_MIN_INT(min, 16, B, X, Min, 128) VECTOR_MAX_MIN_INT(min, 4, S, D, Min, 64) VECTOR_MAX_MIN_INT(min, 8, S, X, Min, 128) VECTOR_MAX_MIN_INT(min, 2, I, D, Min, 64) VECTOR_MAX_MIN_INT(min, 4, I, X, Min, 128) undefine(PREDICATE) dnl define(`VECTOR_MAX_MIN_LONG', ` instruct v$1`'2L`'(vecX dst, vecX src1, vecX src2) %{ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst ($2V src1 src2)); ins_cost(INSN_COST); effect(TEMP dst); format %{ "cmgt $dst, T2D, $src1, $src2\t# vector (2L)\n\t" "bsl $dst, T16B, $$3, $$4\t# vector (16B)" %} ins_encode %{ __ cmgt(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); __ bsl(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($$3$$reg), as_FloatRegister($$4$$reg)); %} ins_pipe(vdop128); %}')dnl dnl $1 $2 $3 $4 VECTOR_MAX_MIN_LONG(max, Max, src1, src2) VECTOR_MAX_MIN_LONG(min, Min, src2, src1) dnl // --------------------------------- blend (bsl) ---------------------------- dnl define(`VECTOR_BSL', ` instruct vbsl$1B`'(vec$2 dst, vec$2 src1, vec$2 src2) %{ 
predicate(n->as_Vector()->length_in_bytes() == $1); match(Set dst (VectorBlend (Binary src1 src2) dst)); ins_cost(INSN_COST); format %{ "bsl $dst, T$1B, $src2, $src1\t# vector ($1B)" %} ins_encode %{ __ bsl(as_FloatRegister($dst$$reg), __ T$1B, as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); %} ins_pipe(vlogical$3); %}')dnl dnl $1 $2 $3 VECTOR_BSL(8, D, 64) VECTOR_BSL(16, X, 128) dnl // --------------------------------- Load/store Mask ---------------------------- dnl define(`PREDICATE', `ifelse($1, load, `predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);', `predicate(n->as_Vector()->length() == $2);')')dnl dnl define(`VECTOR_LOAD_STORE_MASK_B', ` instruct $1mask$2B`'(vec$3 dst, vec$3 src $5 $6) %{ PREDICATE($1, $2) match(Set dst (Vector$4Mask src $6)); ins_cost(INSN_COST); format %{ "negr $dst, T$2B, $src\t# $1 mask ($2B to $2B)" %} ins_encode %{ __ negr(as_FloatRegister($dst$$reg), __ T$2B, as_FloatRegister($src$$reg)); %} ins_pipe(pipe_class_default); %}')dnl dnl $1 $2 $3 $4 $5 $6 VECTOR_LOAD_STORE_MASK_B(load, 8, D, Load) VECTOR_LOAD_STORE_MASK_B(load, 16, X, Load) VECTOR_LOAD_STORE_MASK_B(store, 8, D, Store, `, immI_1', size) VECTOR_LOAD_STORE_MASK_B(store, 16, X, Store, `, immI_1', size) undefine(PREDICATE)dnl dnl define(`PREDICATE', `ifelse($1, load, `predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);', `predicate(n->as_Vector()->length() == $2);')')dnl dnl define(`VECTOR_LOAD_STORE_MASK_S', ` instruct $1mask$2S`'(vec$3 dst, vec$4 src $9 $10) %{ PREDICATE($1, $2) match(Set dst (Vector$5Mask src $10)); ins_cost(INSN_COST); format %{ "$6 $dst, T8$8, $src, T8$7\n\t" "negr $dst, T8$8, $dst\t# $1 mask ($2$7 to $2$8)" %} ins_encode %{ __ $6(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($src$$reg), __ T8$7); __ negr(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %}')dnl dnl $1 $2 $3 $4 $5 
$6 $7 $8 $9 $10 VECTOR_LOAD_STORE_MASK_S(load, 4, D, D, Load, uxtl, B, H) VECTOR_LOAD_STORE_MASK_S(load, 8, X, D, Load, uxtl, B, H) VECTOR_LOAD_STORE_MASK_S(store, 4, D, D, Store, xtn, H, B, `, immI_2', size) VECTOR_LOAD_STORE_MASK_S(store, 8, D, X, Store, xtn, H, B, `, immI_2', size) undefine(PREDICATE)dnl dnl define(`PREDICATE', `ifelse($1, load, `predicate(n->as_Vector()->length() == $2 && (n->bottom_type()->is_vect()->element_basic_type() == T_INT || n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));', `predicate(n->as_Vector()->length() == $2);')')dnl dnl define(`VECTOR_LOAD_STORE_MASK_I', ` instruct $1mask$2I`'(vec$3 dst, vec$4 src $12 $13) %{ PREDICATE($1, $2) match(Set dst (Vector$5Mask src $13)); ins_cost(INSN_COST); format %{ "$6 $dst, T$10$8, $src, T$10$7\t# $2$7 to $2$8\n\t" "$6 $dst, T$11$9, $dst, T$11$8\t# $2$8 to $2$9\n\t" "negr $dst, T$11$9, $dst\t# $1 mask ($2$7 to $2$9)" %} ins_encode %{ __ $6(as_FloatRegister($dst$$reg), __ T$10$8, as_FloatRegister($src$$reg), __ T$10$7); __ $6(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg), __ T$11$8); __ negr(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %}')dnl dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 $10$11 $12 $13 VECTOR_LOAD_STORE_MASK_I(load, 2, D, D, Load, uxtl, B, H, S, 8, 4) VECTOR_LOAD_STORE_MASK_I(load, 4, X, D, Load, uxtl, B, H, S, 8, 4) VECTOR_LOAD_STORE_MASK_I(store, 2, D, D, Store, xtn, S, H, B, 4, 8, `, immI_4', size) VECTOR_LOAD_STORE_MASK_I(store, 4, D, X, Store, xtn, S, H, B, 4, 8, `, immI_4', size) undefine(PREDICATE) dnl instruct loadmask2L(vecX dst, vecD src) %{ predicate(n->as_Vector()->length() == 2 && (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); match(Set dst (VectorLoadMask src)); ins_cost(INSN_COST); format %{ "uxtl $dst, T8H, $src, T8B\t# 2B to 2S\n\t" "uxtl $dst, T4S, $dst, T4H\t# 2S to 2I\n\t" "uxtl $dst, T2D, $dst, T2S\t# 2I to 
2L\n\t" "neg $dst, T2D, $dst\t# load mask (2B to 2L)" %} ins_encode %{ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); __ uxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg), __ T2S); __ negr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %} instruct storemask2L(vecD dst, vecX src, immI_8 size) %{ predicate(n->as_Vector()->length() == 2); match(Set dst (VectorStoreMask src size)); ins_cost(INSN_COST); format %{ "xtn $dst, T2S, $src, T2D\t# 2L to 2I\n\t" "xtn $dst, T4H, $dst, T4S\t# 2I to 2S\n\t" "xtn $dst, T8B, $dst, T8H\t# 2S to 2B\n\t" "neg $dst, T8B, $dst\t# store mask (2L to 2B)" %} ins_encode %{ __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D); __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S); __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H); __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_slow); %} // vector mask cast dnl define(`VECTOR_MASK_CAST', ` instruct vmaskcast$1`'(vec$1 dst) %{ predicate(n->bottom_type()->is_vect()->length_in_bytes() == $2 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $2 && n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length()); match(Set dst (VectorMaskCast dst)); ins_cost(0); format %{ "vmaskcast $dst\t# empty" %} ins_encode %{ // empty %} ins_pipe(pipe_class_empty); %}')dnl dnl $1 $2 VECTOR_MASK_CAST(D, 8) VECTOR_MASK_CAST(X, 16) dnl //-------------------------------- LOAD_IOTA_INDICES---------------------------------- dnl define(`PREDICATE', `ifelse($1, 8, `predicate(UseSVE == 0 && (n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);', `predicate(UseSVE == 0 && 
n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);')')dnl dnl define(`VECTOR_LOAD_CON', ` instruct loadcon$1B`'(vec$2 dst, immI0 src) %{ PREDICATE($1) match(Set dst (VectorLoadConst src)); ins_cost(INSN_COST); format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %} ins_encode %{ __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices())); __ ldr$3(as_FloatRegister($dst$$reg), rscratch1); %} ins_pipe(pipe_class_memory); %}')dnl dnl $1 $2 $3 VECTOR_LOAD_CON(8, D, d) VECTOR_LOAD_CON(16, X, q) undefine(PREDICATE) dnl //-------------------------------- LOAD_SHUFFLE ---------------------------------- dnl define(`VECTOR_LOAD_SHUFFLE_B', ` instruct loadshuffle$1B`'(vec$2 dst, vec$2 src) %{ predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); match(Set dst (VectorLoadShuffle src)); ins_cost(INSN_COST); format %{ "mov $dst, T$1B, $src\t# get $1B shuffle" %} ins_encode %{ __ orr(as_FloatRegister($dst$$reg), __ T$1B, as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); %} ins_pipe(pipe_class_default); %}')dnl dnl $1 $2 VECTOR_LOAD_SHUFFLE_B(8, D) VECTOR_LOAD_SHUFFLE_B(16, X) dnl define(`VECTOR_LOAD_SHUFFLE_S', ` instruct loadshuffle$1S`'(vec$2 dst, vec$3 src) %{ predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); match(Set dst (VectorLoadShuffle src)); ins_cost(INSN_COST); format %{ "uxtl $dst, T8H, $src, T8B\t# $1B to $1H" %} ins_encode %{ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); %} ins_pipe(pipe_class_default); %}')dnl dnl $1 $2 $3 VECTOR_LOAD_SHUFFLE_S(4, D, D) VECTOR_LOAD_SHUFFLE_S(8, X, D) dnl instruct loadshuffle4I(vecX dst, vecD src) %{ predicate(n->as_Vector()->length() == 4 && (n->bottom_type()->is_vect()->element_basic_type() == T_INT || n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); match(Set dst (VectorLoadShuffle src)); 
ins_cost(INSN_COST); format %{ "uxtl $dst, T8H, $src, T8B\t# 4B to 4H \n\t" "uxtl $dst, T4S, $dst, T4H\t# 4H to 4S" %} ins_encode %{ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); %} ins_pipe(pipe_slow); %} //-------------------------------- Rearrange ------------------------------------- // Here is an example that rearranges a NEON vector with 4 ints: // Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1] // 1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3]. // 2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1]. // 3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1]. // 4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404] // and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404]. // 5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100] // and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504] // 6. Use Vm as index register, and use V1 as table register. // Then get V2 as the result by tbl NEON instructions. // Notes: // Step 1 matches VectorLoadConst. // Step 3 matches VectorLoadShuffle. // Step 4, 5, 6 match VectorRearrange. // For VectorRearrange short/int, the reason why such complex calculation is // required is because NEON tbl supports bytes table only, so for short/int, we // need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl // to implement rearrange. 
dnl VECTOR_REARRANGE_B($1, $2)
dnl Byte rearrange rule: a single tbl with src as the one-register table and
dnl shuffle as the index vector.
dnl   $1  lane count, also used for the T<n>B arrangement and the rule name
dnl   $2  vector register class suffix (D or X)
define(`VECTOR_REARRANGE_B', `
instruct rearrange$1B`'(vec$2 dst, vec$2 src, vec$2 shuffle)
%{
  predicate(n->as_Vector()->length() == $1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorRearrange src shuffle));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "tbl $dst, T$1B, {$src}, 1, $shuffle\t# rearrange $1B" %}
  ins_encode %{
    __ tbl(as_FloatRegister($dst$$reg), __ T$1B,
           as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                $1  $2
VECTOR_REARRANGE_B(8,  D)
VECTOR_REARRANGE_B(16, X)
dnl
dnl VECTOR_REARRANGE_S($1, $2, $3)
dnl Short rearrange rule. The halfword shuffle is multiplied by a vector whose
dnl bytes are all 0x02, then a vector whose halfwords are all 0x0100 is added
dnl bytewise, and the result is used as the byte index vector of a tbl on src.
dnl   $1  lane count (halfword arrangement T<n>H)
dnl   $2  vector register class suffix (D or X)
dnl   $3  byte count of the vector (byte arrangement T<n>B)
define(`VECTOR_REARRANGE_S', `
instruct rearrange$1S`'(vec$2 dst, vec$2 src, vec$2 shuffle, vec$2 tmp0, vec$2 tmp1)
%{
  predicate(n->as_Vector()->length() == $1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorRearrange src shuffle));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
  format %{ "mov $tmp0, T$3B, CONSTANT\t# constant 0x0202020202020202\n\t"
            "mov $tmp1, T$1H, CONSTANT\t# constant 0x0100010001000100\n\t"
            "mulv $dst, T$1H, $shuffle, $tmp0\n\t"
            "addv $dst, T$3B, $dst, $tmp1\n\t"
            "tbl $dst, T$3B, {$src}, 1, $dst\t# rearrange $1S" %}
  ins_encode %{
    __ mov(as_FloatRegister($tmp0$$reg), __ T$3B, 0x02);
    __ mov(as_FloatRegister($tmp1$$reg), __ T$1H, 0x0100);
    __ mulv(as_FloatRegister($dst$$reg), __ T$1H,
            as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
    __ addv(as_FloatRegister($dst$$reg), __ T$3B,
            as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
    __ tbl(as_FloatRegister($dst$$reg), __ T$3B,
           as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl                $1 $2 $3
VECTOR_REARRANGE_S(4, D, 8)
VECTOR_REARRANGE_S(8, X, 16)

dnl rearrange4I: same scheme as the short rule, for four 32-bit lanes (int or
dnl float element type). Multiplier bytes are 0x04, addend words are 0x03020100.
instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
%{
  predicate(n->as_Vector()->length() == 4 &&
           (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorRearrange src shuffle));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
  format %{ "mov $tmp0, T16B, CONSTANT\t# constant 0x0404040404040404\n\t"
            "mov $tmp1, T4S, CONSTANT\t# constant 0x0302010003020100\n\t"
            "mulv $dst, T4S, $shuffle, $tmp0\n\t"
            "addv $dst, T16B, $dst, $tmp1\n\t"
            "tbl $dst, T16B, {$src}, 1, $dst\t# rearrange 4I" %}
  ins_encode %{
    __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04);
    __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100);
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
    __ tbl(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(pipe_slow);
%}

//-------------------------------- Anytrue/alltrue -----------------------------
dnl
dnl ANYTRUE_IN_MASK($1, $2)
dnl Matches VectorTest whose predicate is BoolTest::ne. Sums all byte lanes of
dnl src1 with addv, moves byte 0 of the sum to dst, and sets dst to 1 when that
dnl byte is non-zero. src2 is matched but not read; the flags register is killed.
dnl   $1  byte lane count (T<n>B arrangement and rule name)
dnl   $2  vector register class suffix (D or X)
define(`ANYTRUE_IN_MASK', `
instruct anytrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr)
%{
  predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set dst (VectorTest src1 src2 ));
  ins_cost(INSN_COST);
  effect(TEMP tmp, KILL cr);
  format %{ "addv $tmp, T$1B, $src1\n\t"
            "umov $dst, $tmp, B, 0\n\t"
            "cmp $dst, 0\n\t"
            "cset $dst\t# anytrue $1B" %}
  ins_encode %{
    // No need to use src2.
    __ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
    __ cmpw($dst$$Register, zr);
    __ csetw($dst$$Register, Assembler::NE);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl             $1  $2
ANYTRUE_IN_MASK(8,  D)
ANYTRUE_IN_MASK(16, X)
dnl
dnl ALLTRUE_IN_MASK($1, $2)
dnl Matches VectorTest whose predicate is BoolTest::overflow. Takes the unsigned
dnl minimum over all byte lanes of src1 with uminv, moves it to dst, and sets
dnl dst to 1 when that minimum equals 0xff. src2 is matched but not read; the
dnl flags register is killed.
dnl   $1  byte lane count (T<n>B arrangement and rule name)
dnl   $2  vector register class suffix (D or X)
define(`ALLTRUE_IN_MASK', `
instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr)
%{
  predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set dst (VectorTest src1 src2 ));
  ins_cost(INSN_COST);
  effect(TEMP tmp, KILL cr);
  format %{ "uminv $tmp, T$1B, $src1\n\t"
            "umov $dst, $tmp, B, 0\n\t"
            "cmp $dst, 0xff\n\t"
            "cset $dst\t# alltrue $1B" %}
  ins_encode %{
    // No need to use src2.
    __ uminv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
    __ cmpw($dst$$Register, 0xff);
    __ csetw($dst$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl             $1  $2
ALLTRUE_IN_MASK(8,  D)
ALLTRUE_IN_MASK(16, X)

// --------------------------------- ABS --------------------------------------
dnl
dnl VABS($1, $2, $3, $4, $5, $6, $7)
dnl Element-wise absolute value rule for the AbsV<type> node.
dnl   $1  mnemonic printed by the format string
dnl   $2  assembler method that is called
dnl   $3  lane count; the 8B rule additionally accepts a length of 4
dnl   $4  element type letter used in the node and rule names
dnl   $5  vector register class suffix (D or X)
dnl   $6  SIMD arrangement letter appended to T<$3>
dnl   $7  pipeline class suffix (64 or 128)
dnl For $4 equal to F or D the cost is INSN_COST * 3 and the vunop_fp pipeline
dnl class is used; every other type uses INSN_COST and the vlogical class.
define(`VABS', `
instruct vabs$3$4`'(vec$5 dst, vec$5 src)
%{
  predicate(ifelse($3$4, 8B, n->as_Vector()->length() == 4 || )n->as_Vector()->length() == $3);
  match(Set dst (AbsV$4 src));
  ins_cost(ifelse($4, F, INSN_COST * 3, $4, D, INSN_COST * 3, INSN_COST));
  format %{ "$1 $dst, T$3$6, $src\t# vector ($3$6)" %}
  ins_encode %{
    __ $2(as_FloatRegister($dst$$reg), __ T$3$6, as_FloatRegister($src$$reg));
  %}
  ins_pipe(ifelse($4, F, vunop_fp$7, $4, D, vunop_fp$7, vlogical$7));
%}')dnl
dnl  $1    $2    $3  $4 $5 $6 $7
VABS(abs,  absr, 8,  B, D, B, 64)
VABS(abs,  absr, 16, B, X, B, 128)
VABS(abs,  absr, 4,  S, D, H, 64)
VABS(abs,  absr, 8,  S, X, H, 128)
VABS(abs,  absr, 2,  I, D, S, 64)
VABS(abs,  absr, 4,  I, X, S, 128)
VABS(abs,  absr, 2,  L, X, D, 128)
VABS(fabs, fabs, 2,  F, D, S, 64)
VABS(fabs, fabs, 4,  F, X, S, 128)
VABS(fabs, fabs, 2,  D, X, D, 128)

// --------------------------------- FABS DIFF --------------------------------
dnl
dnl VFABD($1, $2, $3, $4, $5, $6, $7)
dnl Fuses AbsV<type> applied to SubV<type> of src1 and src2 into one
dnl instruction.
dnl   $1  mnemonic printed by the format string
dnl   $2  assembler method that is called
dnl   $3  lane count
dnl   $4  element type letter used in the node and rule names
dnl   $5  vector register class suffix (D or X)
dnl   $6  SIMD arrangement letter appended to T<$3>
dnl   $7  pipeline class suffix (64 or 128)
define(`VFABD', `
instruct vabd$3$4`'(vec$5 dst, vec$5 src1, vec$5 src2)
%{
  predicate(n->as_Vector()->length() == $3);
  match(Set dst (AbsV$4 (SubV$4 src1 src2)));
  ins_cost(INSN_COST * 3);
  format %{ "$1 $dst, T$3$6, $src1, $src2\t# vector ($3$6)" %}
  ins_encode %{
    __ $2(as_FloatRegister($dst$$reg), __ T$3$6,
          as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vunop_fp$7);
%}')dnl
dnl   $1    $2    $3 $4 $5 $6 $7
VFABD(fabd, fabd, 2, F, D, S, 64)
VFABD(fabd, fabd, 4, F, X, S, 128)
VFABD(fabd, fabd, 2, D, X, D, 128)
dnl
dnl VREPLICATE_REG($1, $2, $3, $4, $5, $6)
dnl Broadcasts a scalar register into every lane with dup. Only selected when
dnl UseSVE == 0.
dnl   $1  SIMD arrangement letter appended to T<$2> for dup
dnl   $2  lane count; the 8B rule also accepts length 4, the 4S rule length 2
dnl   $3  element type letter used in the Replicate node and rule names
dnl   $4  vector register class suffix (D or X); X selects the 128 pipe class
dnl   $5  operand class of the scalar source
dnl   $6  function converting the source register number (as_Register or
dnl       as_FloatRegister)
define(`VREPLICATE_REG', `
instruct replicate$2$3`'(vec$4 dst, $5 src)
%{
  predicate(UseSVE == 0 && ifelse($2$3, 8B,
                                  `(n->as_Vector()->length() == 8 || n->as_Vector()->length() == 4)',
                                  $2$3, 4S,
                                  `(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 2)',
                                  n->as_Vector()->length() == $2));
  match(Set dst (Replicate$3 src));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $src\t# vector ($2$3)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T$2$1, $6($src$$reg));
  %}
  ins_pipe(ifelse($5, iRegIorL2I, vdup_reg_reg,
                  $5, iRegL, vdup_reg_reg,
                  $3, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($4, X, 128, 64));
%}')dnl
dnl VREPLICATE_IMM($1, $2, $3, $4, $5, $6)
dnl Broadcasts an immediate into every lane with the vector mov. Only selected
dnl when UseSVE == 0. $1 is not referenced by the body; the arrangement letter
dnl is derived from $3 instead.
dnl   $2  lane count; the 8B rule also accepts length 4, the 4S rule length 2
dnl   $3  element type letter used in the Replicate node and rule names
dnl   $4  vector register class suffix (D or X); X selects the 128 pipe class
dnl   $5  operand class of the immediate
dnl   $6  optional text appended to the constant expression (a mask such as
dnl       & 0xff); empty when omitted
define(`VREPLICATE_IMM', `
instruct replicate$2$3_imm`'(vec$4 dst, $5 con)
%{
  predicate(UseSVE == 0 && ifelse($2$3, 8B,
                                  `(n->as_Vector()->length() == 8 || n->as_Vector()->length() == 4)',
                                  $2$3, 4S,
                                  `(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 2)',
                                  n->as_Vector()->length() == $2));
  match(Set dst (Replicate$3 con));
  ins_cost(INSN_COST);
  format %{ "movi $dst, $con\t`#' vector ($2`'ifelse($3, S, H, $3))" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T$2`'iTYPE2SIMD($3), $con$$constant`'$6);
  %}
  ins_pipe(vmovi_reg_imm`'ifelse($4, X, 128, 64));
%}')dnl
dnl            $1 $2  $3 $4 $5          $6
VREPLICATE_REG(B, 8,  B, D, iRegIorL2I, as_Register)
VREPLICATE_REG(B, 16, B, X, iRegIorL2I, as_Register)
VREPLICATE_IMM(B, 8,  B, D, immI, ` & 0xff')
VREPLICATE_IMM(B, 16, B, X, immI, ` & 0xff')
VREPLICATE_REG(H, 4, S, D, iRegIorL2I, as_Register) VREPLICATE_REG(H, 8, S, X, iRegIorL2I, as_Register) VREPLICATE_IMM(H, 4, S, D, immI, ` & 0xffff') VREPLICATE_IMM(H, 8, S, X, immI, ` & 0xffff') VREPLICATE_REG(S, 2, I, D, iRegIorL2I, as_Register) VREPLICATE_REG(S, 4, I, X, iRegIorL2I, as_Register) VREPLICATE_IMM(S, 2, I, D, immI) VREPLICATE_IMM(S, 4, I, X, immI) VREPLICATE_REG(D, 2, L, X, iRegL, as_Register) VREPLICATE_IMM(D, 2, L, X, immL) VREPLICATE_REG(S, 2, F, D, vRegF, as_FloatRegister) VREPLICATE_REG(S, 4, F, X, vRegF, as_FloatRegister) VREPLICATE_REG(D, 2, D, X, vRegD, as_FloatRegister) dnl // ====================REDUCTION ARITHMETIC==================================== dnl define(`REDUCE_ADD_INT', ` instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, vec$3 vtmp, iRegINoSp itmp) %{ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI isrc vsrc)); ins_cost(INSN_COST); effect(TEMP vtmp, TEMP itmp); format %{ ifelse($1, 2, `"addpv $vtmp, T2S, $vsrc, $vsrc\n\t"',`"addv $vtmp, T4S, $vsrc\n\t"') "umov $itmp, $vtmp, S, 0\n\t" "addw $dst, $itmp, $isrc\t# add reduction$1I" %} ins_encode %{ ifelse($1, 2, `__ addpv(as_FloatRegister($vtmp$$reg), __ T2S, as_FloatRegister($vsrc$$reg), as_FloatRegister($vsrc$$reg));', `__ addv(as_FloatRegister($vtmp$$reg), __ T4S, as_FloatRegister($vsrc$$reg));') __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0); __ addw($dst$$Register, $itmp$$Register, $isrc$$Register); %} ins_pipe(pipe_class_default); %}')dnl dnl $1 $2 $3 REDUCE_ADD_INT(2, I, D) REDUCE_ADD_INT(4, I, X) dnl define(`REDUCE_MUL_INT', ` instruct reduce_mul$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, ifelse($1, 2, iRegINoSp tmp`)', vecX vtmp`,' iRegINoSp itmp`)') %{ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI isrc vsrc)); ins_cost(INSN_COST); effect(TEMP ifelse($1, 2, tmp, vtmp), TEMP ifelse($1, 2, dst, itmp`,' TEMP 
dst));
  format %{ ifelse($1, 2, `"umov $tmp, $vsrc, S, 0\n\t"
            "mul $dst, $tmp, $isrc\n\t"
            "umov $tmp, $vsrc, S, 1\n\t"
            "mul $dst, $tmp, $dst\t# mul reduction2I"',`"ins $vtmp, D, $vsrc, 0, 1\n\t"
            "mulv $vtmp, T2S, $vtmp, $vsrc\n\t"
            "umov $itmp, $vtmp, S, 0\n\t"
            "mul $dst, $itmp, $isrc\n\t"
            "umov $itmp, $vtmp, S, 1\n\t"
            "mul $dst, $itmp, $dst\t# mul reduction4I"')
  %}
  ins_encode %{
    ifelse($1, 2, `__ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $isrc$$Register);
    __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);', `__ ins(as_FloatRegister($vtmp$$reg), __ D,
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ mulv(as_FloatRegister($vtmp$$reg), __ T2S,
            as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
    __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
    __ mul($dst$$Register, $itmp$$Register, $isrc$$Register);
    __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 1);
    __ mul($dst$$Register, $itmp$$Register, $dst$$Register);')
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl            $1 $2 $3
REDUCE_MUL_INT(2, I, D)
REDUCE_MUL_INT(4, I, X)
dnl
dnl REDUCE_MULORADD_FORD($1,   $2,    $3,   $4,     $5,       $6)
dnl REDUCE_MULORADD_FORD(insn, lanes, type, prefix, reg_size, op)
dnl   insn     scalar FP instruction used for every accumulation step
dnl   lanes    number of vector lanes folded into the result (2 or 4)
dnl   type     F or D; selects the vReg operand class and the ReductionV node
dnl   prefix   prepended to src to name the scalar input operand (fsrc, dsrc)
dnl   reg_size D or X; selects the vecD or vecX operand class
dnl   op       add or mul; selects AddReductionV or MulReductionV
dnl The lanes are accumulated one by one: lane 0 is combined with the scalar
dnl input first, then each further lane is copied into tmp with ins and
dnl combined with dst.
define(`REDUCE_MULORADD_FORD', `
instruct reduce_$6$2$3`'(vReg$3 dst, vReg$3 $4src, vec$5 vsrc, vec$5 tmp)
%{
  match(Set dst (ifelse($6, add, Add, Mul)ReductionV$3 $4src vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "$1 $dst, $$4src, $vsrc\n\t"
            "ins $tmp, ifelse($3, F, S, D), $vsrc, 0, 1\n\t"
            ifelse($2, 2, `"$1 $dst, $dst, $tmp\t# $6 reduction$2$3"',
            `"$1 $dst, $dst, $tmp\n\t"
            "ins $tmp, S, $vsrc, 0, 2\n\t"
            "$1 $dst, $dst, $tmp\n\t"
            "ins $tmp, S, $vsrc, 0, 3\n\t"
            "$1 $dst, $dst, $tmp\t# $6 reduction4F"')
  %}
  ins_encode %{
    __ $1(as_FloatRegister($dst$$reg),
            as_FloatRegister($$4src$$reg), as_FloatRegister($vsrc$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ ifelse($3, F, S, D),
           as_FloatRegister($vsrc$$reg), 0, 1);
    __ $1(as_FloatRegister($dst$$reg),
            as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));ifelse($2, 4, `
    __ ins(as_FloatRegister($tmp$$reg), __ ifelse($3, F, S, D),
           as_FloatRegister($vsrc$$reg), 0, 2);
    __ $1(as_FloatRegister($dst$$reg),
            as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($vsrc$$reg), 0, 3);
    __ $1(as_FloatRegister($dst$$reg),
            as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));')
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl                  $1     $2 $3 $4 $5 $6
REDUCE_MULORADD_FORD(fadds, 2, F, f, D, add)
REDUCE_MULORADD_FORD(fadds, 4, F, f, X, add)
REDUCE_MULORADD_FORD(fmuls, 2, F, f, D, mul)
REDUCE_MULORADD_FORD(fmuls, 4, F, f, X, mul)
REDUCE_MULORADD_FORD(faddd, 2, D, d, X, add)
REDUCE_MULORADD_FORD(fmuld, 2, D, d, X, mul)

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
dnl
dnl Parameters shared by VADD, VSUB, VMUL, VMLA, VMLS, VDIV, VSQRT and VNEG:
dnl   $1  assembler method, also printed as the mnemonic in the format string
dnl   $2  lane count
dnl   $3  element type letter appended to the ideal node name (AddVB, SubVF, ...)
dnl   $4  D or X; selects the vecD or vecX operand class and the pipeline width
dnl   $5  arrangement element letter; T$2$5 is passed to the assembler, with L
dnl       mapped to D where the macro contains that ifelse
dnl The 8B and 4S forms of VADD, VSUB and VMUL (and the 4S forms of VMLA and
dnl VMLS) also accept vectors with half as many lanes.
define(`VADD', `
instruct vadd$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
%{ifelse($2$3, 8B, `
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);',
          $2$3, 4S, `
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);',
          $2$5, 2D, , `
  predicate(n->as_Vector()->length() == $2);')
  match(Set dst (AddV$3 src1 src2));
  ins_cost(INSN_COST);
  format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %}
  ins_encode %{
    __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop`'ifelse($3, F, _fp, $3, D, _fp)`'ifelse($4, D, 64, 128));
%}')dnl
dnl  $1    $2  $3 $4 $5
VADD(addv, 8,  B, D, B)
VADD(addv, 16, B, X, B)
VADD(addv, 4,  S, D, H)
VADD(addv, 8,  S, X, H)
VADD(addv, 2,  I, D, S)
VADD(addv, 4,  I, X, S)
VADD(addv, 2,  L, X, L)
VADD(fadd, 2,  F, D, S)
VADD(fadd, 4,  F, X, S)
VADD(fadd, 2,  D, X, D)

// --------------------------------- SUB --------------------------------------
define(`VSUB', `
instruct vsub$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
%{ifelse($2$3, 8B, `
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);',
          $2$3, 4S, `
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);',`
  predicate(n->as_Vector()->length() == $2);')
  match(Set dst (SubV$3 src1 src2));
  ins_cost(INSN_COST);
  format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %}
  ins_encode %{
    __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop`'ifelse($3, F, _fp, $3, D, _fp)`'ifelse($4, D, 64, 128));
%}')dnl
dnl  $1    $2  $3 $4 $5
VSUB(subv, 8,  B, D, B)
VSUB(subv, 16, B, X, B)
VSUB(subv, 4,  S, D, H)
VSUB(subv, 8,  S, X, H)
VSUB(subv, 2,  I, D, S)
VSUB(subv, 4,  I, X, S)
VSUB(subv, 2,  L, X, L)
VSUB(fsub, 2,  F, D, S)
VSUB(fsub, 4,  F, X, S)
VSUB(fsub, 2,  D, X, D)

// --------------------------------- MUL --------------------------------------
define(`VMUL', `
instruct vmul$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
%{ifelse($2$3, 8B, `
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);',
          $2$3, 4S, `
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);',`
  predicate(n->as_Vector()->length() == $2);')
  match(Set dst (MulV$3 src1 src2));
  ins_cost(INSN_COST);
  format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %}
  ins_encode %{
    __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul`'ifelse($3, F, div_fp, $3, D, div_fp)`'ifelse($4, D, 64, 128));
%}')dnl
dnl  $1    $2  $3 $4 $5
VMUL(mulv, 8,  B, D, B)
VMUL(mulv, 16, B, X, B)
VMUL(mulv, 4,  S, D, H)
VMUL(mulv, 8,  S, X, H)
VMUL(mulv, 2,  I, D, S)
VMUL(mulv, 4,  I, X, S)
VMUL(fmul, 2,  F, D, S)
VMUL(fmul, 4,  F, X, S)
VMUL(fmul, 2,  D, X, D)

// --------------------------------- MLA --------------------------------------
dnl
dnl VMLA: mlav matches AddV over MulV; fmla matches FmaV and additionally
dnl requires UseFMA. dst is both the accumulator input and the result.
define(`VMLA', `ifelse($1, fmla, `
// dst + src1 * src2')
instruct vmla$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
%{ifelse($2$3, 4S, `
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);', $1, fmla, `
  predicate(UseFMA && n->as_Vector()->length() == $2);', `
  predicate(n->as_Vector()->length() == $2);')
  match(Set dst (ifelse($1, mlav, `AddV'$3` dst (MulV$3 src1 src2)', FmaV$3 `dst (Binary src1 src2)')));
  ins_cost(INSN_COST);
  format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %}
  ins_encode %{
    __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vm`'ifelse($3, F, uldiv_fp, $3, D, uldiv_fp, la)`'ifelse($4, D, 64, 128));
%}')dnl
dnl  $1    $2 $3 $4 $5
VMLA(mlav, 4, S, D, H)
VMLA(mlav, 8, S, X, H)
VMLA(mlav, 2, I, D, S)
VMLA(mlav, 4, I, X, S)
VMLA(fmla, 2, F, D, S)
VMLA(fmla, 4, F, X, S)
VMLA(fmla, 2, D, X, D)

// --------------------------------- MLS --------------------------------------
dnl
dnl VMLS: mlsv matches SubV over MulV; fmls matches FmaV with either
dnl multiplicand negated (two match rules) and additionally requires UseFMA.
define(`VMLS', `ifelse($1, fmls, `
// dst - src1 * src2')
instruct vmls$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2) %{ifelse($2$3, 4S, `
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);', $1, fmls, `
  predicate(UseFMA && n->as_Vector()->length() == $2);', `
  predicate(n->as_Vector()->length() == $2);')
  match(Set dst (ifelse($1, mlsv, `SubV'$3` dst (MulV$3 src1 src2)', FmaV$3 `dst (Binary (NegV'$3 `src1) src2)));
  match(Set dst (FmaV$3 dst (Binary src1 (NegV'$3 `src2))')));
  ins_cost(INSN_COST);
  format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %}
  ins_encode %{
    __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vm`'ifelse($3, F, uldiv_fp, $3, D, uldiv_fp, la)`'ifelse($4, D, 64, 128));
%}')dnl
dnl  $1    $2 $3 $4 $5
VMLS(mlsv, 4, S, D, H)
VMLS(mlsv, 8, S, X, H)
VMLS(mlsv, 2, I, D, S)
VMLS(mlsv, 4, I, X, S)
VMLS(fmls, 2, F, D, S)
VMLS(fmls, 4, F, X, S)
VMLS(fmls, 2, D, X, D)

// --------------- Vector Multiply-Add Shorts into Integer --------------------

instruct vmuladdS2I(vecX dst, vecX src1, vecX src2, vecX tmp) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MulAddVS2VI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "smullv $tmp, $src1, $src2\t# vector (4H)\n\t"
            "smullv $dst, $src1, $src2\t# vector (8H)\n\t"
            "addpv $dst, $tmp, $dst\t# vector (4S)" %}
  ins_encode %{
    __ smullv(as_FloatRegister($tmp$$reg), __ T4H,
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg));
    __ smullv(as_FloatRegister($dst$$reg), __ T8H,
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg));
    __ addpv(as_FloatRegister($dst$$reg), __ T4S,
             as_FloatRegister($tmp$$reg),
             as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// --------------------------------- DIV --------------------------------------
define(`VDIV', `
instruct vdiv$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
%{
  predicate(n->as_Vector()->length() == $2);
  match(Set dst (DivV$3 src1 src2));
  ins_cost(INSN_COST);
  format %{ "$1 $dst,$src1,$src2\t# vector ($2$5)" %}
  ins_encode %{
    __ $1(as_FloatRegister($dst$$reg), __ T$2`'ifelse($5, L, D, $5),
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp`'ifelse($4, D, 64, 128));
%}')dnl
dnl  $1    $2 $3 $4 $5
VDIV(fdiv, 2, F, D, S)
VDIV(fdiv, 4, F, X, S)
VDIV(fdiv, 2, D, X, D)

// --------------------------------- SQRT -------------------------------------
dnl
dnl The format string prints the arrangement $2$5, the same value that is
dnl passed to the assembler as T$2$5, matching the other arithmetic macros.
define(`VSQRT', `
instruct vsqrt$2$3`'(vec$4 dst, vec$4 src)
%{
  predicate(n->as_Vector()->length() == $2);
  match(Set dst (SqrtV$3 src));
  format %{ "$1 $dst, $src\t# vector ($2$5)" %}
  ins_encode %{
    __ $1(as_FloatRegister($dst$$reg), __ T$2$5, as_FloatRegister($src$$reg));
  %}
  ins_pipe(v`'ifelse($2$3, 2F, unop, sqrt)_fp`'ifelse($4, D, 64, 128));
%}')dnl
dnl   $1     $2 $3 $4 $5
VSQRT(fsqrt, 2, F, D, S)
VSQRT(fsqrt, 4, F, X, S)
VSQRT(fsqrt, 2, D, X, D)

// --------------------------------- NEG --------------------------------------
dnl
dnl VNEGI($1,       $2          )
dnl VNEGI(reg_size, arrangements)
dnl   reg_size      D matches vectors shorter than 16 bytes, X exactly 16 bytes
dnl   arrangements  text printed in the format string only
dnl The arrangement passed to negr is derived at code emission time from the
dnl element basic type of the node.
define(`VNEGI', `
instruct vnegI$1(vec$1 dst, vec$1 src)
%{
  predicate(n->as_Vector()->length_in_bytes() ifelse($1, D, <, ==) 16);
  match(Set dst (NegVI src));
  ins_cost(INSN_COST);
  format %{ "negr $dst, $src\t# vector ($2)" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), ifelse($1, D, false, true));
    __ negr(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp`'ifelse($1, D, 64, 128));
%}')dnl
dnl   $1  $2
VNEGI(D, 8B/4H/2S)
VNEGI(X, 16B/8H/4S)
dnl
dnl VNEG: the cost is INSN_COST for the L variant and INSN_COST * 3 otherwise.
define(`VNEG', `
instruct vneg$2$3`'(vec$4 dst, vec$4 src)
%{
  predicate(n->as_Vector()->length() == $2);
  match(Set dst (NegV$3 src));
  ins_cost(INSN_COST`'ifelse($3, L, `',` * 3'));
  format %{ "$1 $dst,$src\t# vector ($2$5)" %}
  ins_encode %{
    __ $1(as_FloatRegister($dst$$reg), __ T$2$5,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp`'ifelse($4, D, 64, 128));
%}')dnl
dnl  $1    $2 $3 $4 $5
VNEG(negr, 2, L, X, D)
VNEG(fneg, 2, F, D, S)
VNEG(fneg, 4, F, X, S)
VNEG(fneg, 2, D, X, D)
dnl
dnl VLOGICAL($1,       $2,   $3,   $4,      $5,    $6,       $7      )
dnl VLOGICAL(mnemonic, insn, name, op_name, bytes, arr_elem, reg_size)
dnl   mnemonic  text printed in the format string
dnl   insn      assembler method that is called
dnl   name      used to build the instruct name v<name><bytes><arr_elem>
dnl   op_name   ideal node prefix; the rule matches <op_name>V
dnl   bytes     vector length in bytes; the 8 byte form also accepts 4 bytes
dnl   arr_elem  arrangement element letter; T<bytes><arr_elem> is emitted
dnl   reg_size  D or X; selects the operand class and pipeline width
define(`VLOGICAL', `
instruct v$3$5$6`'(vec$7 dst, vec$7 src1, vec$7 src2)
%{
  predicate(ifelse($5, 8, n->as_Vector()->length_in_bytes() == 4 ||` ')n->as_Vector()->length_in_bytes() == $5);
  match(Set dst ($4V src1 src2));
  ins_cost(INSN_COST);
  format %{ "$1 $dst,$src1,$src2\t# vector ($5$6)" %}
  ins_encode %{
    __ $2(as_FloatRegister($dst$$reg), __ T$5$6,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical`'ifelse($7, D, 64, 128));
%}')dnl

// --------------------------------- AND --------------------------------------
dnl      $1   $2    $3   $4   $5  $6 $7
VLOGICAL(and, andr, and, And, 8,  B, D)
VLOGICAL(and, andr, and, And, 16, B, X)

// --------------------------------- OR ---------------------------------------
VLOGICAL(orr, orr, or, Or, 8,  B, D)
VLOGICAL(orr, orr, or, Or, 16, B, X)

// --------------------------------- XOR --------------------------------------
VLOGICAL(xor, eor, xor, Xor, 8,  B, D)
VLOGICAL(xor, eor, xor, Xor, 16, B, X)

// ------------------------------ Shift
---------------------------------------
dnl
dnl VSLCNT($1,    $2,       $3      )
dnl VSLCNT(bytes, arr_elem, reg_size)
dnl   bytes     vector length in bytes; the 8 byte form also accepts 4 bytes
dnl   arr_elem  arrangement element letter; T<bytes><arr_elem> is used by dup
dnl   reg_size  D or X; selects the operand class and pipeline width
dnl Broadcasts the scalar left shift count into every element. Only matched
dnl when UseSVE == 0.
define(`VSLCNT', `
instruct vslcnt$1$2`'(vec$3 dst, iRegIorL2I cnt) %{
  predicate(UseSVE == 0 && ifelse($1, 8, (n->as_Vector()->length_in_bytes() == 4 ||` 'n->as_Vector()->length_in_bytes() == $1),
            n->as_Vector()->length_in_bytes() == $1));
  match(Set dst (LShiftCntV cnt));
  ins_cost(INSN_COST);
  format %{ "dup $dst, $cnt\t# shift count vector ($1$2)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T$1$2, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg`'ifelse($3, D, 64, 128));
%}')dnl
dnl
dnl VSRCNT takes the same parameters as VSLCNT. The scalar count is negated
dnl into rscratch1 first and the negated value is broadcast, so the format
dnl string lists two instructions, one per line.
define(`VSRCNT', `
instruct vsrcnt$1$2`'(vec$3 dst, iRegIorL2I cnt) %{
  predicate(UseSVE == 0 && ifelse($1, 8, (n->as_Vector()->length_in_bytes() == 4 ||` 'n->as_Vector()->length_in_bytes() == $1),
            n->as_Vector()->length_in_bytes() == $1));
  match(Set dst (RShiftCntV cnt));
  ins_cost(INSN_COST * 2);
  format %{ "negw rscratch1, $cnt\n\t"
            "dup $dst, rscratch1\t# shift count vector ($1$2)" %}
  ins_encode %{
    __ negw(rscratch1, as_Register($cnt$$reg));
    __ dup(as_FloatRegister($dst$$reg), __ T$1$2, rscratch1);
  %}
  ins_pipe(vdup_reg_reg`'ifelse($3, D, 64, 128));
%}')dnl
dnl

// Vector shift count
// Note-1: Low 8 bits of each element are used, so it doesn't matter if we
// treat it as ints or bytes here.
// Note-2: Shift value is negated for RShiftCntV additionally. See the comments
// on vsra8B rule for more details.
dnl    $1  $2 $3
VSLCNT(8,  B, D)
VSLCNT(16, B, X)
VSRCNT(8,  B, D)
VSRCNT(16, B, X)
dnl
dnl PREDICATE($1,  $2,    $3        )
dnl PREDICATE(key, lanes, extra_cond)
dnl   key         lane count followed by the type letter, for example 8B or 4S
dnl   lanes       lane count tested when key is neither 8B nor 4S
dnl   extra_cond  optional condition that is and-ed onto the length test
dnl Expands to a complete predicate(...); statement. Key 8B accepts 4 or 8
dnl lanes and key 4S accepts 2 or 4 lanes.
define(`PREDICATE', `ifelse($1, 8B,
ifelse($3, `', `predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);',
           `predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&` '$3);'),
$1, 4S,
ifelse($3, `', `predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);',
           `predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&` '$3);'),
ifelse($3, `', `predicate(n->as_Vector()->length() == $2);',
           `predicate(n->as_Vector()->length() == $2 && $3);'))')dnl
dnl
dnl Parameters shared by the vector shift macros:
dnl   $1  lane count
dnl   $2  element type letter appended to the ideal node name (LShiftVB, ...)
dnl   $3  arrangement element letter; T$1$3 is passed to the assembler
dnl   $4  D or X; selects the operand class and pipeline width
define(`VSLL', `
instruct vsll$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
  PREDICATE(`$1$2', $1, )
  match(Set dst (LShiftV$2 src shift));
  ins_cost(INSN_COST);
  format %{ "sshl $dst,$src,$shift\t# vector ($1$3)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128));
%}')dnl
dnl
dnl VSRA handles right shifts whose count is not a variable vector
dnl (is_var_shift() is false); it emits a single sshl using shift as given.
define(`VSRA', `
instruct vsra$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
  PREDICATE(`$1$2', $1, !n->as_ShiftV()->is_var_shift())
  match(Set dst (RShiftV$2 src shift));
  ins_cost(INSN_COST);
  format %{ "sshl $dst,$src,$shift\t# vector ($1$3)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128));
%}')dnl
dnl
dnl VSRA_VAR handles right shifts whose count is a variable vector
dnl (is_var_shift() is true). The count is negated into dst before the sshl,
dnl so dst is declared TEMP_DEF and the format string lists two instructions,
dnl one per line.
define(`VSRA_VAR', `
instruct vsra$1$2_var`'(vec$4 dst, vec$4 src, vec$4 shift) %{
  PREDICATE(`$1$2', $1, n->as_ShiftV()->is_var_shift())
  match(Set dst (RShiftV$2 src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr $dst,$shift\n\t"
            "sshl $dst,$src,$dst\t# vector ($1$3)" %}
  ins_encode %{
    __ negr(as_FloatRegister($dst$$reg), __ T`'ifelse($4, D, 8B, 16B),
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T$1$3,
            as_FloatRegister($src$$reg),
            as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128));
%}')dnl
dnl
dnl Parameters of the shift macros below: $1 lane count, $2 element type
dnl letter of the ideal node, $3 arrangement element letter (T$1$3 is
dnl emitted), $4 D or X operand size.
dnl
dnl VSRL handles logical right shifts whose count is not a variable vector.
define(`VSRL', `
instruct vsrl$1$2`'(vec$4 dst, vec$4 src, vec$4 shift) %{
  PREDICATE(`$1$2', $1, !n->as_ShiftV()->is_var_shift())
  match(Set dst (URShiftV$2 src shift));
  ins_cost(INSN_COST);
  format %{ "ushl $dst,$src,$shift\t# vector ($1$3)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T$1$3,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128));
%}')dnl
dnl
dnl VSRL_VAR handles logical right shifts whose count is a variable vector.
dnl The count is negated into dst before the ushl, so dst is TEMP_DEF and the
dnl format string lists two instructions, one per line.
define(`VSRL_VAR', `
instruct vsrl$1$2_var`'(vec$4 dst, vec$4 src, vec$4 shift) %{
  PREDICATE(`$1$2', $1, n->as_ShiftV()->is_var_shift())
  match(Set dst (URShiftV$2 src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr $dst,$shift\n\t"
            "ushl $dst,$src,$dst\t# vector ($1$3)" %}
  ins_encode %{
    __ negr(as_FloatRegister($dst$$reg), __ T`'ifelse($4, D, 8B, 16B),
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T$1$3,
            as_FloatRegister($src$$reg),
            as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128));
%}')dnl
dnl
dnl VSLL_IMM: left shift by an immediate. For B and S elements a count of at
dnl least the element width (8 or 16) produces zero via eor of src with itself.
define(`VSLL_IMM', `
instruct vsll$1$2_imm`'(vec$4 dst, vec$4 src, immI shift) %{
  PREDICATE(`$1$2', $1, assert_not_var_shift(n))
  match(Set dst (LShiftV$2 src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl $dst, $src, $shift\t# vector ($1$3)" %}
  ins_encode %{ifelse($2, B,`
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T$1$3,
             as_FloatRegister($src$$reg), sh);
    }', $2, S,`
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T$1$3,
             as_FloatRegister($src$$reg), sh);
    }', `
    __ shl(as_FloatRegister($dst$$reg), __ T$1$3,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);')
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl
dnl
dnl VSRA_IMM: arithmetic right shift by an immediate. For B and S elements a
dnl count of at least the element width is clamped to width - 1 (7 or 15).
define(`VSRA_IMM', `
instruct vsra$1$2_imm`'(vec$4 dst, vec$4 src, immI_positive shift) %{
  PREDICATE(`$1$2', $1, assert_not_var_shift(n))
  match(Set dst (RShiftV$2 src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector ($1$3)" %}
  ins_encode %{ifelse($2, B,`
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
           as_FloatRegister($src$$reg), sh);', $2, S,`
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
           as_FloatRegister($src$$reg), sh);', `
    __ sshr(as_FloatRegister($dst$$reg), __ T$1$3,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);')
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl
dnl
dnl VSRL_IMM: logical right shift by an immediate. For B and S elements a
dnl count of at least the element width produces zero via eor of src with
dnl itself.
define(`VSRL_IMM', `
instruct vsrl$1$2_imm`'(vec$4 dst, vec$4 src, immI_positive shift) %{
  PREDICATE(`$1$2', $1, assert_not_var_shift(n))
  match(Set dst (URShiftV$2 src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector ($1$3)" %}
  ins_encode %{ifelse($2, B,`
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
             as_FloatRegister($src$$reg), sh);
    }', $2, S,`
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ ifelse($4, D, T8B, T16B),
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
             as_FloatRegister($src$$reg), sh);
    }', `
    __ ushr(as_FloatRegister($dst$$reg), __ T$1$3,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);')
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl
dnl
dnl VSRLA_IMM: logical right shift by an immediate, accumulated into dst.
dnl For B and S elements nothing is emitted when the count is at least the
dnl element width, which leaves dst unchanged.
define(`VSRLA_IMM', `
instruct vsrla$1$2_imm`'(vec$4 dst, vec$4 src, immI_positive shift) %{
  predicate(n->as_Vector()->length() == $1);
  match(Set dst (AddV$2 dst (URShiftV$2 src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra $dst, $src, $shift\t# vector ($1$3)" %}
  ins_encode %{ifelse($2, B,`
    int sh = (int)$shift$$constant;
    if (sh < 8) {
      __ usra(as_FloatRegister($dst$$reg), __ T$1$3,
             as_FloatRegister($src$$reg), sh);
    }', $2, S,`
    int sh = (int)$shift$$constant;
    if (sh < 16) {
      __ usra(as_FloatRegister($dst$$reg), __ T$1$3,
             as_FloatRegister($src$$reg), sh);
    }', `
    __ usra(as_FloatRegister($dst$$reg), __ T$1$3,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);')
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl
dnl
dnl VSRAA_IMM: arithmetic right shift by an immediate, accumulated into dst.
dnl For B and S elements the count is clamped to width - 1 (7 or 15).
define(`VSRAA_IMM', `
instruct vsraa$1$2_imm`'(vec$4 dst, vec$4 src, immI_positive shift) %{
  predicate(n->as_Vector()->length() == $1);
  match(Set dst (AddV$2 dst (RShiftV$2 src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra $dst, $src, $shift\t# vector ($1$3)" %}
  ins_encode %{ifelse($2, B,`
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
           as_FloatRegister($src$$reg), sh);', $2, S,`
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
           as_FloatRegister($src$$reg), sh);', `
    __ ssra(as_FloatRegister($dst$$reg), __ T$1$3,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);')
  %}
  ins_pipe(vshift`'ifelse($4, D, 64, 128)_imm);
%}')dnl
dnl
dnl NOTE(review): the argument of the undefine below is not quoted, so m4
dnl expands PREDICATE before undefine sees a name; this appears to leave
dnl PREDICATE defined. The shift macros invoked after this point expand
dnl PREDICATE at invocation time, so quoting the argument here would need the
dnl statement to move after the last shift invocation. Please confirm.
undefine(PREDICATE)dnl
dnl
dnl  $1  $2 $3 $4
VSLL(8,  B, B, D)
VSLL(16, B, B, X)

// Right shifts with vector shift count on aarch64 SIMD are implemented
// as left shift by negative shift count.
// There are two cases for vector shift count.
//
// Case 1: The vector shift count is from replication.
//        |            |
//    LoadVector  RShiftCntV
//        |       /
//     RShiftVI
//
// Case 2: The vector shift count is from loading.
// This case isn't supported by middle-end now. But it's supported by
// panama/vectorIntrinsics(JEP 338: Vector API).
//        |            |
//    LoadVector  LoadVector
//        |       /
//     RShiftVI
//
// The negate is conducted in RShiftCntV rule for case 1, whereas it's done in
// RShiftV* rules for case 2.
Because there exists an optimization opportunity
// for case 1, that is, multiple neg instructions in inner loop can be hoisted
// to outer loop and merged into one neg instruction.
//
// Note that ShiftVNode::is_var_shift() indicates whether the vector shift
// count is a variable vector(case 2) or not(a vector generated by RShiftCntV,
// i.e. case 1).
dnl  $1  $2 $3 $4
VSRA(8,  B, B, D)
VSRA_VAR(8,  B, B, D)
VSRA(16, B, B, X)
VSRA_VAR(16, B, B, X)
VSRL(8,  B, B, D)
VSRL_VAR(8,  B, B, D)
VSRL(16, B, B, X)
VSRL_VAR(16, B, B, X)
VSLL_IMM(8,  B, B, D)
VSLL_IMM(16, B, B, X)
VSRA_IMM(8,  B, B, D)
VSRA_IMM(16, B, B, X)
VSRL_IMM(8,  B, B, D)
VSRL_IMM(16, B, B, X)
VSLL(4,  S, H, D)
VSLL(8,  S, H, X)
VSRA(4,  S, H, D)
VSRA_VAR(4,  S, H, D)
VSRA(8,  S, H, X)
VSRA_VAR(8,  S, H, X)
VSRL(4,  S, H, D)
VSRL_VAR(4,  S, H, D)
VSRL(8,  S, H, X)
VSRL_VAR(8,  S, H, X)
VSLL_IMM(4,  S, H, D)
VSLL_IMM(8,  S, H, X)
VSRA_IMM(4,  S, H, D)
VSRA_IMM(8,  S, H, X)
VSRL_IMM(4,  S, H, D)
VSRL_IMM(8,  S, H, X)
VSLL(2,  I, S, D)
VSLL(4,  I, S, X)
VSRA(2,  I, S, D)
VSRA_VAR(2,  I, S, D)
VSRA(4,  I, S, X)
VSRA_VAR(4,  I, S, X)
VSRL(2,  I, S, D)
VSRL_VAR(2,  I, S, D)
VSRL(4,  I, S, X)
VSRL_VAR(4,  I, S, X)
VSLL_IMM(2,  I, S, D)
VSLL_IMM(4,  I, S, X)
VSRA_IMM(2,  I, S, D)
VSRA_IMM(4,  I, S, X)
VSRL_IMM(2,  I, S, D)
VSRL_IMM(4,  I, S, X)
VSLL(2,  L, D, X)
VSRA(2,  L, D, X)
VSRA_VAR(2,  L, D, X)
VSRL(2,  L, D, X)
VSRL_VAR(2,  L, D, X)
VSLL_IMM(2,  L, D, X)
VSRA_IMM(2,  L, D, X)
VSRL_IMM(2,  L, D, X)
VSRAA_IMM(8,  B, B, D)
VSRAA_IMM(16, B, B, X)
VSRAA_IMM(4,  S, H, D)
VSRAA_IMM(8,  S, H, X)
VSRAA_IMM(2,  I, S, D)
VSRAA_IMM(4,  I, S, X)
VSRAA_IMM(2,  L, D, X)
VSRLA_IMM(8,  B, B, D)
VSRLA_IMM(16, B, B, X)
VSRLA_IMM(4,  S, H, D)
VSRLA_IMM(8,  S, H, X)
VSRLA_IMM(2,  I, S, D)
VSRLA_IMM(4,  I, S, X)
VSRLA_IMM(2,  L, D, X)
dnl
dnl VMINMAX($1,   $2,      $3,    $4,    $5,       $6      )
dnl VMINMAX(insn, op_name, lanes, label, arr_elem, reg_size)
dnl   insn      min or max; f<insn> is the assembler method and the mnemonic
dnl   op_name   ideal node prefix; the rule matches <op_name>V
dnl   lanes     lane count
dnl   label     element letter printed in the format string only; pass the
dnl             same letter as arr_elem so the text matches the emitted code
dnl   arr_elem  S or D; T<lanes><arr_elem> is emitted and it also selects
dnl             the F or D suffix of the instruct name and the element type
dnl             tested by the predicate
dnl   reg_size  D or X; selects the operand class and pipeline width
define(`VMINMAX', `
instruct v$1$3`'ifelse($5, S, F, D)`'(vec$6 dst, vec$6 src1, vec$6 src2)
%{
  predicate(n->as_Vector()->length() == $3 && n->bottom_type()->is_vect()->element_basic_type() == T_`'ifelse($5, S, FLOAT, DOUBLE));
  match(Set dst ($2V src1 src2));
  ins_cost(INSN_COST);
  format %{ "f$1 $dst,$src1,$src2\t# vector ($3$4)" %}
  ins_encode %{
    __ f$1(as_FloatRegister($dst$$reg), __ T$3$5,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp`'ifelse($6, D, 64, 128));
%}')dnl
dnl     $1   $2   $3 $4 $5 $6
VMINMAX(max, Max, 2, S, S, D)
VMINMAX(max, Max, 4, S, S, X)
VMINMAX(max, Max, 2, D, D, X)
VMINMAX(min, Min, 2, S, S, D)
VMINMAX(min, Min, 4, S, S, X)
VMINMAX(min, Min, 2, D, D, X)

instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "frint $dst, $src, $rmode" %}
  ins_encode %{
    switch ($rmode$$constant) {
      case RoundDoubleModeNode::rmode_rint:
        __ frintn(as_FloatRegister($dst$$reg), __ T2D,
                  as_FloatRegister($src$$reg));
        break;
      case RoundDoubleModeNode::rmode_floor:
        __ frintm(as_FloatRegister($dst$$reg), __ T2D,
                  as_FloatRegister($src$$reg));
        break;
      case RoundDoubleModeNode::rmode_ceil:
        __ frintp(as_FloatRegister($dst$$reg), __ T2D,
                  as_FloatRegister($src$$reg));
        break;
    }
  %}
  ins_pipe(vdop_fp128);
%}
dnl
dnl VPOPCOUNT($1,   $2,       $3,       $4,   $5,   $6          )
dnl VPOPCOUNT(type, dst_size, src_size, cond, cost, arrangements)
dnl   type          I or L; selects PopCountVI or PopCountVL
dnl   dst_size      D or X operand class of dst
dnl   src_size      D or X operand class of src
dnl   cond          comparison of length_in_bytes() against 16
dnl   cost          multiplier applied to INSN_COST
dnl   arrangements  text printed in the format string only
dnl Per-byte counts from cnt are widened with uaddlp up to the element size.
dnl The LD variant finally narrows the 2D result to 2S with xtn.
define(`VPOPCOUNT', `dnl
ifelse($1$2, `LD', `
// If the PopCountVL is generated by auto-vectorization, the dst basic
// type is T_INT. And once we have unified the type definition for
// Vector API and auto-vectorization, this rule can be merged with
// "vpopcountLX" rule.', `')
instruct vpopcount$1$2`'(vec$2 dst, vec$3 src) %{
  predicate(n->as_Vector()->length_in_bytes() $4 16`'ifelse($1$2, `LD', ` &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT', $1$2, `LX', ` &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG', `'));
  match(Set dst (PopCountV$1 src));
  ins_cost($5 * INSN_COST);
  format %{ "vpopcount$1 $dst, $src\t# vector ($6)" %}
  ins_encode %{dnl
ifelse($1, `I', `
    BasicType bt = Matcher::vector_element_basic_type(this);', `')
    __ cnt(as_FloatRegister($dst$$reg), __ T`'ifelse($3, D, 8, 16)B,
           as_FloatRegister($src$$reg));dnl
ifelse($1, `L', `
    __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
              as_FloatRegister($dst$$reg));
    __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
              as_FloatRegister($dst$$reg));
    __ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
              as_FloatRegister($dst$$reg));', `
    if (bt == T_SHORT || bt == T_INT) {
      __ uaddlp(as_FloatRegister($dst$$reg), __ T`'ifelse($2, D, 8, 16)B,
                as_FloatRegister($dst$$reg));
      if (bt == T_INT) {
        __ uaddlp(as_FloatRegister($dst$$reg), __ T`'ifelse($2, D, 4, 8)H,
                  as_FloatRegister($dst$$reg));
      }
    }')dnl
ifelse($1$2, `LD', `
    __ xtn(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($dst$$reg), __ T2D);', `')
  %}
  ins_pipe(pipe_class_default);
%}')dnl
dnl       $1 $2 $3 $4  $5 $6
VPOPCOUNT(I, D, D, <,  3, 8B/4H/2S)
VPOPCOUNT(I, X, X, ==, 3, 16B/8H/4S)
VPOPCOUNT(L, D, X, <,  5, 2S)
VPOPCOUNT(L, X, X, ==, 4, 2D)
dnl
dnl VMASK_TRUECOUNT($1,     $2 )
dnl VMASK_TRUECOUNT(suffix, reg)
define(`VMASK_TRUECOUNT', `
instruct vmask_truecount$1(iRegINoSp dst, $2 src, $2 tmp) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
  match(Set dst (VectorMaskTrueCount src));
  effect(TEMP tmp);
  ins_cost(2 * INSN_COST);
  format %{ "addv $tmp, $src\n\t"
            "umov $dst, $tmp, B, 0\t# vector ($1)" %}
  ins_encode %{
    // Input "src" is a vector of boolean represented as bytes with
    // 0x00/0x01 as element values.
    __ addv(as_FloatRegister($tmp$$reg), __ T$1, as_FloatRegister($src$$reg));
    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
dnl
define(`ARGLIST',
`ifelse($1, `_LT8B', `iRegINoSp dst, vecD src, rFlagsReg cr', `iRegINoSp dst, vecD src')')
dnl
dnl VMASK_FIRSTTRUE_D($1,     $2,   $3,   $4  )
dnl VMASK_FIRSTTRUE_D(suffix, cond, cost, size)
dnl The _LT8B variant additionally clamps the result to the vector length
dnl with cmpw and cselw, which is why it takes and kills the flags register.
define(`VMASK_FIRSTTRUE_D', `
instruct vmask_firsttrue$1(ARGLIST($1)) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN &&
            n->in(1)->bottom_type()->is_vect()->length() $2 8);
  match(Set dst (VectorMaskFirstTrue src));dnl
ifelse($1, `_LT8B', `
  effect(KILL cr);')
  ins_cost($3 * INSN_COST);
  format %{ "vmask_firsttrue $dst, $src\t# vector ($4)" %}
  ins_encode %{
    // Returns the index of the first active lane of the
    // vector mask, or VLENGTH if no lane is active.
    //
    // Input "src" is a vector of boolean represented as
    // bytes with 0x00/0x01 as element values.
    //
    // Computed by reversing the bits and counting the leading
    // zero bytes.
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
    __ rbit($dst$$Register, $dst$$Register);
    __ clz($dst$$Register, $dst$$Register);
    __ lsrw($dst$$Register, $dst$$Register, 3);dnl
ifelse(`$1', `_LT8B', `
    __ movw(rscratch1, Matcher::vector_length(this, $src));
    __ cmpw($dst$$Register, rscratch1);
    __ cselw($dst$$Register, rscratch1, $dst$$Register, Assembler::GE);')
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
dnl NOTE(review): the argument of the undefine below is not quoted, so m4
dnl expands ARGLIST before undefine sees a name; this appears to leave
dnl ARGLIST defined. VMASK_FIRSTTRUE_D is invoked after this point and expands
dnl ARGLIST at invocation time, so quoting the argument here would need the
dnl statement to move after those invocations. Please confirm.
undefine(ARGLIST)dnl
dnl
// vector mask reductions
VMASK_TRUECOUNT(8B, vecD)
VMASK_TRUECOUNT(16B, vecX)
VMASK_FIRSTTRUE_D(_LT8B, <, 7, 4I/4S/2I)
VMASK_FIRSTTRUE_D(8B, ==, 4, 8B)

instruct vmask_firsttrue16B(iRegINoSp dst, vecX src) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
  match(Set dst (VectorMaskFirstTrue src));
  ins_cost(6 * INSN_COST);
  format %{ "vmask_firsttrue $dst, $src\t# vector (16B)" %}
  ins_encode %{
    // Returns the index of the first active lane of the
    // vector mask, or 16 (VLENGTH) if no lane is active.
    //
    // Input "src" is a vector of boolean represented as
    // bytes with 0x00/0x01 as element values.

    Label FIRST_TRUE_INDEX;

    // Try to compute the result from lower 64 bits.
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
    __ movw(rscratch1, zr);
    __ cbnz($dst$$Register, FIRST_TRUE_INDEX);

    // Compute the result from the higher 64 bits.
    __ fmovhid($dst$$Register, as_FloatRegister($src$$reg));
    __ movw(rscratch1, 8);

    // Reverse the bits and count the leading zero bytes.
    __ bind(FIRST_TRUE_INDEX);
    __ rbit($dst$$Register, $dst$$Register);
    __ clz($dst$$Register, $dst$$Register);
    __ addw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmask_lasttrue8B(iRegINoSp dst, vecD src) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
  match(Set dst (VectorMaskLastTrue src));
  ins_cost(4 * INSN_COST);
  format %{ "vmask_lasttrue $dst, $src\t# vector (8B)" %}
  ins_encode %{
    // Returns the index of the last active lane of the
    // vector mask, or -1 if no lane is active.
    //
    // Input "src" is a vector of boolean represented as
    // bytes with 0x00/0x01 as element values.
    //
    // Computed by counting the leading zero bytes and
    // subtracting it by 7 (VLENGTH - 1).
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
    __ clz($dst$$Register, $dst$$Register);
    __ movw(rscratch1, 7);
    __ subw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmask_lasttrue16B(iRegINoSp dst, vecX src) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
  match(Set dst (VectorMaskLastTrue src));
  ins_cost(5 * INSN_COST);
  format %{ "vmask_lasttrue $dst, $src\t# vector (16B)" %}
  ins_encode %{
    // Returns the index of the last active lane of the
    // vector mask, or -1 if no lane is active.
    //
    // Input "src" is a vector of boolean represented as
    // bytes with 0x00/0x01 as element values.

    Label LAST_TRUE_INDEX;

    // Try to compute the result from higher 64 bits.
    __ fmovhid($dst$$Register, as_FloatRegister($src$$reg));
    __ movw(rscratch1, 16 - 1);
    __ cbnz($dst$$Register, LAST_TRUE_INDEX);

    // Compute the result from the lower 64 bits.
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
    __ movw(rscratch1, 8 - 1);

    // Count the leading zero bytes and subtract it by 15 (VLENGTH - 1).
    __ bind(LAST_TRUE_INDEX);
    __ clz($dst$$Register, $dst$$Register);
    __ subw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmask_tolong8B(iRegLNoSp dst, vecD src) %{
  match(Set dst (VectorMaskToLong src));
  ins_cost(5 * INSN_COST);
  format %{ "vmask_tolong $dst, $src\t# convert mask to long (8B)" %}
  ins_encode %{
    // Input "src" is a vector of boolean represented as
    // bytes with 0x00/0x01 as element values.

    __ fmovd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
    __ bytemask_compress(as_Register($dst$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmask_tolong16B(iRegLNoSp dst, vecX src) %{
  match(Set dst (VectorMaskToLong src));
  ins_cost(11 * INSN_COST);
  format %{ "vmask_tolong $dst, $src\t# convert mask to long (16B)" %}
  ins_encode %{
    // Input "src" is a vector of boolean represented as
    // bytes with 0x00/0x01 as element values.

    __ umov(as_Register($dst$$reg), as_FloatRegister($src$$reg), __ D, 0);
    __ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 1);
    __ bytemask_compress(as_Register($dst$$reg));
    __ bytemask_compress(rscratch1);
    __ orr(as_Register($dst$$reg), as_Register($dst$$reg),
           rscratch1, Assembler::LSL, 8);
  %}
  ins_pipe(pipe_slow);
%}
dnl
dnl CLTZ_D($1     )
dnl CLTZ_D(op_name)
dnl For TrailingZerosV the bits of src are first reversed into dst and the
dnl leading zero count is then taken from dst instead of src.
define(`CLTZ_D', `
instruct count$1D(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (Count$1 src));
  ins_cost(ifelse($1, `TrailingZerosV', `3 * ', `')INSN_COST);
  format %{ "count$1 $dst, $src\t# vector (8B/4H/2S)" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), false);dnl
ifelse($1, `TrailingZerosV', `
    __ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);', `')
    __ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
dnl CLTZ_X($1     )
dnl CLTZ_X(op_name)
dnl For TrailingZerosV the bits of src are first reversed into dst and the
dnl count is then taken from dst instead of src. For T_LONG elements each
dnl 64-bit lane is moved to rscratch1 with umov, counted with the scalar clz
dnl and moved back into the matching lane of dst; every other element type
dnl uses a single vector clz.
define(`CLTZ_X', `
instruct count$1X(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (Count$1 src));
  ins_cost(ifelse($1, `TrailingZerosV', `3 * ', `')INSN_COST);
  format %{ "count$1 $dst, $src\t# vector (16B/8H/4S/2D)" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), true);dnl
ifelse($1, `TrailingZerosV', `
    __ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);', `')
    if (bt != T_LONG) {
      __ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg));
    } else {
      __ umov(rscratch1, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg), __ D, 0);
      __ clz(rscratch1, rscratch1);
      __ mov(as_FloatRegister($dst$$reg), __ D, 0, rscratch1);
      __ umov(rscratch1, as_FloatRegister($ifelse($1, `TrailingZerosV', dst, src)$$reg), __ D, 1);
      __ clz(rscratch1, rscratch1);
      __ mov(as_FloatRegister($dst$$reg), __ D, 1, rscratch1);
    }
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
//------------------------- CountLeadingZerosV -----------------------------
CLTZ_D(LeadingZerosV)
CLTZ_X(LeadingZerosV)

//------------------------- CountTrailingZerosV ----------------------------
CLTZ_D(TrailingZerosV)
CLTZ_X(TrailingZerosV)
dnl
dnl REVERSE($1,        $2,      $3,   $4  )
dnl REVERSE(insn_name, op_name, type, insn)
dnl   insn_name  name of the generated instruct
dnl   op_name    ideal node that is matched; ReverseV costs 2 * INSN_COST,
dnl              any other op_name costs INSN_COST
dnl   type       D or X; selects the operand class and requires a vector
dnl              length of 8 or 16 bytes respectively
dnl   insn       assembler helper called with dst, src, the element basic
dnl              type and a boolean that is false for D and true for X
define(`REVERSE', `
instruct $1(vec$3 dst, vec$3 src) %{
  predicate(n->as_Vector()->length_in_bytes() == ifelse($3, D, 8, 16));
  match(Set dst ($2 src));
  ins_cost(ifelse($2, `ReverseV', `2 * ', `')INSN_COST);
  format %{ "$2 $dst, $src\t# vector ($3)" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ $4(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, ifelse($3, D, false, true));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
//------------------------------ ReverseV -----------------------------------
REVERSE(vreverseD, ReverseV, D, neon_reverse_bits)
REVERSE(vreverseX, ReverseV, X, neon_reverse_bits)

//---------------------------- ReverseBytesV --------------------------------
REVERSE(vreverseBytesD, ReverseBytesV, D, neon_reverse_bytes)
REVERSE(vreverseBytesX, ReverseBytesV, X, neon_reverse_bytes)