X86_64: Enable support for next generation AMD Zen3 CPU.

2020-12-03  Venkataramanan Kumar  <Venkataramanan.Kumar@amd.com>
	    Sharavan Kumar  <Shravan.Kumar@amd.com>

gcc/ChangeLog:

	* common/config/i386/cpuinfo.h (get_amd_cpu): Recognize znver3.
	* common/config/i386/i386-common.c (processor_names): Add
	znver3.
	(processor_alias_table): Add znver3 and AMDFAM19H entry.
	* common/config/i386/i386-cpuinfo.h (processor_types): Add
	AMDFAM19H.
	(processor_subtypes): Add AMDFAM19H_ZNVER3.
	* config.gcc (i[34567]86-*-linux* | ...): Add znver3.
	* config/i386/driver-i386.c (host_detect_local_cpu): Let
	-march=native recognize znver3 processors.
	* config/i386/i386-c.c (ix86_target_macros_internal): Add
	znver3.
	* config/i386/i386-options.c (m_ZNVER3): New definition.
	(m_ZNVER): Include m_znver3.
	(processor_cost_table): Add znver3.
	* config/i386/i386.c (ix86_reassociation_width): Likewise.
	* config/i386/i386.h (TARGET_ZNVER3): New definition.
	(enum processor_type): Add PROCESSOR_ZNVER3.
	* config/i386/i386.md (define_attr "cpu"): Add znver3.
	* config/i386/x86-tune-sched.c (ix86_issue_rate): Likewise.
	(ix86_adjust_cost): Likewise.
	* config/i386/x86-tune.def (X86_TUNE_AVOID_256FMA_CHAINS):
	Likewise.
	* config/i386/znver1.md: Add new reservations for znver3.
	* doc/extend.texi: Add details about znver3.
	* doc/invoke.texi: Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/funcspec-56.inc: Handle new march.
	* g++.target/i386/mv29.C: New file.
This commit is contained in:
Venkataramanan Kumar 2020-12-05 11:12:15 +05:30
parent 625e002396
commit 3e2ae3ee28
17 changed files with 397 additions and 128 deletions

View File

@ -241,6 +241,23 @@ get_amd_cpu (struct __processor_model *cpu_model,
cpu_model->__cpu_subtype = AMDFAM17H_ZNVER1;
}
break;
case 0x19:
cpu_model->__cpu_type = AMDFAM19H;
/* AMD family 19h version 1. */
if (model <= 0x0f)
{
cpu = "znver3";
CHECK___builtin_cpu_is ("znver3");
cpu_model->__cpu_subtype = AMDFAM19H_ZNVER3;
}
else if (has_cpu_feature (cpu_model, cpu_features2,
FEATURE_VAES))
{
cpu = "znver3";
CHECK___builtin_cpu_is ("znver3");
cpu_model->__cpu_subtype = AMDFAM19H_ZNVER3;
}
break;
default:
break;
}

View File

@ -1762,7 +1762,8 @@ const char *const processor_names[] =
"btver1",
"btver2",
"znver1",
"znver2"
"znver2",
"znver3"
};
/* Guarantee that the array is aligned with enum processor_type. */
@ -2004,6 +2005,17 @@ const pta processor_alias_table[] =
| PTA_SHA | PTA_LZCNT | PTA_POPCNT | PTA_CLWB | PTA_RDPID
| PTA_WBNOINVD,
M_CPU_SUBTYPE (AMDFAM17H_ZNVER2), P_PROC_AVX2},
{"znver3", PROCESSOR_ZNVER3, CPU_ZNVER3,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
| PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
| PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
| PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
| PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
| PTA_SHA | PTA_LZCNT | PTA_POPCNT | PTA_CLWB | PTA_RDPID
| PTA_WBNOINVD | PTA_VAES | PTA_VPCLMULQDQ | PTA_PKU,
M_CPU_SUBTYPE (AMDFAM19H_ZNVER3), P_PROC_AVX2},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
@ -2030,6 +2042,8 @@ const pta processor_alias_table[] =
M_CPU_TYPE (AMDFAM15H), P_NONE},
{"amdfam17h", PROCESSOR_GENERIC, CPU_GENERIC, 0,
M_CPU_TYPE (AMDFAM17H), P_NONE},
{"amdfam19h", PROCESSOR_GENERIC, CPU_GENERIC, 0,
M_CPU_TYPE (AMDFAM19H), P_NONE},
{"shanghai", PROCESSOR_GENERIC, CPU_GENERIC, 0,
M_CPU_TYPE (AMDFAM10H_SHANGHAI), P_NONE},
{"istanbul", PROCESSOR_GENERIC, CPU_GENERIC, 0,

View File

@ -55,6 +55,7 @@ enum processor_types
INTEL_GOLDMONT,
INTEL_GOLDMONT_PLUS,
INTEL_TREMONT,
AMDFAM19H,
CPU_TYPE_MAX,
BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX
};
@ -86,6 +87,7 @@ enum processor_subtypes
INTEL_COREI7_COOPERLAKE,
INTEL_COREI7_SAPPHIRERAPIDS,
INTEL_COREI7_ALDERLAKE,
AMDFAM19H_ZNVER3,
CPU_SUBTYPE_MAX
};

View File

@ -668,7 +668,7 @@ c7 esther"
# 64-bit x86 processors supported by --with-arch=. Each processor
# MUST be separated by exactly one space.
x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
bdver3 bdver4 znver1 znver2 btver1 btver2 k8 k8-sse3 opteron \
bdver3 bdver4 znver1 znver2 znver3 btver1 btver2 k8 k8-sse3 opteron \
opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 atom \
slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
@ -3678,6 +3678,10 @@ case ${target} in
arch=znver2
cpu=znver2
;;
znver3-*)
arch=znver3
cpu=znver3
;;
bdver4-*)
arch=bdver4
cpu=bdver4
@ -3799,6 +3803,10 @@ case ${target} in
arch=znver2
cpu=znver2
;;
znver3-*)
arch=znver3
cpu=znver3
;;
bdver4-*)
arch=bdver4
cpu=bdver4

View File

@ -455,6 +455,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
processor = PROCESSOR_GEODE;
else if (has_feature (FEATURE_MOVBE) && family == 22)
processor = PROCESSOR_BTVER2;
else if (has_feature (FEATURE_VAES))
processor = PROCESSOR_ZNVER3;
else if (has_feature (FEATURE_CLWB))
processor = PROCESSOR_ZNVER2;
else if (has_feature (FEATURE_CLZERO))
@ -753,6 +755,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
case PROCESSOR_ZNVER2:
cpu = "znver2";
break;
case PROCESSOR_ZNVER3:
cpu = "znver3";
break;
case PROCESSOR_BTVER1:
cpu = "btver1";
break;

View File

@ -128,6 +128,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__znver2");
def_or_undef (parse_in, "__znver2__");
break;
case PROCESSOR_ZNVER3:
def_or_undef (parse_in, "__znver3");
def_or_undef (parse_in, "__znver3__");
break;
case PROCESSOR_BTVER1:
def_or_undef (parse_in, "__btver1");
def_or_undef (parse_in, "__btver1__");
@ -315,6 +319,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
case PROCESSOR_ZNVER2:
def_or_undef (parse_in, "__tune_znver2__");
break;
case PROCESSOR_ZNVER3:
def_or_undef (parse_in, "__tune_znver3__");
break;
case PROCESSOR_BTVER1:
def_or_undef (parse_in, "__tune_btver1__");
break;

View File

@ -147,11 +147,12 @@ along with GCC; see the file COPYING3. If not see
#define m_BDVER4 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER4)
#define m_ZNVER1 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER1)
#define m_ZNVER2 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER2)
#define m_ZNVER3 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER3)
#define m_BTVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER1)
#define m_BTVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_ZNVER (m_ZNVER1 | m_ZNVER2)
#define m_ZNVER (m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
| m_ZNVER)
@ -745,6 +746,7 @@ static const struct processor_costs *processor_cost_table[] =
&btver1_cost,
&btver2_cost,
&znver1_cost,
&znver2_cost,
&znver2_cost
};

View File

@ -21976,8 +21976,9 @@ ix86_reassociation_width (unsigned int op, machine_mode mode)
/* Integer vector instructions execute in FP unit
and can execute 3 additions and one multiplication per cycle. */
if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2)
&& INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
|| ix86_tune == PROCESSOR_ZNVER3)
&& INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
return 1;
/* Account for targets that split wide vectors into multiple parts. */

View File

@ -484,6 +484,7 @@ extern const struct processor_costs ix86_size_cost;
#define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2)
#define TARGET_ZNVER1 (ix86_tune == PROCESSOR_ZNVER1)
#define TARGET_ZNVER2 (ix86_tune == PROCESSOR_ZNVER2)
#define TARGET_ZNVER3 (ix86_tune == PROCESSOR_ZNVER3)
/* Feature tests against the various tunings. */
enum ix86_tune_indices {
@ -2397,6 +2398,7 @@ enum processor_type
PROCESSOR_BTVER2,
PROCESSOR_ZNVER1,
PROCESSOR_ZNVER2,
PROCESSOR_ZNVER3,
PROCESSOR_max
};

View File

@ -458,7 +458,7 @@
;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
atom,slm,glm,haswell,generic,amdfam10,bdver1,bdver2,bdver3,
bdver4,btver2,znver1,znver2"
bdver4,btver2,znver1,znver2,znver3"
(const (symbol_ref "ix86_schedule")))
;; A basic instruction type. Refinements due to arguments to be

View File

@ -66,6 +66,7 @@ ix86_issue_rate (void)
case PROCESSOR_BDVER4:
case PROCESSOR_ZNVER1:
case PROCESSOR_ZNVER2:
case PROCESSOR_ZNVER3:
case PROCESSOR_CORE2:
case PROCESSOR_NEHALEM:
case PROCESSOR_SANDYBRIDGE:
@ -396,6 +397,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
case PROCESSOR_ZNVER1:
case PROCESSOR_ZNVER2:
case PROCESSOR_ZNVER3:
/* Stack engine allows push and pop instructions to execute in parallel. */
if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
&& (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))

View File

@ -444,7 +444,7 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER)
/* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
smaller FMA chain. */
DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2)
DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3)
/*****************************************************************************/
/* AVX instruction selection tuning (some of SSE flags affects AVX, too) */

View File

@ -21,7 +21,7 @@
(define_attr "znver1_decode" "direct,vector,double"
(const_string "direct"))
;; AMD znver1 and znver2 Scheduling
;; AMD znver1, znver2 and znver3 Scheduling
;; Modeling automatons for zen decoders, integer execution pipes,
;; AGU pipes and floating point execution units.
(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu")
@ -52,7 +52,7 @@
(define_cpu_unit "znver1-ieu3" "znver1_ieu")
(define_reservation "znver1-ieu" "znver1-ieu0|znver1-ieu1|znver1-ieu2|znver1-ieu3")
;; 2 AGU pipes in znver1 and 3 AGU pipes in znver2
;; 2 AGU pipes in znver1 and 3 AGU pipes in znver2 and znver3
;; According to CPU diagram last AGU unit is used only for stores.
(define_cpu_unit "znver1-agu0" "znver1_agu")
(define_cpu_unit "znver1-agu1" "znver1_agu")
@ -63,7 +63,7 @@
;; Load is 4 cycles. We do not model reservation of load unit.
;;(define_reservation "znver1-load" "znver1-agu-reserve, nothing, nothing, nothing")
(define_reservation "znver1-load" "znver1-agu-reserve")
;; Store operations differs between znver1 and znver2 because extra AGU
;; Store operations differ between znver1 and znver2/znver3 because an extra AGU
;; was added.
(define_reservation "znver1-store" "znver1-agu-reserve")
(define_reservation "znver2-store" "znver2-store-agu-reserve")
@ -77,6 +77,7 @@
(define_reservation "znver2-ivector" "znver1-ieu0+znver1-ieu1
+znver1-ieu2+znver1-ieu3
+znver1-agu0+znver1-agu1+znver2-agu2")
;; Floating point unit 4 FP pipes.
(define_cpu_unit "znver1-fp0" "znver1_fp")
(define_cpu_unit "znver1-fp1" "znver1_fp")
@ -99,7 +100,7 @@
"znver1-double,znver1-store,znver1-ieu0|znver1-ieu3")
(define_insn_reservation "znver2_call" 1
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(eq_attr "type" "call,callv"))
"znver1-double,znver2-store,znver1-ieu0|znver1-ieu3")
@ -110,10 +111,10 @@
(eq_attr "memory" "store")))
"znver1-direct,znver1-store")
(define_insn_reservation "znver2_push" 1
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "push")
(eq_attr "memory" "store")))
"znver1-direct,znver1-store")
"znver1-direct,znver2-store")
(define_insn_reservation "znver1_push_load" 4
(and (eq_attr "cpu" "znver1")
@ -121,13 +122,13 @@
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver1-store")
(define_insn_reservation "znver2_push_load" 4
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "push")
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver2-store")
(define_insn_reservation "znver1_pop" 4
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "pop")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load")
@ -138,7 +139,7 @@
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver1-store")
(define_insn_reservation "znver2_pop_mem" 4
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "pop")
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver2-store")
@ -149,7 +150,7 @@
(eq_attr "type" "leave"))
"znver1-double,znver1-ieu, znver1-store")
(define_insn_reservation "znver2_leave" 1
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(eq_attr "type" "leave"))
"znver1-double,znver1-ieu, znver2-store")
@ -157,13 +158,13 @@
;; Multiplications
;; Reg operands
(define_insn_reservation "znver1_imul" 3
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "imul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-ieu1")
(define_insn_reservation "znver1_imul_mem" 7
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "imul")
(eq_attr "memory" "!none")))
"znver1-direct,znver1-load, znver1-ieu1")
@ -227,6 +228,62 @@
(eq_attr "memory" "none"))))
"znver1-direct,znver1-load,znver1-ieu2*12")
;; znver3 integer divide, DImode, no memory operand.
;; Matches insns with cpu "znver3", type "idiv", mode "DI" and memory "none".
;; Declared latency is 18; the reservation takes "znver1-double" and then
;; holds "znver1-ieu2" for 18 consecutive cycles.
(define_insn_reservation "znver3_idiv_DI" 18
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "none"))))
"znver1-double,znver1-ieu2*18")
;; znver3 integer divide, SImode, no memory operand.
;; Matches insns with cpu "znver3", type "idiv", mode "SI" and memory "none".
;; Declared latency is 12; the reservation takes "znver1-double" and then
;; holds "znver1-ieu2" for 12 consecutive cycles.
(define_insn_reservation "znver3_idiv_SI" 12
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "SI")
(eq_attr "memory" "none"))))
"znver1-double,znver1-ieu2*12")
;; znver3 integer divide, HImode, no memory operand.
;; Matches insns with cpu "znver3", type "idiv", mode "HI" and memory "none".
;; Declared latency is 10; the reservation takes "znver1-double" and then
;; holds "znver1-ieu2" for 10 consecutive cycles.
(define_insn_reservation "znver3_idiv_HI" 10
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "HI")
(eq_attr "memory" "none"))))
"znver1-double,znver1-ieu2*10")
;; znver3 integer divide, QImode, no memory operand.
;; Matches insns with cpu "znver3", type "idiv", mode "QI" and memory "none".
;; Declared latency is 9.  Unlike the wider modes this form starts with
;; "znver1-direct" rather than "znver1-double", then holds "znver1-ieu2"
;; for 9 consecutive cycles.
(define_insn_reservation "znver3_idiv_QI" 9
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "QI")
(eq_attr "memory" "none"))))
"znver1-direct,znver1-ieu2*9")
;; znver3 integer divide, DImode, with a memory load operand.
;; Matches insns with cpu "znver3", type "idiv", mode "DI" and memory "load".
;; Declared latency is 22: the 18 cycles of the register form
;; (znver3_idiv_DI) plus the 4-cycle load noted for "znver1-load".
;; The divider itself is therefore held for 18 cycles, not 22, which keeps
;; this entry consistent with znver3_idiv_mem_HI and znver3_idiv_mem_QI,
;; whose ieu2 occupancy equals that of their register forms.
(define_insn_reservation "znver3_idiv_mem_DI" 22
                         (and (eq_attr "cpu" "znver3")
                              (and (eq_attr "type" "idiv")
                                   (and (eq_attr "mode" "DI")
                                        (eq_attr "memory" "load"))))
                         "znver1-double,znver1-load,znver1-ieu2*18")
;; znver3 integer divide, SImode, with a memory load operand.
;; Matches insns with cpu "znver3", type "idiv", mode "SI" and memory "load".
;; Declared latency is 16: the 12 cycles of the register form
;; (znver3_idiv_SI) plus the 4-cycle load noted for "znver1-load".
;; The divider itself is therefore held for 12 cycles, not 16, which keeps
;; this entry consistent with znver3_idiv_mem_HI and znver3_idiv_mem_QI,
;; whose ieu2 occupancy equals that of their register forms.
(define_insn_reservation "znver3_idiv_mem_SI" 16
                         (and (eq_attr "cpu" "znver3")
                              (and (eq_attr "type" "idiv")
                                   (and (eq_attr "mode" "SI")
                                        (eq_attr "memory" "load"))))
                         "znver1-double,znver1-load,znver1-ieu2*12")
;; znver3 integer divide, HImode, with a memory load operand.
;; Matches insns with cpu "znver3", type "idiv", mode "HI" and memory "load".
;; Declared latency is 14.  The reservation takes "znver1-double", then
;; "znver1-load", then holds "znver1-ieu2" for 10 consecutive cycles (the
;; same ieu2 occupancy as the register form znver3_idiv_HI).
(define_insn_reservation "znver3_idiv_mem_HI" 14
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "HI")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-ieu2*10")
;; znver3 integer divide, QImode, with a memory load operand.
;; Matches insns with cpu "znver3", type "idiv", mode "QI" and memory "load".
;; Declared latency is 13.  The reservation takes "znver1-direct", then
;; "znver1-load", then holds "znver1-ieu2" for 9 consecutive cycles (the
;; same ieu2 occupancy as the register form znver3_idiv_QI).
(define_insn_reservation "znver3_idiv_mem_QI" 13
(and (eq_attr "cpu" "znver3")
(and (eq_attr "type" "idiv")
(and (eq_attr "mode" "QI")
(eq_attr "memory" "load"))))
"znver1-direct,znver1-load,znver1-ieu2*9")
;; STR ISHIFT which are micro coded.
;; FIXME: Latency needs to be rechecked.
(define_insn_reservation "znver1_str_ishift" 6
@ -236,15 +293,16 @@
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_str_ishift" 3
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ishift")
(eq_attr "memory" "both,store")))
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_str_istr" 19
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "str")
(eq_attr "memory" "both,store")))
"znver1-vector,znver1-ivector")
;; MOV - integer moves
(define_insn_reservation "znver1_load_imov_double" 2
(and (eq_attr "cpu" "znver1")
@ -254,14 +312,14 @@
"znver1-double,znver1-ieu|znver1-ieu")
(define_insn_reservation "znver2_load_imov_double" 1
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "imovx")
(eq_attr "memory" "none"))))
"znver1-double,znver1-ieu|znver1-ieu")
(define_insn_reservation "znver1_load_imov_direct" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "imov,imovx")
(eq_attr "memory" "none")))
"znver1-direct,znver1-ieu")
@ -274,7 +332,7 @@
"znver1-double,znver1-ieu|znver1-ieu,znver1-store")
(define_insn_reservation "znver2_load_imov_double_store" 1
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "imovx")
(eq_attr "memory" "store"))))
@ -287,7 +345,7 @@
"znver1-direct,znver1-ieu,znver1-store")
(define_insn_reservation "znver2_load_imov_direct_store" 1
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "imov,imovx")
(eq_attr "memory" "store")))
"znver1-direct,znver1-ieu,znver2-store")
@ -300,14 +358,14 @@
"znver1-double,znver1-load,znver1-ieu|znver1-ieu")
(define_insn_reservation "znver2_load_imov_double_load" 4
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "imovx")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-ieu|znver1-ieu")
(define_insn_reservation "znver1_load_imov_direct_load" 4
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "imov,imovx")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load")
@ -315,13 +373,13 @@
;; INTEGER/GENERAL instructions
;; register/imm operands only: ALU, ICMP, NEG, NOT, ROTATE, ISHIFT, TEST
(define_insn_reservation "znver1_insn" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov")
(eq_attr "memory" "none,unknown")))
"znver1-direct,znver1-ieu")
(define_insn_reservation "znver1_insn_load" 5
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-ieu")
@ -333,7 +391,7 @@
"znver1-direct,znver1-ieu,znver1-store")
(define_insn_reservation "znver2_insn_store" 1
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec")
(eq_attr "memory" "store")))
"znver1-direct,znver1-ieu,znver2-store")
@ -345,7 +403,7 @@
"znver1-direct,znver1-load,znver1-ieu,znver1-store")
(define_insn_reservation "znver2_insn_both" 5
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec")
(eq_attr "memory" "both")))
"znver1-direct,znver1-load,znver1-ieu,znver2-store")
@ -357,7 +415,7 @@
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_ieu_vector" 5
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(eq_attr "type" "other,str,multi"))
"znver1-vector,znver2-ivector")
@ -370,21 +428,21 @@
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_alu1_vector" 3
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "vector")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none,unknown"))))
"znver1-vector,znver2-ivector")
(define_insn_reservation "znver1_alu1_double" 2
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none,unknown"))))
"znver1-double,znver1-ieu")
(define_insn_reservation "znver1_alu1_direct" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "direct")
(and (eq_attr "type" "alu1")
(eq_attr "memory" "none,unknown"))))
@ -392,45 +450,45 @@
;; Branches.  FIXME: conditional branches still need to be modeled.
(define_insn_reservation "znver1_branch" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "ibr")
(eq_attr "memory" "none")))
(eq_attr "memory" "none")))
"znver1-direct")
;; Indirect branches check latencies.
(define_insn_reservation "znver1_indirect_branch_mem" 6
(and (eq_attr "cpu" "znver1")
(and (eq_attr "type" "ibr")
(eq_attr "memory" "load")))
(eq_attr "memory" "load")))
"znver1-vector,znver1-ivector")
(define_insn_reservation "znver2_indirect_branch_mem" 6
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ibr")
(eq_attr "memory" "load")))
(eq_attr "memory" "load")))
"znver1-vector,znver2-ivector")
;; LEA executes in ALU units with 1 cycle latency.
(define_insn_reservation "znver1_lea" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "lea"))
"znver1-direct,znver1-ieu")
;; Other integer instructions
(define_insn_reservation "znver1_idirect" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "none,unknown")))
"znver1-direct,znver1-ieu")
;; Floating point
(define_insn_reservation "znver1_fp_cmov" 6
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "fcmov"))
"znver1-vector,znver1-fvector")
(define_insn_reservation "znver1_fp_mov_direct_load" 8
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "direct")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load"))))
@ -443,41 +501,34 @@
(eq_attr "memory" "store"))))
"znver1-direct,znver1-fp2|znver1-fp3,znver1-store")
(define_insn_reservation "znver2_fp_mov_direct_store" 5
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "direct")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store"))))
"znver1-direct,znver1-fp2|znver1-fp3,znver2-store")
(define_insn_reservation "znver1_fp_mov_double" 4
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "none"))))
"znver1-double,znver1-fp3")
(define_insn_reservation "znver1_fp_mov_double_load" 12
(and (eq_attr "cpu" "znver1")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp3")
(define_insn_reservation "znver2_fp_mov_double_load" 12
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "znver1_decode" "double")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp3")
(define_insn_reservation "znver1_fp_mov_direct" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "fmov"))
"znver1-direct,znver1-fp3")
;; TODO: AGU?
(define_insn_reservation "znver1_fp_spc_direct" 5
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fpspc")
(eq_attr "memory" "store")))
"znver1-direct,znver1-fp3,znver1-fp2")
@ -488,26 +539,26 @@
(eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov")))
"znver1-vector,znver1-fvector")
(define_insn_reservation "znver2_fp_insn_vector" 6
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "znver1_decode" "vector")
(eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov")))
"znver1-vector,znver2-fvector")
;; FABS
(define_insn_reservation "znver1_fp_fsgn" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "fsgn"))
"znver1-direct,znver1-fp3")
(define_insn_reservation "znver1_fp_fcmp" 2
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "memory" "none")
(and (eq_attr "znver1_decode" "double")
(eq_attr "type" "fcmp"))))
"znver1-double,znver1-fp0,znver1-fp2")
(define_insn_reservation "znver1_fp_fcmp_load" 9
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "memory" "none")
(and (eq_attr "znver1_decode" "double")
(eq_attr "type" "fcmp"))))
@ -515,32 +566,32 @@
;;FADD FSUB FMUL
(define_insn_reservation "znver1_fp_op_mul" 5
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0*5")
(define_insn_reservation "znver1_fp_op_mul_load" 12
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0*5")
(define_insn_reservation "znver1_fp_op_imul_load" 16
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fop,fmul")
(and (eq_attr "fp_int_src" "true")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp3,znver1-fp0")
(define_insn_reservation "znver1_fp_op_div" 15
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3*15")
(define_insn_reservation "znver1_fp_op_div_load" 22
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3*15")
@ -553,62 +604,63 @@
"znver1-double,znver1-load,znver1-fp3*19")
(define_insn_reservation "znver2_fp_op_idiv_load" 26
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "fdiv")
(and (eq_attr "fp_int_src" "true")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp3*19")
;; MMX, SSE, SSEn.n, AVX, AVX2 instructions
(define_insn_reservation "znver1_fp_insn" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(eq_attr "type" "mmx"))
"znver1-direct,znver1-fpu")
(define_insn_reservation "znver1_mmx_add" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxadd")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3")
(define_insn_reservation "znver1_mmx_add_load" 8
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxadd")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3")
(define_insn_reservation "znver1_mmx_cmp" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxcmp")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp3")
(define_insn_reservation "znver1_mmx_cmp_load" 8
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxcmp")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp3")
(define_insn_reservation "znver1_mmx_cvt_pck_shuf" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp1|znver1-fp2")
(define_insn_reservation "znver1_mmx_cvt_pck_shuf_load" 8
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
(define_insn_reservation "znver1_mmx_shift_move" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxshft,mmxmov")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp2")
"znver1-direct,znver1-fp2")
(define_insn_reservation "znver1_mmx_shift_move_load" 8
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxshft,mmxmov")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp2")
@ -619,19 +671,19 @@
(eq_attr "memory" "store,both")))
"znver1-direct,znver1-fp2,znver1-store")
(define_insn_reservation "znver2_mmx_move_store" 1
(and (eq_attr "cpu" "znver1")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "mmxshft,mmxmov")
(eq_attr "memory" "store,both")))
"znver1-direct,znver1-fp2,znver2-store")
(define_insn_reservation "znver1_mmx_mul" 3
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0*3")
(define_insn_reservation "znver1_mmx_load" 10
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "mmxmul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0*3")
@ -652,13 +704,13 @@
"znver1-double,znver1-load,znver1-fpu")
(define_insn_reservation "znver1_sse_log" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "sselog")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fpu")
(define_insn_reservation "znver1_sse_log_load" 8
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "sselog")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fpu")
@ -678,13 +730,13 @@
"znver1-double,znver1-load,znver1-fp1|znver1-fp2")
(define_insn_reservation "znver1_sse_log1" 1
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "sselog1")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp1|znver1-fp2")
(define_insn_reservation "znver1_sse_log1_load" 8
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "sselog1")
(eq_attr "memory" "!none")))
"znver1-direct,znver1-load,znver1-fp1|znver1-fp2")
@ -701,7 +753,8 @@
(define_insn_reservation "znver1_sse_comi_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "prefix_extra" "0")
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "load"))))
@ -710,7 +763,8 @@
(define_insn_reservation "znver1_sse_comi_double" 2
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,V2DF,TI"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "prefix" "vex")
(and (eq_attr "prefix_extra" "0")
(and (eq_attr "type" "ssecomi")
@ -720,7 +774,8 @@
(define_insn_reservation "znver1_sse_comi_double_load" 10
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,V2DF,TI"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "prefix" "vex")
(and (eq_attr "prefix_extra" "0")
(and (eq_attr "type" "ssecomi")
@ -730,7 +785,8 @@
(define_insn_reservation "znver1_sse_test" 1
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "prefix_extra" "1")
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "none"))))
@ -739,7 +795,8 @@
(define_insn_reservation "znver1_sse_test_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "prefix_extra" "1")
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "load"))))
@ -757,7 +814,7 @@
"znver1-direct,znver1-ieu0")
(define_insn_reservation "znver2_sse_mov" 1
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "mode" "SI")
(and (eq_attr "isa" "avx")
(and (eq_attr "type" "ssemov")
@ -774,7 +831,7 @@
"znver1-direct,znver1-ieu2")
(define_insn_reservation "znver2_avx_mov" 1
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "mode" "TI")
(and (eq_attr "isa" "avx")
(and (eq_attr "type" "ssemov")
@ -785,7 +842,8 @@
(define_insn_reservation "znver1_sseavx_mov" 1
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "type" "ssemov")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fpu")
@ -797,7 +855,7 @@
(eq_attr "memory" "store"))))
"znver1-direct,znver1-fpu,znver1-store")
(define_insn_reservation "znver2_sseavx_mov_store" 1
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssemov")
(eq_attr "memory" "store")))
"znver1-direct,znver1-fpu,znver2-store")
@ -805,7 +863,8 @@
(define_insn_reservation "znver1_sseavx_mov_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "type" "ssemov")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fpu")
@ -835,7 +894,8 @@
(define_insn_reservation "znver1_sseavx_add" 3
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp2|znver1-fp3")
@ -843,7 +903,8 @@
(define_insn_reservation "znver1_sseavx_add_load" 10
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp2|znver1-fp3")
@ -892,10 +953,39 @@
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp0|znver1-fp1")
;; znver3, type "ssemuladd" (FMA), mode SF, DF, V4SF or V2DF, no memory
;; operand.  Default latency 4.  Reservation: znver1-direct, then either
;; znver1-fp0 or znver1-fp1.
(define_insn_reservation "znver3_sseavx_fma" 4
(and (and (eq_attr "cpu" "znver3")
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
(and (eq_attr "type" "ssemuladd")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1")
;; znver3, type "ssemuladd" (FMA), mode SF, DF, V4SF or V2DF, with a
;; memory load.  Default latency 11.  Reservation: znver1-direct, then
;; znver1-load, then either znver1-fp0 or znver1-fp1.
(define_insn_reservation "znver3_sseavx_fma_load" 11
(and (and (eq_attr "cpu" "znver3")
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
(and (eq_attr "type" "ssemuladd")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
;; znver3, type "ssemuladd" (FMA), 256-bit modes V8SF and V4DF, no memory
;; operand.  Default latency 4.  Reservation: znver1-double, then either
;; znver1-fp0 or znver1-fp1.
(define_insn_reservation "znver3_avx256_fma" 4
(and (eq_attr "cpu" "znver3")
(and (eq_attr "mode" "V8SF,V4DF")
(and (eq_attr "type" "ssemuladd")
(eq_attr "memory" "none"))))
"znver1-double,znver1-fp0|znver1-fp1")
;; znver3, type "ssemuladd" (FMA), 256-bit modes V8SF and V4DF, with a
;; memory load.  Default latency 11.  Reservation: znver1-double, then
;; znver1-load, then either znver1-fp0 or znver1-fp1.
(define_insn_reservation "znver3_avx256_fma_load" 11
(and (eq_attr "cpu" "znver3")
(and (eq_attr "mode" "V8SF,V4DF")
(and (eq_attr "type" "ssemuladd")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_sseavx_iadd" 1
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "DI,TI"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "type" "sseiadd")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3")
@ -903,7 +993,8 @@
(define_insn_reservation "znver1_sseavx_iadd_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "DI,TI"))
(eq_attr "cpu" "znver2"))
(ior (eq_attr "cpu" "znver2")
(eq_attr "cpu" "znver3")))
(and (eq_attr "type" "sseiadd")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3")
@ -924,7 +1015,7 @@
;; SSE conversions.
(define_insn_reservation "znver1_ssecvtsf_si_load" 12
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "SI")
(and (eq_attr "type" "sseicvt")
(and (match_operand:SF 1 "memory_operand")
@ -939,7 +1030,7 @@
(eq_attr "memory" "none")))))
"znver1-double,znver1-fp3,znver1-ieu0")
(define_insn_reservation "znver2_ssecvtdf_si" 4
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "mode" "SI")
(and (match_operand:DF 1 "register_operand")
(and (eq_attr "type" "sseicvt")
@ -955,13 +1046,14 @@
"znver1-double,znver1-load,znver1-fp3,znver1-ieu0")
(define_insn_reservation "znver2_ssecvtdf_si_load" 11
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "mode" "SI")
(and (eq_attr "type" "sseicvt")
(and (match_operand:DF 1 "memory_operand")
(eq_attr "memory" "load")))))
"znver1-double,znver1-load,znver1-fp3,znver1-ieu0")
;; All other ssecvt instructions are modelled as using the fp3 pipe.
;; Check: Need to revisit this again.
;; Some SSE converts may use different pipe combinations.
@ -972,19 +1064,13 @@
"znver1-direct,znver1-fp3")
(define_insn_reservation "znver2_ssecvt" 3
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssecvt")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3")
(define_insn_reservation "znver1_ssecvt_load" 11
(and (eq_attr "cpu" "znver1")
(and (eq_attr "type" "ssecvt")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3")
(define_insn_reservation "znver2_ssecvt_load" 11
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "type" "ssecvt")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3")
@ -994,7 +1080,9 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "V8SF,V4SF,SF")))
(eq_attr "mode" "V8SF,V4SF,SF"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "V8SF,V4SF,SF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3*10")
@ -1003,7 +1091,9 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "V8SF,V4SF,SF")))
(eq_attr "mode" "V8SF,V4SF,SF"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "V8SF,V4SF,SF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3*10")
@ -1012,16 +1102,20 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V2DF,DF"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "V4DF,V2DF,DF")))
(eq_attr "mode" "V4DF,V2DF,DF"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "V4DF,V2DF,DF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp3*13")
(define_insn_reservation "znver1_ssediv_sd_pd_load" 20
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V2DF,DF"))
(eq_attr "mode" "V2DF,DF"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "V4DF,V2DF,DF")))
(eq_attr "mode" "V4DF,V2DF,DF"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "V4DF,V2DF,DF")))
(and (eq_attr "type" "ssediv")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp3*13")
@ -1058,7 +1152,9 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")))
(eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")))
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
"znver1-direct,(znver1-fp0|znver1-fp1)*3")
@ -1067,7 +1163,9 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "V4SF,SF"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "V8SF,V4SF,SF")))
(eq_attr "mode" "V8SF,V4SF,SF"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "V8SF,V4SF,SF")))
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
@ -1101,17 +1199,18 @@
"znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*4")
(define_insn_reservation "znver2_ssemul_sd_pd" 3
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "none")))
"znver1-direct,(znver1-fp0|znver1-fp1)*3")
(define_insn_reservation "znver2_ssemul_sd_pd_load" 10
(and (eq_attr "cpu" "znver2")
(and (eq_attr "cpu" "znver2,znver3")
(and (eq_attr "type" "ssemul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3")
(define_insn_reservation "znver1_ssemul_avx256_pd" 5
(and (eq_attr "cpu" "znver1")
(and (eq_attr "mode" "V4DF")
@ -1131,13 +1230,15 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "TI"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "TI,OI")))
(eq_attr "mode" "TI,OI"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "TI,OI")))
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0*3")
(define_insn_reservation "znver1_sseimul_avx256" 4
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "OI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "none"))))
@ -1147,27 +1248,29 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "TI"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "TI,OI"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "TI,OI")))
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0*3")
(define_insn_reservation "znver1_sseimul_avx256_load" 11
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "OI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load"))))
"znver1-double,znver1-load,znver1-fp0*4")
(define_insn_reservation "znver1_sseimul_di" 3
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "DI")
(and (eq_attr "memory" "none")
(eq_attr "type" "sseimul"))))
"znver1-direct,znver1-fp0*3")
(define_insn_reservation "znver1_sseimul_load_di" 10
(and (eq_attr "cpu" "znver1,znver2")
(and (eq_attr "cpu" "znver1,znver2,znver3")
(and (eq_attr "mode" "DI")
(and (eq_attr "type" "sseimul")
(eq_attr "memory" "load"))))
@ -1178,16 +1281,20 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")))
(eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")))
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp1")
(define_insn_reservation "znver1_sse_cmp_load" 8
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
(eq_attr "mode" "SF,DF,V4SF,V2DF"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")))
(eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")))
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp1")
@ -1208,9 +1315,11 @@
(define_insn_reservation "znver1_sse_icmp" 1
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "QI,HI,SI,DI,TI"))
(eq_attr "mode" "QI,HI,SI,DI,TI"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "QI,HI,SI,DI,TI,OI")))
(eq_attr "mode" "QI,HI,SI,DI,TI,OI"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "QI,HI,SI,DI,TI,OI")))
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "none")))
"znver1-direct,znver1-fp0|znver1-fp3")
@ -1219,7 +1328,9 @@
(and (ior (and (eq_attr "cpu" "znver1")
(eq_attr "mode" "QI,HI,SI,DI,TI"))
(and (eq_attr "cpu" "znver2")
(eq_attr "mode" "QI,HI,SI,DI,TI,OI")))
(eq_attr "mode" "QI,HI,SI,DI,TI,OI"))
(and (eq_attr "cpu" "znver3")
(eq_attr "mode" "QI,HI,SI,DI,TI,OI")))
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
"znver1-direct,znver1-load,znver1-fp0|znver1-fp3")

View File

@ -22882,6 +22882,12 @@ AMD Family 17h Zen version 1.
@item znver2
AMD Family 17h Zen version 2.
@item amdfam19h
AMD Family 19h CPU.
@item znver3
AMD Family 19h Zen version 3.
@end table
Here is an example:

View File

@ -30052,6 +30052,13 @@ MWAITX, SHA, CLZERO, AES, PCLMUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A,
SSSE3, SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, RDPID,
WBNOINVD, and 64-bit instruction set extensions.)
@item znver3
AMD Family 19h core based CPUs with x86-64 instruction set support. (This
supersets BMI, BMI2, CLWB, F16C, FMA, FSGSBASE, AVX, AVX2, ADCX, RDSEED,
MWAITX, SHA, CLZERO, AES, PCLMUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A,
SSSE3, SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, RDPID,
WBNOINVD, PKU, VPCLMULQDQ, VAES, and 64-bit instruction set extensions.)
@item btver1
CPUs based on AMD Family 14h cores with x86-64 instruction set support. (This
supersets MMX, SSE, SSE2, SSE3, SSSE3, SSE4A, CX16, ABM and 64-bit

View File

@ -0,0 +1,79 @@
// Test that dispatching can choose the right multiversion
// for AMD CPUs with the same internal GCC processor id
// { dg-do run }
// { dg-require-ifunc "" }
// { dg-options "-O2" }
#include <assert.h>
// Generic version (target "default").  main expects this result when
// none of its __builtin_cpu_is checks match.
__attribute__ ((target ("default")))
int foo ()
{
  return 0;
}
// Version built for arch=amdfam10.  main expects 1 on a CPU reported
// as "amdfam10h".
__attribute__ ((target ("arch=amdfam10")))
int foo ()
{
  return 1;
}
// Version built for arch=btver1.  main expects 2 on a CPU reported
// as "btver1".
__attribute__ ((target ("arch=btver1")))
int foo ()
{
  return 2;
}
// Version built for arch=btver2.  main expects 3 on a CPU reported
// as "btver2".
__attribute__ ((target ("arch=btver2")))
int foo ()
{
  return 3;
}
// Version built for arch=bdver1.  main expects 4 on a CPU reported
// as "bdver1".
__attribute__ ((target ("arch=bdver1")))
int foo ()
{
  return 4;
}
// Version built for arch=bdver2.  main expects 5 on a CPU reported
// as "bdver2".
__attribute__ ((target ("arch=bdver2")))
int foo ()
{
  return 5;
}
// Version built for arch=bdver3.  main expects 6 on a CPU reported
// as "bdver3".
__attribute__ ((target ("arch=bdver3")))
int foo ()
{
  return 6;
}
// Version built for arch=znver1.  main expects 7 on a CPU reported
// as "znver1".
__attribute__ ((target ("arch=znver1")))
int foo ()
{
  return 7;
}
// Version built for arch=znver2.  main expects 8 on a CPU reported
// as "znver2".
__attribute__ ((target ("arch=znver2")))
int foo ()
{
  return 8;
}
// Version built for arch=znver3.  main expects 9 on a CPU reported
// as "znver3".
__attribute__ ((target ("arch=znver3")))
int foo ()
{
  return 9;
}
// Call the multiversioned foo and check that its result matches the CPU
// reported by __builtin_cpu_is.  The checks are tried in the order
// listed; a CPU matching none of them must have run the default version.
int main ()
{
  const int val = foo ();

  // Value returned by the foo version written for the detected CPU.
  const int expected
    = __builtin_cpu_is ("amdfam10h") ? 1
    : __builtin_cpu_is ("btver1") ? 2
    : __builtin_cpu_is ("btver2") ? 3
    : __builtin_cpu_is ("bdver1") ? 4
    : __builtin_cpu_is ("bdver2") ? 5
    : __builtin_cpu_is ("bdver3") ? 6
    : __builtin_cpu_is ("znver1") ? 7
    : __builtin_cpu_is ("znver2") ? 8
    : __builtin_cpu_is ("znver3") ? 9
    : 0;

  assert (val == expected);
  return 0;
}

View File

@ -193,6 +193,9 @@ extern void test_arch_barcelona (void) __attribute__((__target__("arch=barcelon
extern void test_arch_bdver1 (void) __attribute__((__target__("arch=bdver1")));
extern void test_arch_bdver2 (void) __attribute__((__target__("arch=bdver2")));
extern void test_arch_bdver3 (void) __attribute__((__target__("arch=bdver3")));
extern void test_arch_znver1 (void) __attribute__((__target__("arch=znver1")));
extern void test_arch_znver2 (void) __attribute__((__target__("arch=znver2")));
extern void test_arch_znver3 (void) __attribute__((__target__("arch=znver3")));
extern void test_tune_nocona (void) __attribute__((__target__("tune=nocona")));
extern void test_tune_core2 (void) __attribute__((__target__("tune=core2")));
@ -212,6 +215,9 @@ extern void test_tune_bdver1 (void) __attribute__((__target__("tune=bdver1")));
extern void test_tune_bdver2 (void) __attribute__((__target__("tune=bdver2")));
extern void test_tune_bdver3 (void) __attribute__((__target__("tune=bdver3")));
extern void test_tune_generic (void) __attribute__((__target__("tune=generic")));
extern void test_tune_znver1 (void) __attribute__((__target__("tune=znver1")));
extern void test_tune_znver2 (void) __attribute__((__target__("tune=znver2")));
extern void test_tune_znver3 (void) __attribute__((__target__("tune=znver3")));
extern void test_fpmath_sse (void) __attribute__((__target__("sse2,fpmath=sse")));
extern void test_fpmath_387 (void) __attribute__((__target__("sse2,fpmath=387")));