diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index e655e668c7a..db03738313e 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1767,11 +1767,11 @@ struct processor_costs znver3_cost = {
   2, 2, 3, /* cost of moving XMM,YMM,ZMM
      register. */
   6, /* cost of moving SSE register to integer. */
-  /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
-     throughput 12. Approx 9 uops do not depend on vector size and every load
-     is 7 uops. */
-  18, 8, /* Gather load static, per_elt. */
-  18, 10, /* Gather store static, per_elt. */
+  /* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
+     throughput 9. Approx 7 uops do not depend on vector size and every load
+     is 4 uops. */
+  14, 8, /* Gather load static, per_elt. */
+  14, 10, /* Gather store static, per_elt. */
   32, /* size of l1 cache. */
   512, /* size of l2 cache. */
   64, /* size of prefetch block. */
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 140ccb3d921..caebf76736e 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -436,7 +436,7 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
 
 /* X86_TUNE_USE_GATHER: Use gather instructions. */
 DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather",
-          ~(m_ZNVER | m_GENERIC))
+          ~(m_ZNVER1 | m_ZNVER2 | m_GENERIC))
 
 /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
    smaller FMA chain. */