/*
 * Review notes for this patch (placed ahead of the first "diff --git"
 * header; the patch payload below is unchanged).
 *
 * gcc/config/i386/x86-tune-costs.h
 *   - skylake_memcpy[2] and skylake_memset[2]: both array elements are
 *     replaced by the same table:
 *         {256, rep_prefix_1_byte, true},
 *         {256, loop, false},
 *         {-1, libcall, false}
 *     The removed tables used rep_prefix_4_byte, loop, loop_1_byte and
 *     unrolled_loop with thresholds 6, 16, 24, 512, 1024 and 8192.
 *     NOTE(review): the first field of each entry is presumably the
 *     maximum block size the algorithm is used for, with -1 meaning
 *     "no limit" -- confirm against the stringop_algs definition.
 *   - skylake_cost: CLEAR_RATIO changes from 6 to 17, the same value
 *     as the MOVE_RATIO entry on the preceding context line.
 *
 * gcc/config/i386/x86-tune.def
 *   - X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB: the mask
 *         m_CANNONLAKE | m_ICELAKE_CLIENT | m_ICELAKE_SERVER
 *         | m_TIGERLAKE | m_ALDERLAKE | m_SAPPHIRERAPIDS
 *     becomes
 *         m_SKYLAKE | m_ALDERLAKE | m_CORE_AVX512
 *
 * gcc/testsuite/gcc.target/i386 (all new files, -O2 -march=skylake)
 *   - memcpy-strategy-9.c / memset-strategy-8.c (-mno-sse): a constant
 *     size of 256 must produce "rep movsb" / "rep stosb".
 *   - memcpy-strategy-10.c / memset-strategy-7.c (-mno-sse): a constant
 *     size of 257 must produce a jmp (non-ia32) or call (ia32) to
 *     memcpy / memset and must not produce "rep movsb" / "rep stosb".
 *   - memcpy-strategy-11.c / memset-strategy-9.c: the MixColumn loops
 *     must produce neither a memcpy / memset jmp or call nor
 *     "rep movsb" / "rep stosb".
 *
 * NOTE(review): in this copy many patch lines are joined by single
 * spaces on one physical line, so the original line breaks and
 * indentation look lost -- confirm against the upstream commit before
 * trying to apply it.
 */
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 0e00ff99df3..ffe810f2bcb 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -1822,17 +1822,24 @@ struct processor_costs znver3_cost = { /* skylake_cost should produce code tuned for Skylake familly of CPUs. */ static stringop_algs skylake_memcpy[2] = { - {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, - {libcall, {{16, loop, false}, {512, unrolled_loop, false}, - {-1, libcall, false}}}}; + {libcall, + {{256, rep_prefix_1_byte, true}, + {256, loop, false}, + {-1, libcall, false}}}, + {libcall, + {{256, rep_prefix_1_byte, true}, + {256, loop, false}, + {-1, libcall, false}}}}; static stringop_algs skylake_memset[2] = { - {libcall, {{6, loop_1_byte, true}, - {24, loop, true}, - {8192, rep_prefix_4_byte, true}, - {-1, libcall, false}}}, - {libcall, {{24, loop, true}, {512, unrolled_loop, false}, - {-1, libcall, false}}}}; + {libcall, + {{256, rep_prefix_1_byte, true}, + {256, loop, false}, + {-1, libcall, false}}}, + {libcall, + {{256, rep_prefix_1_byte, true}, + {256, loop, false}, + {-1, libcall, false}}}}; static const struct processor_costs skylake_cost = { @@ -1889,7 +1896,7 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (0), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - 6, /* CLEAR_RATIO */ + 17, /* CLEAR_RATIO */ {4, 4, 4}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 134916cc972..eb057a67750 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -273,8 +273,7 @@ DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop", m_386 | m_P4_NOCONA) move/set sequences of bytes with known size. 
*/ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB, "prefer_known_rep_movsb_stosb", - m_CANNONLAKE | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_TIGERLAKE - | m_ALDERLAKE | m_SAPPHIRERAPIDS) + m_SKYLAKE | m_ALDERLAKE | m_CORE_AVX512) /* X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES: Enable generation of compact prologues and epilogues by issuing a misaligned moves. This diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-10.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-10.c new file mode 100644 index 00000000000..970aa741971 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-10.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mno-sse" } */ +/* { dg-final { scan-assembler "jmp\tmemcpy" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "call\tmemcpy" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "rep movsb" } } */ + +void +foo (char *dest, char *src) +{ + __builtin_memcpy (dest, src, 257); +} diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-11.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-11.c new file mode 100644 index 00000000000..b6041944630 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-11.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake" } */ +/* { dg-final { scan-assembler-not "jmp\tmemcpy" { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler-not "call\tmemcpy" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "rep movsb" } } */ + +typedef unsigned char e_u8; + +#define MAXBC 8 + +void MixColumn(e_u8 a[4][MAXBC], e_u8 BC) +{ + e_u8 b[4][MAXBC]; + int i, j; + + for(i = 0; i < 4; i++) + for(j = 0; j < BC; j++) a[i][j] = b[i][j]; +} diff --git a/gcc/testsuite/gcc.target/i386/memcpy-strategy-9.c b/gcc/testsuite/gcc.target/i386/memcpy-strategy-9.c new file mode 100644 index 00000000000..b0dc7484d09 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memcpy-strategy-9.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mno-sse" } */ +/* { dg-final { scan-assembler "rep movsb" } } */ + +void +foo (char *dest, char *src) +{ + __builtin_memcpy (dest, src, 256); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-7.c b/gcc/testsuite/gcc.target/i386/memset-strategy-7.c new file mode 100644 index 00000000000..07c2816910c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-7.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mno-sse" } */ +/* { dg-final { scan-assembler "jmp\tmemset" { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler "call\tmemset" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "rep stosb" } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 257); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-8.c b/gcc/testsuite/gcc.target/i386/memset-strategy-8.c new file mode 100644 index 00000000000..52ea882c814 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-8.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mno-sse" } */ +/* { dg-final { scan-assembler "rep stosb" } } */ + +void +foo (char *dest) +{ + __builtin_memset (dest, 0, 256); +} diff --git a/gcc/testsuite/gcc.target/i386/memset-strategy-9.c b/gcc/testsuite/gcc.target/i386/memset-strategy-9.c new file mode 100644 index 00000000000..d4db031958f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/memset-strategy-9.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake" } */ +/* { dg-final { scan-assembler-not "jmp\tmemset" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "call\tmemset" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "rep stosb" } } */ + +typedef unsigned char e_u8; + +#define MAXBC 8 + +void MixColumn(e_u8 a[4][MAXBC], e_u8 BC) +{ + int i, j; + + for(i = 0; i < 4; i++) + for(j = 0; j < BC; j++) a[i][j] = 1; +}