analyzer: bulk merger/processing of runs of nodes at CFG join points
Prior to this patch the analyzer worklist considered only one node or two nodes at a time, processing and/or merging state individually or pairwise. This could lead to explosions of merger nodes at CFG join points, especially after switch statements, which could have large numbers of in-edges, and thus large numbers of merger exploded_nodes could be created, exceeding the per-point limit and thus stopping analysis with -Wanalyzer-too-complex. This patch special-cases the handling for runs of consecutive nodes in the worklist at a CFG join point, processing and merging them all together. The patch fixes a state explosion seen in bzip2.c seen when attempting to reproduce PR analyzer/95188, in a switch statement in a loop for argument parsing. With this patch, the analyzer successfully consolidates the state after the argument parsing to a single exploded node. In gcc.dg/analyzer/pr96653.c there is a switch statement with over 300 cases which leads to hitting the per-point limit. With this patch the consolidation code doesn't manage to merge all of them due to other worklist-ordering bugs, and it still hits the per-point limits, but it does manage some very long consolidations: merged 2 in-enodes into 2 out-enode(s) at SN: 403 merged 2 in-enodes into 2 out-enode(s) at SN: 403 merged 2 in-enodes into 1 out-enode(s) at SN: 11 merged 29 in-enodes into 1 out-enode(s) at SN: 35 merged 6 in-enodes into 1 out-enode(s) at SN: 41 merged 31 in-enodes into 1 out-enode(s) at SN: 35 and with a followup patch to fix an SCC issue it manages: merged 358 in-enodes into 2 out-enode(s) at SN: 402 The patch appears to fix the failure on non-x86_64 of: gcc.dg/analyzer/pr93032-mztools.c (test for excess errors) which is PR analyzer/96616. Unfortunately, the patch introduces a memory leak false positive in gcc.dg/analyzer/pr94851-1.c, but this appears to be a pre-existing bug that was hidden by state-merging failures. 
gcc/analyzer/ChangeLog: * engine.cc (exploded_node::dump_dot): Show STATUS_BULK_MERGED. (exploded_graph::process_worklist): Call maybe_process_run_of_before_supernode_enodes. (exploded_graph::maybe_process_run_of_before_supernode_enodes): New. (exploded_graph_annotator::print_enode): Show STATUS_BULK_MERGED. * exploded-graph.h (enum exploded_node::status): Add STATUS_BULK_MERGED. gcc/testsuite/ChangeLog: * gcc.dg/analyzer/bzip2-arg-parse-1.c: New test. * gcc.dg/analyzer/loop-n-down-to-1-by-1.c: Remove xfail. * gcc.dg/analyzer/pr94851-1.c: Add xfail.
This commit is contained in:
parent
b9b5fc0c21
commit
b28491dc2d
@ -848,6 +848,8 @@ exploded_node::dump_dot (graphviz_out *gv, const dump_args_t &args) const
|
||||
pp_printf (pp, "EN: %i", m_index);
|
||||
if (m_status == STATUS_MERGER)
|
||||
pp_string (pp, " (merger)");
|
||||
else if (m_status == STATUS_BULK_MERGED)
|
||||
pp_string (pp, " (bulk merged)");
|
||||
pp_newline (pp);
|
||||
|
||||
if (args.show_enode_details_p (*this))
|
||||
@ -2211,6 +2213,12 @@ exploded_graph::process_worklist ()
|
||||
if (logger)
|
||||
logger->log ("next to process: EN: %i", node->m_index);
|
||||
|
||||
/* If we have a run of nodes that are before-supernode, try merging and
|
||||
processing them together, rather than pairwise or individually. */
|
||||
if (flag_analyzer_state_merge && node != m_origin)
|
||||
if (maybe_process_run_of_before_supernode_enodes (node))
|
||||
goto handle_limit;
|
||||
|
||||
/* Avoid exponential explosions of nodes by attempting to merge
|
||||
nodes that are at the same program point and which have
|
||||
sufficiently similar state. */
|
||||
@ -2340,6 +2348,7 @@ exploded_graph::process_worklist ()
|
||||
|
||||
process_node (node);
|
||||
|
||||
handle_limit:
|
||||
/* Impose a hard limit on the number of exploded nodes, to ensure
|
||||
that the analysis terminates in the face of pathological state
|
||||
explosion (or bugs).
|
||||
@ -2367,6 +2376,201 @@ exploded_graph::process_worklist ()
|
||||
}
|
||||
}
|
||||
|
||||
/* Attempt to process a consecutive run of sufficiently-similar nodes in
   the worklist at a CFG join-point (having already popped ENODE from the
   head of the worklist).

   If ENODE's point is of the form (before-supernode, SNODE) and the next
   nodes in the worklist are a consecutive run of enodes of the same form,
   for the same supernode as ENODE (but potentially from different in-edges),
   process them all together, setting their status to STATUS_BULK_MERGED,
   and return true.
   Otherwise, return false, in which case ENODE must be processed in the
   normal way.

   When processing them all together, generate successor states based
   on phi nodes for the appropriate CFG edges, and then attempt to merge
   these states into a minimal set of merged successor states, partitioning
   the inputs by merged successor state.

   Create new exploded nodes for all of the merged states, and add edges
   connecting the input enodes to the corresponding merger exploded nodes.

   We hope we have a much smaller number of merged successor states
   compared to the number of input enodes - ideally just one,
   if all successor states can be merged.

   Processing and merging many together as one operation rather than as
   pairs avoids scaling issues where per-pair mergers could bloat the
   graph with merger nodes (especially so after switch statements).  */

bool
exploded_graph::
maybe_process_run_of_before_supernode_enodes (exploded_node *enode)
{
  /* A struct for tracking per-input state.  */
  struct item
  {
    item (exploded_node *input_enode)
    : m_input_enode (input_enode),
      m_processed_state (input_enode->get_state ()),
      m_merger_idx (-1)
    {}

    /* The enode from the worklist that this item was created for.  */
    exploded_node *m_input_enode;
    /* Copy of the input enode's state, updated below for any phi nodes
       on the incoming CFG edge.  */
    program_state m_processed_state;
    /* Index into merged_states of the merged state this item was
       partitioned into; -1 until one is assigned.  */
    int m_merger_idx;
  };

  gcc_assert (enode->get_status () == exploded_node::STATUS_WORKLIST);
  gcc_assert (enode->m_succs.length () == 0);

  const program_point &point = enode->get_point ();

  /* This optimization only applies at a (before-supernode, SNODE) point.  */
  if (point.get_kind () != PK_BEFORE_SUPERNODE)
    return false;

  const supernode *snode = point.get_supernode ();

  logger * const logger = get_logger ();
  LOG_SCOPE (logger);

  /* Find a run of enodes in the worklist that are before the same supernode,
     but potentially from different in-edges.  Peek at each candidate and
     only consume it (via take_next) once it's known to match; stop at the
     first non-matching node so the worklist ordering is preserved.  */
  auto_vec <exploded_node *> enodes;
  enodes.safe_push (enode);
  while (exploded_node *enode_2 = m_worklist.peek_next ())
    {
      gcc_assert (enode_2->get_status ()
		  == exploded_node::STATUS_WORKLIST);
      gcc_assert (enode_2->m_succs.length () == 0);

      const program_point &point_2 = enode_2->get_point ();

      /* Require the same supernode and the same call string, so that all
	 of the enodes share the same successor point.  */
      if (point_2.get_kind () == PK_BEFORE_SUPERNODE
	  && point_2.get_supernode () == snode
	  && point_2.get_call_string () == point.get_call_string ())
	{
	  enodes.safe_push (enode_2);
	  m_worklist.take_next ();
	}
      else
	break;
    }

  /* If the only node is ENODE, then give up.  */
  if (enodes.length () == 1)
    return false;

  if (logger)
    logger->log ("got run of %i enodes for SN: %i",
		 enodes.length (), snode->m_index);

  /* All of these enodes have a shared successor point (even if they
     were for different in-edges).  */
  program_point next_point (point.get_next ());

  /* Calculate the successor state for each enode in enodes.  */
  auto_delete_vec<item> items (enodes.length ());
  unsigned i;
  exploded_node *iter_enode;
  FOR_EACH_VEC_ELT (enodes, i, iter_enode)
    {
      item *it = new item (iter_enode);
      items.quick_push (it);
      const program_state &state = iter_enode->get_state ();
      program_state *next_state = &it->m_processed_state;
      const program_point &iter_point = iter_enode->get_point ();
      /* If this enode arrived via a CFG edge, apply the effects of any
	 phi nodes in SNODE for that in-edge to its successor state.  */
      if (const superedge *iter_sedge = iter_point.get_from_edge ())
	{
	  impl_region_model_context ctxt (*this, iter_enode,
					  &state, next_state, NULL);
	  const cfg_superedge *last_cfg_superedge
	    = iter_sedge->dyn_cast_cfg_superedge ();
	  if (last_cfg_superedge)
	    next_state->m_region_model->update_for_phis
	      (snode, last_cfg_superedge, &ctxt);
	}
    }

  /* Attempt to partition the items into a set of merged states.
     We hope we have a much smaller number of merged states
     compared to the number of input enodes - ideally just one,
     if all can be merged.
     This is O(items * merged_states) pairwise merge attempts, which is
     acceptable on the assumption that merged_states stays small.  */
  auto_delete_vec <program_state> merged_states;
  auto_vec<item *> first_item_for_each_merged_state;
  item *it;
  FOR_EACH_VEC_ELT (items, i, it)
    {
      const program_state &it_state = it->m_processed_state;
      program_state *merged_state;
      unsigned iter_merger_idx;
      /* Try to fold this item's state into one of the existing merged
	 states, taking the first that accepts it.  */
      FOR_EACH_VEC_ELT (merged_states, iter_merger_idx, merged_state)
	{
	  program_state merge (m_ext_state);
	  if (it_state.can_merge_with_p (*merged_state, next_point, &merge))
	    {
	      *merged_state = merge;
	      it->m_merger_idx = iter_merger_idx;
	      if (logger)
		logger->log ("reusing merger state %i for item %i (EN: %i)",
			     it->m_merger_idx, i, it->m_input_enode->m_index);
	      goto got_merger;
	    }
	}
      /* If it couldn't be merged with any existing merged_states,
	 create a new one.  */
      if (it->m_merger_idx == -1)
	{
	  it->m_merger_idx = merged_states.length ();
	  merged_states.safe_push (new program_state (it_state));
	  first_item_for_each_merged_state.safe_push (it);
	  if (logger)
	    logger->log ("using new merger state %i for item %i (EN: %i)",
			 it->m_merger_idx, i, it->m_input_enode->m_index);
	}
    got_merger:
      gcc_assert (it->m_merger_idx >= 0);
      gcc_assert (it->m_merger_idx < merged_states.length ());
    }

  /* Create merger nodes: one successor enode per merged state.  */
  auto_vec<exploded_node *> next_enodes (merged_states.length ());
  program_state *merged_state;
  FOR_EACH_VEC_ELT (merged_states, i, merged_state)
    {
      /* Attribute the new node to the first input enode that contributed
	 to this merged state.  */
      exploded_node *src_enode
	= first_item_for_each_merged_state[i]->m_input_enode;
      exploded_node *next
	= get_or_create_node (next_point, *merged_state, src_enode);
      /* "next" could be NULL; we handle that when adding the edges below.
	 NOTE(review): presumably NULL means a node-creation limit was hit
	 in get_or_create_node - confirm there.  */
      next_enodes.quick_push (next);
      if (logger)
	{
	  if (next)
	    logger->log ("using EN: %i for merger state %i", next->m_index, i);
	  else
	    logger->log ("using NULL enode for merger state %i", i);
	}
    }

  /* Create edges from each input enode to the appropriate successor enode.
     Update the status of the now-processed input enodes.  */
  FOR_EACH_VEC_ELT (items, i, it)
    {
      exploded_node *next = next_enodes[it->m_merger_idx];
      if (next)
	add_edge (it->m_input_enode, next, NULL);
      it->m_input_enode->set_status (exploded_node::STATUS_BULK_MERGED);
    }

  if (logger)
    logger->log ("merged %i in-enodes into %i out-enode(s) at SN: %i",
		 items.length (), merged_states.length (), snode->m_index);

  return true;
}
|
||||
|
||||
/* Return true if STMT must appear at the start of its exploded node, and
|
||||
thus we can't consolidate its effects within a run of other statements,
|
||||
where PREV_STMT was the previous statement. */
|
||||
@ -3944,6 +4148,9 @@ private:
|
||||
case exploded_node::STATUS_MERGER:
|
||||
pp_string (pp, "(M)");
|
||||
break;
|
||||
case exploded_node::STATUS_BULK_MERGED:
|
||||
pp_string (pp, "(BM)");
|
||||
break;
|
||||
}
|
||||
gv->end_tdtr ();
|
||||
/* Dump any saved_diagnostics at this enode. */
|
||||
|
@ -161,6 +161,9 @@ class exploded_node : public dnode<eg_traits>
|
||||
/* Node was left unprocessed due to merger; it won't have had
|
||||
exploded_graph::process_node called on it. */
|
||||
STATUS_MERGER,
|
||||
|
||||
/* Node was processed by maybe_process_run_of_before_supernode_enodes. */
|
||||
STATUS_BULK_MERGED
|
||||
};
|
||||
|
||||
exploded_node (const point_and_state &ps, int index);
|
||||
@ -730,6 +733,7 @@ public:
|
||||
|
||||
void build_initial_worklist ();
|
||||
void process_worklist ();
|
||||
bool maybe_process_run_of_before_supernode_enodes (exploded_node *node);
|
||||
void process_node (exploded_node *node);
|
||||
|
||||
exploded_node *get_or_create_node (const program_point &point,
|
||||
|
95
gcc/testsuite/gcc.dg/analyzer/bzip2-arg-parse-1.c
Normal file
95
gcc/testsuite/gcc.dg/analyzer/bzip2-arg-parse-1.c
Normal file
@ -0,0 +1,95 @@
|
||||
/* Integration test to verify that we don't explode in this
|
||||
argument-parsing logic.
|
||||
Adapted from part of bzip2-1.0.8: bzip2.c: main. */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "analyzer-decls.h"
|
||||
|
||||
/* This test file has been heavily modified from the bzip2.c original,
|
||||
which has the following license boilerplate. */
|
||||
/* ------------------------------------------------------------------
|
||||
This file is part of bzip2/libbzip2, a program and library for
|
||||
lossless, block-sorting data compression.
|
||||
|
||||
bzip2/libbzip2 version 1.0.8 of 13 July 2019
|
||||
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
|
||||
|
||||
Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
||||
README file.
|
||||
|
||||
This program is released under the terms of the license contained
|
||||
in the file LICENSE.
|
||||
------------------------------------------------------------------ */
|
||||
|
||||
typedef char Char;
|
||||
typedef unsigned char Bool;
|
||||
typedef int Int32;
|
||||
|
||||
#define True ((Bool)1)
|
||||
#define False ((Bool)0)
|
||||
|
||||
typedef
|
||||
struct zzzz {
|
||||
Char *name;
|
||||
struct zzzz *link;
|
||||
}
|
||||
Cell;
|
||||
|
||||
Int32 verbosity;
|
||||
Bool keepInputFiles, smallMode;
|
||||
Bool forceOverwrite, noisy;
|
||||
Int32 blockSize100k;
|
||||
Int32 opMode;
|
||||
Int32 srcMode;
|
||||
Char *progName;
|
||||
|
||||
extern void license ( void );
|
||||
extern void usage ( Char *fullProgName );
|
||||
|
||||
/* Walk the linked list of arguments, handling runs of single-char flags
   (e.g. "-dvk") via a switch in a nested loop, as bzip2's main does.
   The analyzer should merge all of the flag-induced states back into a
   single exploded node after the loops.  */
void test (Cell *argList)
{
   Cell *aa;
   Int32 i, j;  /* NOTE(review): i appears unused; kept from the original.  */

   /* Iterate over each argument in the list.  */
   for (aa = argList; aa != NULL; aa = aa->link) {
      /* Only handle single-dash flags (not "--long" options).  */
      if (aa->name[0] == '-' && aa->name[1] != '-') {
         /* Each character after the '-' is a separate flag.  */
         for (j = 1; aa->name[j] != '\0'; j++) {
            switch (aa->name[j]) {
               case 'c': srcMode           = 2; break;
               case 'd': opMode            = 2; break;
               case 'z': opMode            = 1; break;
               case 'f': forceOverwrite    = True; break;
               case 't': opMode            = 3; break;
               case 'k': keepInputFiles    = True; break;
               case 's': smallMode         = True; break;
               case 'q': noisy             = False; break;
               case '1': blockSize100k     = 1; break;
               case '2': blockSize100k     = 2; break;
               case '3': blockSize100k     = 3; break;
               case '4': blockSize100k     = 4; break;
               case '5': blockSize100k     = 5; break;
               case '6': blockSize100k     = 6; break;
               case '7': blockSize100k     = 7; break;
               case '8': blockSize100k     = 8; break;
               case '9': blockSize100k     = 9; break;
               case 'V': /* fall through */
               case 'L': license ();      break;
               case 'v': verbosity++; break;
               case 'h': usage ( progName );
                         exit ( 0 );
                         break;
               default:  fprintf ( stderr, "%s: Bad flag `%s'\n",
                                   progName, aa->name );
                         usage ( progName );
                         exit ( 1 );
                         break;
            }
         }
      }
   }

   /* The analyzer ought to be able to successfully merge all of the
      above changes that can reach here into a single state.  */
   __analyzer_dump_exploded_nodes (0); /* { dg-warning "1 processed enode" } */
}
|
@ -24,8 +24,8 @@ void test(int n)
|
||||
__analyzer_dump_exploded_nodes (0); /* { dg-warning "2 processed enodes" } */
|
||||
}
|
||||
|
||||
__analyzer_eval (i <= 0); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
|
||||
/* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
|
||||
__analyzer_eval (i <= 0); /* { dg-warning "TRUE" "true" } */
|
||||
|
||||
|
||||
__analyzer_eval (i == 0); /* { dg-warning "TRUE" "desired" { xfail *-*-* } } */
|
||||
/* { dg-warning "UNKNOWN" "status quo" { target *-*-* } .-1 } */
|
||||
|
@ -40,7 +40,8 @@ int pamark(void) {
|
||||
last->m_next = p;
|
||||
}
|
||||
|
||||
p->m_name = (char)c;
|
||||
p->m_name = (char)c; /* { dg-bogus "leak of 'p'" "bogus leak" { xfail *-*-* } } */
|
||||
// TODO(xfail): related to PR analyzer/97072 and PR analyzer/97074
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user