SPO600 Project Stage 2 (Pt.4) - rewrite GCC pass to identify clone functions and comparing them
In the previous phase, I created a GCC pass on an x86-64 server to identify function clones and compare them. However, it did not work perfectly, as I was unable to retrieve the function pointer using the node pointer stored from the previous execution. As a result, I decided to rewrite the pass using a different approach.
Target: Identify clones and compare the number of basic blocks and GIMPLE statements.
The new version will first use for_each_function to locate the resolver’s base name. Then, in the main body of the execute function, it will check whether the current function has the same name as the resolver. If they match, it will compare the number of GIMPLE statements and basic blocks.
New version code:
/* Test Pass
Jeff Yau, Seneca Polytechnic College
Student ID :142466234
Modelled on tree-nrv.cc and tree-ctyler.cc by Chris Tyler, Seneca Polytechnic College, 2024-11
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include <string>
#include <stdlib.h>
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-pretty-print.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "internal-fn.h"
#include "gimple-pretty-print.h"
#include "cgraph.h"
#include "gimple-ssa.h"
#include "attribs.h"
#include "pretty-print.h"
#include "tree-inline.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
namespace {
const pass_data pass_data_jeff =
{
GIMPLE_PASS, /* type */
"jeff", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
};
class pass_jeff : public gimple_opt_pass
{
public:
pass_jeff (gcc::context *ctxt)
: gimple_opt_pass (pass_data_jeff, ctxt)
{}
/* opt_pass methods: */
bool gate (function *) final override { return 1; }
unsigned int execute (function *) final override;
}; // class pass_jeff
unsigned int pass_jeff::execute(function *func)
{
static std::string base_function;
static int var1_bb_count = 0;
static int var1_stmt_count = 0;
struct cgraph_node *node;
if (!dump_file ||!func || !func ->cfg)
return 0;
FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node)
{
if (!base_function.empty())
break; //resolver already found
function *fun =node->get_fun();
if (!fun)
continue; //ignore no function with no body
const char *fname = node->name();
if (!fname)
continue; //ignore function with no name
std::string name(fname);
if (base_function.empty() && name.find(".resolver") != std::string::npos)
{
base_function = name.substr(0, name.find(".resolver"));
fprintf(dump_file, "Found base function: %s\n", base_function.c_str());
break; //resolver found
}
}
node = cgraph_node::get(func->decl);
if (!node){
fprintf(dump_file, "ERROR: cgraph_node::get(func->decl)\n");
}
const char *fname = node->name();
std::string name(fname);
if (name.find(base_function) != 0 || name.find(".resolver") !=std::string::npos)
return 0;
int bb_count = 0;
int stmt_count = 0;
basic_block bb;
FOR_EACH_BB_FN(bb, func)
{
bb_count++;
for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi))
stmt_count++;
}
fprintf(dump_file, "------------counter: %d,%d----------\n", bb_count, stmt_count);
if (var1_bb_count == 0 && var1_stmt_count == 0)
{
var1_bb_count = bb_count;
var1_stmt_count = stmt_count;
fprintf(dump_file, "variant1 stats: BBs=%d, Stmts=%d\n", bb_count, stmt_count);
}
else
{
fprintf(dump_file, "Comparing: %s\n", base_function.c_str());
fprintf(dump_file, " Current BBs=%d, Stmts=%d | Base BBs=%d, Stmts=%d\n",
bb_count, stmt_count, var1_bb_count, var1_stmt_count);
if (bb_count == var1_bb_count && stmt_count == var1_stmt_count)
fprintf(dump_file, " PRUNE\n");
else
fprintf(dump_file, " NOPRUNE\n");
}
return 0;
}
} // anon namespace
gimple_opt_pass *
make_pass_jeff (gcc::context *ctxt)
{
return new pass_jeff (ctxt);
}
This time, the pass can correctly compare the clones based on the number of basic blocks and GIMPLE statements. The output is currently correct.
No-prune example output:
Prune example output:Target: Compare each GIMPLE statement between two clones
/* Test Pass
Jeff Yau, Seneca Polytechnic College
Student ID :142466234
Modelled on tree-nrv.cc and tree-ctyler.cc by Chris Tyler, Seneca Polytechnic College, 2024-11
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include <vector>
#include <string>
#include <stdlib.h>
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-pretty-print.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "internal-fn.h"
#include "gimple-pretty-print.h"
#include "cgraph.h"
#include "gimple-ssa.h"
#include "attribs.h"
#include "pretty-print.h"
#include "tree-inline.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
namespace {
const pass_data pass_data_jeff =
{
GIMPLE_PASS, /* type */
"jeff", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
};
class pass_jeff : public gimple_opt_pass
{
public:
pass_jeff (gcc::context *ctxt)
: gimple_opt_pass (pass_data_jeff, ctxt)
{}
/* opt_pass methods: */
bool gate (function *) final override { return 1; }
unsigned int execute (function *) final override;
}; // class pass_jeff
unsigned int pass_jeff::execute(function *func)
{
static std::string base_function;
static int var1_bb_count = 0;
static int var1_stmt_count = 0;
static std::vector<enum gimple_code> var1_gimple_code;
struct cgraph_node *node;
if (!dump_file ||!func || !func ->cfg)
return 0;
FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node)
{
if (!base_function.empty())
break; //resolver already found
function *fun =node->get_fun();
if (!fun)
continue; //ignore no function with no body
const char *fname = node->name();
if (!fname)
continue; //ignore function with no name
std::string name(fname);
if (base_function.empty() && name.find(".resolver") != std::string::npos)
{
base_function = name.substr(0, name.find(".resolver"));
fprintf(dump_file, "Found base function: %s\n", base_function.c_str());
break; //resolver found
}
}
node = cgraph_node::get(func->decl);
if (!node){
fprintf(dump_file, "ERROR: cgraph_node::get(func->decl)\n");
}
const char *fname = node->name();
std::string name(fname);
if (name.find(base_function) != 0 || name.find(".resolver") !=std::string::npos)
return 0;
int bb_count = 0;
int stmt_count = 0;
basic_block bb;
FOR_EACH_BB_FN(bb, func)
{
bb_count++;
for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi))
stmt_count++;
}
if (var1_bb_count == 0 && var1_stmt_count == 0)
{
var1_bb_count = bb_count;
var1_stmt_count = stmt_count;
FOR_EACH_BB_FN(bb,func){
for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi))
var1_gimple_code.push_back(gimple_code(gsi_stmt(gsi)));
}
}
else
{
if (bb_count != var1_bb_count || stmt_count != var1_stmt_count){
fprintf(dump_file, " NOPRUNE\n");
return 0;
}
std::vector<enum gimple_code>::iterator it = var1_gimple_code.begin();
FOR_EACH_BB_FN(bb, func){
for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)){
if (gimple_code(gsi_stmt(gsi)) != *it){
fprintf(dump_file, " NOPRUNE\n");
return 0;
}
++it;
}
}
fprintf(dump_file, " PRUNE\n");
}
return 0;
}
} // anon namespace
gimple_opt_pass *
make_pass_jeff (gcc::context *ctxt)
{
return new pass_jeff (ctxt);
}
Comments
Post a Comment