SPO600 Project Stage 2 (Pt.5) - gcc pass to identify clone functions and comparing them

In the previous parts, I created a GCC pass on an x86-64 server to identify function clones and compare them. It will compare the number of GIMPLE statements and basic blocks of two clone. Also, I used a vector to store the GIMPLE Type and compare it afterward.

Target: Compare operands type and the number of operands for assignment GIMPLE statements.

For the assignment operation, we check the right-hand operands to determine whether the statement structures are identical.

The new version will also use a vector to store the number of operands and the types of the right-hand operands.

New version code:

/* Test Pass
        Jeff Yau, Seneca Polytechnic College
        Student ID :142466234
        Modelled on tree-nrv.cc and tree-ctyler.cc by Chris Tyler, Seneca Polytechnic College, 2024-11

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include <vector>
#include <string>
#include <stdlib.h>
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-pretty-print.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "internal-fn.h"
#include "gimple-pretty-print.h"
#include "cgraph.h"
#include "gimple-ssa.h"
#include "attribs.h"
#include "pretty-print.h"
#include "tree-inline.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"


namespace {
const pass_data pass_data_jeff =
{
  GIMPLE_PASS, /* type */
  "jeff", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_jeff : public gimple_opt_pass
{
public:
  pass_jeff (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_jeff, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return 1; }

  unsigned int execute (function *) final override;

}; // class pass_jeff
unsigned int pass_jeff::execute(function *func)
{
        static std::string base_function;
        static int var1_bb_count = 0;
        static int var1_stmt_count = 0;
        static std::vector<enum gimple_code> var1_gimple_code;
        static std::vector<unsigned> var1_operands_count;
        static std::vector<enum tree_code> var1_operandsType;
        struct cgraph_node *node;
        if (!dump_file ||!func || !func ->cfg)
                return 0;

        FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node)
        {
                if (!base_function.empty())
                    break;     //resolver already found

                function *fun =node->get_fun();
                if (!fun)
                    continue;  //ignore no function with no body

                const char *fname = node->name();
                if (!fname)
                    continue;   //ignore function with no name

                std::string name(fname);

                if (base_function.empty() && name.find(".resolver") != std::string::npos)
                {
                    base_function = name.substr(0, name.find(".resolver"));
                    fprintf(dump_file, "Found base function: %s\n", base_function.c_str());
                    break;       //resolver found
                }

        }
        node = cgraph_node::get(func->decl);
        if (!node){
                fprintf(dump_file, "ERROR: cgraph_node::get(func->decl)\n");
        }
        const char *fname = node->name();
        std::string name(fname);
        if (name.find(base_function) != 0 || name.find(".resolver") !=std::string::npos)
                return 0;

        int bb_count = 0;
        int stmt_count = 0;
        basic_block bb;
        FOR_EACH_BB_FN(bb, func)
        {
                bb_count++;
                for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi))
                        stmt_count++;
        }

        if (var1_bb_count == 0 && var1_stmt_count == 0)
        {
                var1_bb_count = bb_count;
                var1_stmt_count = stmt_count;
                FOR_EACH_BB_FN(bb,func){
                        for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)){
                                var1_gimple_code.push_back(gimple_code(gsi_stmt(gsi)));
                                var1_operands_count.push_back(gimple_num_ops(gsi_stmt(gsi)));
                                if (is_gimple_assign(gsi_stmt(gsi)))
                                        var1_operandsType.push_back(gimple_assign_rhs_code(gsi_stmt(gsi)));
                        }
                }
        }
        else
        {
                if (bb_count != var1_bb_count || stmt_count != var1_stmt_count){
                        fprintf(dump_file, " NOPRUNE: %s\n",base_function.c_str());
                        return 0;
                }
                std::vector<enum gimple_code>::iterator it = var1_gimple_code.begin();
                std::vector<unsigned>::iterator count_it = var1_operands_count.begin();
                std::vector<enum tree_code>::iterator optype_it = var1_operandsType.begin();
                FOR_EACH_BB_FN(bb, func){
                        for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)){
                                if (gimple_code(gsi_stmt(gsi)) != *it
                                        ||gimple_num_ops(gsi_stmt(gsi))!= *count_it)
                                {
                                        fprintf(dump_file, " NOPRUNE: %s\n",base_function.c_str());
                                        return 0;
                                }
                                if( *it == GIMPLE_ASSIGN && *(optype_it++) != gimple_assign_rhs_code(gsi_stmt(gsi)))
                                {
                                        fprintf(dump_file, " NOPRUNE: %s\n",base_function.c_str());
                                        return 0;
                                }
                                ++it;
                                ++count_it;
                        }
                }
           fprintf(dump_file, " PRUNE: %s\n",base_function.c_str());

        }

    return 0;
}


} // anon namespace

gimple_opt_pass *
make_pass_jeff (gcc::context *ctxt)
{
  return new pass_jeff (ctxt);
}

Output is correct. 

NOPRUNE for the noprune-test-case
PRUNE for the prune-test-case







Next part I will work on comparing the variables.

Target: Compare the order of every variables of GIMPLES statements between two clones

I need a helper function that extracts all variables from a GIMPLE statement in order. For each variable, it checks whether it has appeared before using a map. If it has, the function retrieves its previously assigned index. If not, it assigns a new index to the variable. The result is stored in another map (alternative is a vector). After collecting the index representations for all variables in each clone function, the two maps are compared.

The Final Version code:

/* Test Pass
        Jeff Yau, Seneca Polytechnic College
        Student ID :142466234
        Modelled on tree-nrv.cc and tree-ctyler.cc by Chris Tyler, Seneca Polytechnic College, 2024-11

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include <map>
#include <vector>
#include <string>
#include <stdlib.h>
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-pretty-print.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "internal-fn.h"
#include "gimple-pretty-print.h"
#include "cgraph.h"
#include "gimple-ssa.h"
#include "attribs.h"
#include "pretty-print.h"
#include "tree-inline.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "gcc-plugin.h"

namespace {
const pass_data pass_data_jeff =
{
  GIMPLE_PASS, /* type */
  "jeff", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_jeff : public gimple_opt_pass
{
public:
  pass_jeff (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_jeff, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return 1; }

  unsigned int execute (function *) final override;

}; // class pass_jeff


int process_gimple_variables(std::map<tree, int>& refMap, int index, gimple* stmt, std::map<int, int>& resultMap) {
    std::vector<tree> vars;

    switch (gimple_code(stmt)) {
        case GIMPLE_ASSIGN: {
            tree lhs = gimple_assign_lhs(stmt);
            vars.push_back(lhs);

            int rhs_ops = gimple_num_ops(stmt) - 1;
            for (int i = 1; i <= rhs_ops; ++i) {
                tree op = gimple_op(stmt, i);
                vars.push_back(op);
            }
            break;
        }

        case GIMPLE_DEBUG_BIND: {
            tree var = gimple_debug_bind_get_var(stmt);
            tree val = gimple_debug_bind_get_value(stmt);
            if (var)
                vars.push_back(var);
            if (val)
                vars.push_back(val);
            break;
        }

        case GIMPLE_COND: {
            tree lhs = gimple_cond_lhs(stmt);
            tree rhs = gimple_cond_rhs(stmt);
            if (lhs)
                vars.push_back(lhs);
            if (rhs)
                vars.push_back(rhs);
            break;
        }
        default:
            break;
    }

    for (tree var : vars) {
        std::map<tree, int>::iterator it = refMap.find(var);
        if (it != refMap.end()) {
            resultMap[index] = it->second;
        } else {
            refMap[var] = index;
            resultMap[index] = index;
        }
        ++index;
    }

    return index;
}

unsigned int pass_jeff::execute(function *func)
{
        static std::string base_function;
        static int var1_bb_count = 0;
        static int var1_stmt_count = 0;
        static std::vector<enum gimple_code> var1_gimple_code;
        static std::vector<unsigned> var1_operands_count;
        static std::vector<enum tree_code> var1_operandsType;
        static std::map<int,int> var1_variableMap;
        struct cgraph_node *node;
        if (!dump_file ||!func || !func ->cfg)
                return 0;

        FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node)
        {
                if (!base_function.empty())
                    break;     //resolver already found

                function *fun =node->get_fun();
                if (!fun)
                    continue;  //ignore no function with no body

                const char *fname = node->name();
                if (!fname)
                    continue;   //ignore function with no name

                std::string name(fname);

                if (base_function.empty() && name.find(".resolver") != std::string::npos)
                {
                    base_function = name.substr(0, name.find(".resolver"));
                    fprintf(dump_file, "Found base function: %s\n", base_function.c_str());
                    break;       //resolver found
                }

        }
        node = cgraph_node::get(func->decl);
        if (!node){
                fprintf(dump_file, "ERROR: cgraph_node::get(func->decl)\n");
        }
        const char *fname = node->name();
        std::string name(fname);
        if (name.find(base_function) != 0 || name.find(".resolver") !=std::string::npos)
                return 0;

        int bb_count = 0;
        int stmt_count = 0;
        basic_block bb;
        int index = 0;
        std::map<tree,int> refMap;
        FOR_EACH_BB_FN(bb, func)
        {
                bb_count++;
                for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)){
                        stmt_count++;
                }
        }

        if (var1_bb_count == 0 && var1_stmt_count == 0)
        {
                var1_bb_count = bb_count;
                var1_stmt_count = stmt_count;
                FOR_EACH_BB_FN(bb,func){
                        for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)){
                                index = process_gimple_variables(refMap, index, gsi_stmt(gsi), var1_variableMap);
                                var1_gimple_code.push_back(gimple_code(gsi_stmt(gsi)));
                                var1_operands_count.push_back(gimple_num_ops(gsi_stmt(gsi)));
                                if (is_gimple_assign(gsi_stmt(gsi)))
                                        var1_operandsType.push_back(gimple_assign_rhs_code(gsi_stmt(gsi)));
                        }
                }
        }
        else
        {
                if (bb_count != var1_bb_count || stmt_count != var1_stmt_count){
                        fprintf(dump_file, " NOPRUNE: %s\n",base_function.c_str());
                        return 0;
                }
                std::vector<enum gimple_code>::iterator it = var1_gimple_code.begin();
                std::vector<unsigned>::iterator count_it = var1_operands_count.begin();
                std::vector<enum tree_code>::iterator optype_it = var1_operandsType.begin();
                std::map<int,int> var2_variableMap;
                index = 0;
                FOR_EACH_BB_FN(bb, func){
                        for (gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)){
                                index = process_gimple_variables(refMap, index, gsi_stmt(gsi), var2_variableMap);
                                if (gimple_code(gsi_stmt(gsi)) != *it
                                        ||gimple_num_ops(gsi_stmt(gsi))!= *count_it)
                                {
                                        fprintf(dump_file, " NOPRUNE: %s\n",base_function.c_str());
                                        return 0;
                                }
                                if( *it == GIMPLE_ASSIGN && *(optype_it++) != gimple_assign_rhs_code(gsi_stmt(gsi)))
                                {
                                        fprintf(dump_file, " NOPRUNE: %s\n",base_function.c_str());
                                        return 0;
                                }
                                ++it;
                                ++count_it;
                        }
                }
                std::map<int, int>::const_iterator it1 = var1_variableMap.begin();
                std::map<int, int>::const_iterator it2 = var2_variableMap.begin();

                while (it1 != var1_variableMap.end()) {
                        if (it1->first != it2->first || it1->second != it2->second)
                        {
                                fprintf(dump_file, " NOPRUNE: %s\n",base_function.c_str());
                                return 0;
                        }
                        ++it1;
                        ++it2;
                }
           fprintf(dump_file, " PRUNE: %s\n",base_function.c_str());

        }

    return 0;
}


} // anon namespace

gimple_opt_pass *
make_pass_jeff (gcc::context *ctxt)
{
  return new pass_jeff (ctxt);
}

Output is correct. 

NOPRUNE for the noprune-test-case
PRUNE for the prune-test-case



Comments

Popular posts from this blog

SPO600 Project Stage 1 (Pt.1) - Create a GCC Pass

SPO600 Project Stage 2 (Pt.1) - GCC pass locating clone function

SPO600 Project Stage 2 (Pt.2) - GCC pass locating clone function -modified version