
Testability prediction

The replication package for the paper 'An ensemble meta-estimator to predict source code testability', published in the Applied Soft Computing journal.

Goal

This script implements the machine learning models for predicting source code testability presented in the Applied Soft Computing journal paper.

Machine learning models

  • Model 1: DecisionTreeRegressor
  • Model 2: RandomForestRegressor
  • Model 3: GradientBoostingRegressor
  • Model 4: HistGradientBoostingRegressor
  • Model 5: SGDRegressor
  • Model 6: MLPRegressor
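
The sketch below shows one way these model numbers map onto a training run with the Regression class documented further down; the import path and the dataset CSV location are assumptions, while the result file names follow the convention expected by the vote method ({MODEL}1_DS{n}.joblib).

# A minimal, illustrative training run: tune each of the six regressors on DS1,
# save the best estimator of each, and combine three of them into the voting ensemble.
from adafest.code.testability.ml_models_testability import Regression  # assumed package layout

reg = Regression(df_path='dataset/DS1.csv')  # hypothetical path to the learning dataset
for model_number, name in enumerate(['DTR', 'RFR', 'GBR', 'HGBR', 'SGDR', 'MLPR'], start=1):
    reg.regress(model_path='../../results/{0}1_DS1.joblib'.format(name), model_number=model_number)

# vote() loads the saved HGBR, RFR, and MLPR models and fits a weighted VotingRegressor.
reg.vote(model_path='../../results/VR1_DS1.joblib', dataset_number=1)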

Learning datasets

Dataset: applied preprocessing: number of metrics

  • DS1 (default): Simple classes elimination, data classes elimination, outliers elimination, and metric standardization: 262 features
  • DS2: DS1 + feature selection: 20 features
  • DS3: DS1 + context vector elimination: 194 features
  • DS4: DS1 + context vector elimination and lexical metrics elimination: 177 features
  • DS5: DS1 + systematically generated metrics elimination: 71 features
  • DS6: Top 15 important source code metrics affecting testability: 15 features
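
For DS2, the 20 features are selected inside the Regression constructor with SelectKBest and f_regression; a minimal sketch of enabling that step (the CSV path is illustrative):

# Derive the DS2 feature subset from the preprocessed data (path is a placeholder).
reg_ds2 = Regression(df_path='dataset/DS1.csv', feature_selection_mode=True)
print(reg_ds2.X_train1.columns)  # the 20 columns kept by the feature selection step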

Model dependent variable

Testability of class X: T(X) = E[C] / (1 + ω)^(|n| - 1), where E[C] = (1/3)·StatementCoverage + (1/3)·BranchCoverage + (1/3)·MutationCoverage.
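
A small, purely illustrative computation of this dependent variable; the coefficient ω and the quantity |n| are defined in the paper, and the coverage values and numbers below are made up for demonstration only.

# Illustrative computation of T(X) = E[C] / (1 + omega)^(|n| - 1).
# omega and |n| are defined in the paper; the values used here are made up.
def testability(statement_cov, branch_cov, mutation_cov, omega, n_size):
    e_c = (statement_cov + branch_cov + mutation_cov) / 3.0  # E[C]: equally weighted coverage
    return e_c / (1.0 + omega) ** (n_size - 1)

print(testability(0.90, 0.75, 0.60, omega=0.05, n_size=12))  # ~0.4385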

Results

The results (grid-search reports, evaluation metrics, and the tuned .joblib models) will be saved in the ../../results directory.

Inferences

Use the inference_model2 method of the Regression class to predict the testability of new Java classes, as sketched below.
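
A minimal sketch of that inference step; all paths are illustrative, and the prediction CSV is assumed to hold the class name in its first column followed by the same source code metrics used for training (the layout inference_model2 expects).

# Score new Java classes with a previously trained model (e.g., the voting ensemble VR1).
reg = Regression(df_path='dataset/DS1.csv')  # refits the scaler on the training split
reg.inference_model2(
    model_path='../../results/VR1_DS1.joblib',         # any saved .joblib model
    predict_data_path='dataset/new_java_classes.csv',  # metrics of the classes to score
)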

Regression

Source code in adafest\code\testability\ml_models_testability.py
# Third-party libraries used by the Regression class.
import random

import joblib
import numpy as np
import pandas as pd
from joblib import dump, load
from scipy.stats import mannwhitneyu, ttest_ind, wilcoxon
from sklearn import feature_selection, linear_model
from sklearn.ensemble import (
    GradientBoostingRegressor,
    HistGradientBoostingRegressor,
    RandomForestRegressor,
    VotingRegressor,
)
from sklearn.metrics import (
    explained_variance_score,
    max_error,
    mean_absolute_error,
    mean_gamma_deviance,
    mean_poisson_deviance,
    mean_squared_error,
    mean_squared_log_error,
    median_absolute_error,
    r2_score,
)
from sklearn.model_selection import GridSearchCV, ShuffleSplit, train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import QuantileTransformer
from sklearn.tree import DecisionTreeRegressor


class Regression:
    def __init__(self, df_path: str = None, feature_selection_mode=False):
        self.df = pd.read_csv(df_path, delimiter=',', index_col=False)
        self.X_train1, self.X_test1, self.y_train, self.y_test = train_test_split(
            self.df.iloc[:, 1:-1],
            self.df.iloc[:, -1],
            test_size=0.25,
            random_state=117,
        )

        # -- Feature selection (For DS2)
        if feature_selection_mode:
            selector = feature_selection.SelectKBest(feature_selection.f_regression, k=20)
            # clf = linear_model.LassoCV(eps=1e-3, n_alphas=100, normalize=True, max_iter=5000, tol=1e-4)
            # clf.fit(self.X_train1, self.y_train)
            # importance = np.abs(clf.coef_)
            # print('importance', importance)
            # clf = RandomForestRegressor()
            # selector = feature_selection.SelectFromModel(clf, prefit=False, norm_order=2, max_features=20,)
            selector.fit(self.X_train1, self.y_train)

            # Get columns to keep and create new dataframe with only selected features
            cols = selector.get_support(indices=True)
            self.X_train1 = self.X_train1.iloc[:, cols]
            self.X_test1 = self.X_test1.iloc[:, cols]
            print('Selected columns by feature selection:', self.X_train1.columns)
            # quit()
        # --- End of feature selection

        # Standardization
        # self.scaler = preprocessing.RobustScaler(with_centering=True, with_scaling=True, unit_variance=True)
        # self.scaler = preprocessing.StandardScaler()
        self.scaler = QuantileTransformer(n_quantiles=1000, random_state=11)
        self.scaler.fit(self.X_train1)
        self.X_train = self.scaler.transform(self.X_train1)
        self.X_test = self.scaler.transform(self.X_test1)
        dump(self.scaler, df_path[:-4] + '_scaler.joblib')
        # quit()

    def regress(self, model_path: str = None, model_number: int = None):
        """Tune one regressor with grid search, then evaluate and save the best estimator.

        :param model_path: output path of the tuned model (a .joblib file)
        :param model_number: 1: DTR, 2: RFR, 3: GBR, 4: HGBR, 5: SGDR, 6: MLPR
        :return: None
        """
        regressor = None
        parameters = None
        if model_number == 1:
            regressor = DecisionTreeRegressor(random_state=23, )
            # Set the parameters to be used for tuning by cross-validation
            parameters = {
                # 'criterion': ['mse', 'friedman_mse', 'mae'],
                'max_depth': range(3, 50, 5),
                'min_samples_split': range(2, 30, 2)
            }
        elif model_number == 2:
            regressor = RandomForestRegressor(random_state=19, )
            parameters = {
                'n_estimators': range(100, 200, 100),
                # 'criterion': ['mse', 'mae'],
                'max_depth': range(10, 50, 10),
                # 'min_samples_split': range(2, 30, 2),
                # 'max_features': ['auto', 'sqrt', 'log2']
            }
        elif model_number == 3:
            regressor = GradientBoostingRegressor(n_estimators=400, learning_rate=0.05, random_state=17, )
            parameters = {
                # 'loss': ['ls', 'lad', ],
                'max_depth': range(10, 50, 10),
                'min_samples_split': range(2, 30, 3)
            }
        elif model_number == 4:
            regressor = HistGradientBoostingRegressor(max_iter=400, learning_rate=0.05, random_state=13, )
            parameters = {
                # 'loss': ['least_squares', 'least_absolute_deviation'],
                'max_depth': range(10, 50, 10),
                'min_samples_leaf': range(5, 50, 10)
            }
        elif model_number == 5:
            regressor = linear_model.SGDRegressor(early_stopping=True, n_iter_no_change=5, random_state=11, )
            parameters = {
                'loss': ['squared_loss', 'huber', 'epsilon_insensitive'],
                'penalty': ['l2', 'l1', 'elasticnet'],
                'max_iter': range(50, 1000, 50),
                'learning_rate': ['invscaling', 'optimal', 'constant', 'adaptive'],
                'eta0': [0.1, 0.01],
                'average': [32, ]
            }
        elif model_number == 6:
            regressor = MLPRegressor(random_state=7, )
            parameters = {
                'hidden_layer_sizes': [(256, 100), (512, 256, 100), ],
                'activation': ['tanh', ],
                'solver': ['adam', ],
                'max_iter': range(50, 200, 50)
            }

        if regressor is None:
            return
        if parameters is None:
            return
        # Set the objectives which must be optimized during parameter tuning
        # scoring = ['r2', 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_absolute_error',]
        scoring = ['neg_root_mean_squared_error', ]
        # CrossValidation iterator object:
        # https://scikit-learn.org/stable/tutorial/statistical_inference/model_selection.html
        cv = ShuffleSplit(n_splits=5, test_size=0.20, random_state=101)
        # Find the best model using grid search with cross-validation
        clf = GridSearchCV(
            regressor,
            param_grid=parameters,
            scoring=scoring,
            cv=cv,
            n_jobs=7,
            refit='neg_root_mean_squared_error'
        )

        print('fitting model number', model_number)
        clf.fit(X=self.X_train, y=self.y_train)

        print('Writing grid search result ...')
        df = pd.DataFrame(clf.cv_results_, )
        df.to_csv(model_path[:-7] + '_grid_search_cv_results.csv', index=False)

        df = pd.DataFrame()
        print('Best parameters set found on development set:', clf.best_params_)
        df['best_parameters_development_set'] = [clf.best_params_]
        print('Best classifier score on development set:', clf.best_score_)
        df['best_score_development_set'] = [clf.best_score_]
        print('best classifier score on test set:', clf.score(self.X_test, self.y_test))
        df['best_score_test_set:'] = [clf.score(self.X_test, self.y_test)]
        df.to_csv(model_path[:-7] + '_grid_search_cv_results_best.csv', index=False)

        # Save and evaluate the best obtained model
        print('Writing evaluation result ...')
        clf = clf.best_estimator_
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        dump(clf, model_path)

        self.evaluate_model(model=clf, model_path=model_path)
        # self.evaluate_model_class(model=clf, model_path=model_path)
        # self.inference_model(model=clf, model_path=model_path)
        print('=' * 75)

    def vote(self, model_path=None, dataset_number=1):
        # Trained regressors
        reg1 = load(r'../../results/HGBR1_DS{0}.joblib'.format(dataset_number))
        reg2 = load(r'../../results/RFR1_DS{0}.joblib'.format(dataset_number))
        reg3 = load(r'../../results/MLPR1_DS{0}.joblib'.format(dataset_number))
        # reg4 = load(r'results/SGDR1_DS1.joblib')

        ereg = VotingRegressor(
            [('HGBR1_DS{0}'.format(dataset_number), reg1),
             ('RFR1_DS{0}'.format(dataset_number), reg2),
             ('MLPR1_DS{0}'.format(dataset_number), reg3)],
            weights=[3. / 6., 2. / 6., 1. / 6.]
        )

        ereg.fit(self.X_train, self.y_train)
        dump(ereg, model_path)
        try:
            self.evaluate_model(model=ereg, model_path=model_path)
        except ValueError:
            # Some evaluation metrics require strictly positive targets/predictions.
            print('Prediction is out of the range.')

    def inference_model(self, model=None, model_path=None):
        if model is None:
            model = joblib.load(model_path)

        # Show a single sample prediction as a quick sanity check
        y_true, y_pred = self.y_test, model.predict(self.X_test[3:4, ])
        print('X_test {0}'.format(self.X_test[3:4, ]))
        print('------')
        print('y_test or y_true {0}'.format(y_true.iloc[3:4]))
        print('------')
        print('y_pred by model {0}'.format(y_pred))

        # Predict the whole test set and save it alongside the original rows
        y_true, y_pred = self.y_test, model.predict(self.X_test)
        rows = []
        for i, row in self.y_test.items():
            print('', i, row)
            rows.append(self.df.loc[i])
        df_new = pd.DataFrame(rows, columns=self.df.columns).reset_index(drop=True)
        df_new['y_true'] = self.y_test.values
        df_new['y_pred'] = list(y_pred)

        df_new.to_csv(model_path[:-7] + '_inference_result.csv', index=True, index_label='Row')

    def inference_model2(self, model=None, model_path=None, predict_data_path=None):
        if model is None:
            model = joblib.load(model_path)

        df_predict_data = pd.read_csv(predict_data_path, delimiter=',', index_col=False)
        X_test1 = df_predict_data.iloc[:, 1:]
        X_test = self.scaler.transform(X_test1)
        y_pred = model.predict(X_test)

        df_new = pd.DataFrame(df_predict_data.iloc[:, 0], columns=['Class'])
        df_new['PredictedTestability'] = list(y_pred)
        print(df_new)

    def evaluate_model(self, model=None, model_path=None):
        if model is None:
            model = joblib.load(model_path)

        y_true, y_pred = self.y_test, model.predict(self.X_test)
        # y_score = model.predict_proba(X_test)

        # Print all classifier model metrics
        print('Evaluating regressor ...')
        print('Regressor minimum prediction', min(y_pred), 'Regressor maximum prediction', max(y_pred))
        df = pd.DataFrame()
        df['r2_score_uniform_average'] = [r2_score(y_true, y_pred, multioutput='uniform_average')]
        df['r2_score_variance_weighted'] = [r2_score(y_true, y_pred, multioutput='variance_weighted')]

        df['explained_variance_score_uniform_average'] = [
            explained_variance_score(y_true, y_pred, multioutput='uniform_average')]
        df['explained_variance_score_variance_weighted'] = [
            explained_variance_score(y_true, y_pred, multioutput='variance_weighted')]

        df['mean_absolute_error'] = [mean_absolute_error(y_true, y_pred)]
        df['mean_squared_error_MSE'] = [mean_squared_error(y_true, y_pred)]
        df['mean_squared_error_RMSE'] = [mean_squared_error(y_true, y_pred, squared=False)]
        df['median_absolute_error'] = [median_absolute_error(y_true, y_pred)]

        if min(y_pred) >= 0:
            df['mean_squared_log_error'] = [mean_squared_log_error(y_true, y_pred)]

        # To handle ValueError: Mean Tweedie deviance error with power=2 can only be used on \
        # strictly positive y and y_pred.
        if min(y_pred) > 0 and min(y_true) > 0:
            df['mean_poisson_deviance'] = [mean_poisson_deviance(y_true, y_pred, )]
            df['mean_gamma_deviance'] = [mean_gamma_deviance(y_true, y_pred, )]
        df['max_error'] = [max_error(y_true, y_pred)]

        df.to_csv(model_path[:-7] + '_evaluation_metrics_R1.csv', index=True, index_label='Row')

    def statistical_comparison(self, ):
        model1 = load(r'results/VR1_DS1.joblib')
        # model2 = load(r'results/RFR1_DS1.joblib')
        # model2 = load(r'results/MLPR1_DS1.joblib')
        model2 = load(r'results/HGBR1_DS1.joblib')

        y_pred1 = model1.predict(self.X_test)
        y_pred2 = model2.predict(self.X_test)

        y_true = np.array(list(self.y_test.values))
        y_pred1 = np.array(list(y_pred1))
        y_pred2 = np.array(list(y_pred2))

        output_errors1 = (abs(y_true - y_pred1))
        output_errors2 = (abs(y_true - y_pred2))

        s, p = ttest_ind(list(output_errors1), list(output_errors2), alternative="less", )
        print(f'statistic = {s}, p-value={p}')

    def statistical_comparison2(self, ):
        model1 = load(r'results/VR1_DS1.joblib')
        # model2 = load(r'results/RFR1_DS1.joblib')
        # model2 = load(r'results/MLPR1_DS1.joblib')
        # model2 = load(r'results/HGBR1_DS1.joblib')
        # model2 = load(r'results/SGDR1_DS1.joblib')
        model2 = load(r'results/DTR1_DS1.joblib')
        # model2 = load(r'results/DTR1_DS3.joblib')

        me1 = []
        me2 = []
        # Bootstrap comparison: draw 100 resamples of 2,000 test points (with replacement)
        for i in range(100):
            x_test = []
            y_test = []
            for j in range(2000):
                random_index = random.randint(0, len(self.X_test) - 1)
                x_test.append(self.X_test[random_index])
                y_test.append(self.y_test.values[random_index])
            y_pred1 = model1.predict(x_test)
            y_pred2 = model2.predict(x_test)
            # me1.append(mean_squared_error(y_test, y_pred1))
            # me2.append(mean_squared_error(y_test, y_pred2))
            me1.append(r2_score(y_test, y_pred1))
            me2.append(r2_score(y_test, y_pred2))

        # print('me1', me1)
        # print('me2', me2)

        s, p = ttest_ind(me2, me1, alternative="less", )
        print(f'statistic t-test = {s}, p-value={p}')
        s, p = wilcoxon(me2, me1, alternative="less", )
        print(f'statistic wilcoxon = {s}, p-value={p}')
        s, p = mannwhitneyu(me2, me1, alternative="less", )
        print(f'statistic Mann-Whitney U = {s}, p-value={p}')
        print()

regress(self, model_path=None, model_number=None)

Tunes the regressor selected by model_number (1: DTR, 2: RFR, 3: GBR, 4: HGBR, 5: SGDR, 6: MLPR) with grid search and cross-validation, evaluates the best estimator on the held-out test set, and saves it to model_path. The full source of this method appears in the Regression class listing above.