70 lines
1.9 KiB
Python
70 lines
1.9 KiB
Python
|
|
import pandas as pd
|
||
|
|
import preprocessing as pp
|
||
|
|
from sklearn.ensemble import RandomForestClassifier
|
||
|
|
|
||
|
|
# Feature Conversion Values
# The four text-valued columns that convert_categories() rewrites as integers.
categorical_columns = [
    "person_home_ownership",
    "loan_intent",
    "loan_grade",
    "cb_person_default_on_file",
]

# Each lookup below numbers its labels 1..N in the order they are listed.
person_home_ownership_values = {
    label: code
    for code, label in enumerate(("RENT", "MORTGAGE", "OWN", "OTHER"), start=1)
}
loan_intent_values = {
    label: code
    for code, label in enumerate(
        ("EDUCATIONAL", "MEDICAL", "VENTURE", "PERSONAL", "DEBTCONSOLIDATION"),
        start=1,
    )
}
loan_grade_values = {
    grade: rank for rank, grade in enumerate("ABCDE", start=1)
}
# Binary flag: "Y" -> 1, "N" -> 0.
cb_person_default_on_file_values = dict(Y=1, N=0)
|
||
|
|
|
||
|
|
|
||
|
|
def get_default_predictor():
    """Build the loan-default random-forest predictor from the credit-risk CSV.

    Reads ``credit_risk_dataset.csv`` (path relative to the current working
    directory), converts the categorical columns to integer codes, and then
    passes the frame through the ``preprocessing`` helpers: imputation,
    train/test split on the ``loan_status`` target, and normalisation.

    Returns:
        Whatever ``random_forest_classifier`` returns for the normalised
        training features and the training target.
    """
    # pandas has no top-level ``from_csv``; the CSV reader is ``read_csv``.
    # The original ``pd.from_csv`` call raised AttributeError.
    data = pd.read_csv("credit_risk_dataset.csv")
    data = convert_categories(data)

    # Imputation
    data = pp.impute_missing_values(data)

    # Training and testing preparation
    train_features, train_target, test_features, _test_target = (
        pp.training_test_split(data, "loan_status")
    )

    # Normalise the data. Only the training half is used below; the
    # normalised test features are discarded here.
    training_features, _normalised_test_features = pp.normalise(
        train_features, test_features
    )

    # Init models
    rf_model = random_forest_classifier(training_features, train_target)
    return rf_model
|
||
|
|
|
||
|
|
|
||
|
|
def random_forest_classifier(training_features, training_target):
    """Create a random-forest classifier and fit it on the training data.

    Args:
        training_features: Feature matrix to train on, in any form accepted
            by scikit-learn's ``fit``.
        training_target: Target labels, one per row of ``training_features``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    model = RandomForestClassifier(
        max_features="log2",
        random_state=79,  # fixed seed so repeated runs build the same forest
        n_jobs=-1,
    )
    # The original accepted both arguments but never used them, so callers
    # received an unfitted estimator. Fit before returning.
    model.fit(training_features, training_target)
    return model
|
||
|
|
|
||
|
|
|
||
|
|
def convert_categories(data):
    """Swap the categorical text columns of ``data`` for their integer codes.

    The four columns are overwritten on ``data`` itself via ``.map`` with the
    module-level lookup tables, and that same object is returned.

    NOTE(review): with pandas ``Series.map`` and a dict, values missing from
    the lookup become NaN -- confirm the tables cover every category present
    in the dataset.

    Args:
        data: Frame holding the four categorical columns (presumably a pandas
            DataFrame; verify against caller).

    Returns:
        ``data``, with the four columns replaced by their mapped values.
    """
    column_lookups = (
        ("person_home_ownership", person_home_ownership_values),
        ("loan_intent", loan_intent_values),
        ("loan_grade", loan_grade_values),
        ("cb_person_default_on_file", cb_person_default_on_file_values),
    )
    for column, codes in column_lookups:
        data[column] = data[column].map(codes)
    return data
|