78 lines
2.2 KiB
Python
78 lines
2.2 KiB
Python
import models
|
|
import pandas as pd
|
|
import preprocessing as pp
|
|
from sklearn.metrics import accuracy_score
|
|
|
|
# Load Data
# The CSV path is relative, so it is resolved against the current working
# directory at run time.
DATASET_PATH = "credit_risk_dataset.csv"
credit_risk = pd.read_csv(DATASET_PATH)
|
|
|
|
# Feature Addition
|
|
|
|
|
|
# Feature Conversion
|
|
|
|
# Integer codes for each categorical column.
#
# These tables are applied with Series.map further down in this script, and
# Series.map turns every value that is not a key of the table into NaN.  A
# category without an entry is therefore silently converted into a "missing"
# value, so each table has to list every category present in the data.
#
# NOTE(review): the added entries ("EDUCATION", "HOMEIMPROVEMENT", "F", "G")
# assume the CSV is the public Kaggle credit risk dataset, whose column names
# match the ones used in this script -- confirm against the actual file.
person_home_ownership_values = {
    "RENT": 1,
    "MORTGAGE": 2,
    "OWN": 3,
    "OTHER": 4,
}

loan_intent_values = {
    "EDUCATIONAL": 1,  # original spelling, kept so existing behaviour is unchanged
    "EDUCATION": 1,  # spelling presumably used in the dataset; same code as above
    "MEDICAL": 2,
    "VENTURE": 3,
    "PERSONAL": 4,
    "DEBTCONSOLIDATION": 5,
    "HOMEIMPROVEMENT": 6,
}

# Grades are ordered, so the codes continue the existing A..E sequence.
loan_grade_values = {
    "A": 1,
    "B": 2,
    "C": 3,
    "D": 4,
    "E": 5,
    "F": 6,
    "G": 7,
}

cb_person_default_on_file_values = {
    "Y": 1,
    "N": 0,
}
|
|
|
|
# Convert categorical columns to numerical columns.
#
# Each listed column is replaced by its integer codes.  Series.map yields NaN
# for any value that is not a key of the mapping, which would otherwise pass
# unnoticed into the missing-value handling below; such categories are
# reported here so an incomplete mapping table is visible in the output.
categorical_encodings = {
    "person_home_ownership": person_home_ownership_values,
    "loan_intent": loan_intent_values,
    "loan_grade": loan_grade_values,
    "cb_person_default_on_file": cb_person_default_on_file_values,
}

for column_name, encoding in categorical_encodings.items():
    raw_values = credit_risk[column_name]
    encoded_values = raw_values.map(encoding)

    # Present in the raw column but NaN after mapping => no code was defined.
    unmapped = raw_values[raw_values.notna() & encoded_values.isna()].unique()
    if len(unmapped):
        print(
            f"Warning: {column_name} has categories without a code "
            f"{sorted(map(str, unmapped))}; they become missing values"
        )

    credit_risk[column_name] = encoded_values

print("Feature Conversion Complete")
|
|
|
|
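# Feature Removal
# NOTE(review): the disabled template below still refers to `housing` and to
# housing-style column names, so it appears to be left over from another
# script; adapt it to `credit_risk` before enabling it.
# columns_for_removal = ["housing_median_age", "total_rooms", "total_bedrooms"]
# for column in columns_for_removal:
#     housing.drop(column, axis=1, inplace=True)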
|
|
|
|
# Preprocessing
# Missing values are handled on the whole frame, before it is split.
# NOTE(review): if pp.impute_missing_values derives its fill values from the
# data, test rows influence them because this runs before the split -- confirm.
credit_risk = pp.impute_missing_values(credit_risk)
print("Missing Values handling Complete")
# Outlier removal is disabled; the line below still names `housing`.
# housing = pp.remove_outliers(housing)

# Training and Testing Preparation
# Split on the "loan_status" column; the helper's four return values are
# unpacked in the order it returns them.
split_result = pp.training_test_split(credit_risk, "loan_status")
(
    training_features,
    training_target_value,
    test_features,
    test_target_value,
) = split_result
print("Training and Test features split Complete")

# Normalise the data
# Both feature sets go through pp.normalise together and are replaced by the
# pair it returns.
normalised_features = pp.normalise(training_features, test_features)
training_features, test_features = normalised_features
print("Normalisation Complete")
|
|
# Init Models
# The helper receives the training features and targets; the object it
# returns is used directly for prediction below (presumably already fitted
# inside the helper -- confirm in models.random_forest_classifier).
rf_model = models.random_forest_classifier(
    training_features,
    training_target_value,
)
print("Model Init Complete")

# Get Predictions
# Predict on the held-out test features.
rf_predictions = rf_model.predict(test_features)
print("Predictions Complete")
|
|
|
|
# Compare Results
# Score the predictions against the held-out targets, then print both the
# score and the raw prediction array.
accuracy = accuracy_score(
    y_true=test_target_value,
    y_pred=rf_predictions,
)
print(f"Accuracy: {accuracy}")
print(rf_predictions)
|