"""Train and evaluate a random-forest classifier on the credit-risk dataset.

Pipeline: load the CSV, convert the categorical columns to numeric codes,
impute missing values, split into training and test sets, normalise the
features, fit the model and print its accuracy on the test set.
"""
import warnings

import pandas as pd
from sklearn.metrics import accuracy_score

import models
import preprocessing as pp


def _encode_column(column, mapping):
    """Return ``column`` with each label replaced by its code in ``mapping``.

    Labels that have no entry in ``mapping`` become NaN, exactly as with a
    plain ``Series.map``. Because those NaNs are later handed to the
    imputation step, a warning lists the offending labels so that an
    incomplete mapping is not mistaken for genuinely missing data.
    """
    encoded = column.map(mapping)
    # Present in the raw data but absent after mapping => label not in mapping.
    unmapped = column[column.notna() & encoded.isna()].unique().tolist()
    if unmapped:
        warnings.warn(
            f"{column.name}: no numeric code defined for {unmapped}; "
            "these rows become NaN",
            stacklevel=2,
        )
    return encoded


# Load Data
credit_risk = pd.read_csv("credit_risk_dataset.csv")

# Feature Addition
# (no features are added at present)

# Feature Conversion
person_home_ownership_values = {
    "RENT": 1,
    "MORTGAGE": 2,
    "OWN": 3,
    "OTHER": 4,
}

loan_intent_values = {
    "EDUCATIONAL": 1,
    "MEDICAL": 2,
    "VENTURE": 3,
    "PERSONAL": 4,
    "DEBTCONSOLIDATION": 5,
}

loan_grade_values = {
    "A": 1,
    "B": 2,
    "C": 3,
    "D": 4,
    "E": 5,
}

cb_person_default_on_file_values = {
    "Y": 1,
    "N": 0,
}

# Convert each categorical column to a numerical column
credit_risk["person_home_ownership"] = _encode_column(
    credit_risk["person_home_ownership"], person_home_ownership_values
)
credit_risk["loan_intent"] = _encode_column(
    credit_risk["loan_intent"], loan_intent_values
)
credit_risk["loan_grade"] = _encode_column(
    credit_risk["loan_grade"], loan_grade_values
)
credit_risk["cb_person_default_on_file"] = _encode_column(
    credit_risk["cb_person_default_on_file"], cb_person_default_on_file_values
)
print("Feature Conversion Complete")

# Feature Removal
# (no columns are removed at present)

# Preprocessing
credit_risk = pp.impute_missing_values(credit_risk)  # Handle missing values
print("Missing Values handling Complete")
# (outlier removal is not applied at present)

# Training and Testing Preparation
# Split the data into Training and Test sets, with "loan_status" as the target
(
    training_features,
    training_target_value,
    test_features,
    test_target_value,
) = pp.training_test_split(credit_risk, "loan_status")
print("Training and Test features split Complete")

# Normalise the data
training_features, test_features = pp.normalise(training_features, test_features)
print("Normalisation Complete")

# Init Models
rf_model = models.random_forest_classifier(training_features, training_target_value)
print("Model Init Complete")

# Get Predictions
rf_predictions = rf_model.predict(test_features)
print("Predictions Complete")

# Compare Results
accuracy = accuracy_score(test_target_value, rf_predictions)
print(f"Accuracy: {accuracy}")
print(rf_predictions)