import pandas as pd
import preprocessing as pp
from sklearn.ensemble import RandomForestClassifier

# Feature Conversion Values
categorical_columns = [
    "person_home_ownership",
    "loan_intent",
    "loan_grade",
    "cb_person_default_on_file",
]

# NOTE(review): `Series.map` turns any label that is missing from the tables
# below into NaN. Confirm these tables cover every category present in the
# dataset; otherwise the unmapped rows are silently handed to the imputation
# step as missing values.
person_home_ownership_values = {
    "RENT": 1,
    "MORTGAGE": 2,
    "OWN": 3,
    "OTHER": 4,
}

loan_intent_values = {
    "EDUCATIONAL": 1,
    "MEDICAL": 2,
    "VENTURE": 3,
    "PERSONAL": 4,
    "DEBTCONSOLIDATION": 5,
}

loan_grade_values = {
    "A": 1,
    "B": 2,
    "C": 3,
    "D": 4,
    "E": 5,
}

cb_person_default_on_file_values = {
    "Y": 1,
    "N": 0,
}


def get_default_predictor(dataset_path="credit_risk_dataset.csv"):
    """Train and return the default random-forest loan-status predictor.

    The CSV is loaded, its categorical columns are integer-encoded, and the
    result is passed through the project's ``preprocessing`` helpers
    (imputation, train/test split on ``"loan_status"``, normalisation) before
    the model is fitted on the training portion.

    Args:
        dataset_path: Path of the CSV file to load. Defaults to the file name
            that was previously hard-coded.

    Returns:
        A ``RandomForestClassifier`` fitted on the normalised training
        features.
    """
    # `pd.from_csv` does not exist; `read_csv` is the pandas CSV loader.
    data = pd.read_csv(dataset_path)
    data = convert_categories(data)

    # Imputation
    data = pp.impute_missing_values(data)

    # Training and Testing Preparation
    # The held-out target is returned by the split but is not used here.
    train_features, train_target, test_features, _test_target = (
        pp.training_test_split(data, "loan_status")
    )

    # Normalise the data
    training_features, test_features = pp.normalise(
        train_features, test_features)

    # Init Models
    rf_model = random_forest_classifier(training_features, train_target)

    return rf_model


def random_forest_classifier(training_features, training_target):
    """Build a random forest and fit it on the supplied training data.

    Args:
        training_features: Feature matrix used to fit the model.
        training_target: Target labels aligned with ``training_features``.

    Returns:
        The fitted ``RandomForestClassifier`` (``max_features="log2"``,
        ``random_state=79``, ``n_jobs=-1``).
    """
    model = RandomForestClassifier(
        max_features="log2", random_state=79, n_jobs=-1)
    # Previously the estimator was returned unfitted and both arguments were
    # ignored, so the returned "predictor" could not predict anything.
    model.fit(training_features, training_target)
    return model


def convert_categories(data):
    """Replace the categorical string columns with their integer codes.

    Each of the four categorical columns is overwritten in place on ``data``
    using its module-level lookup table; labels absent from a table become
    NaN (standard ``Series.map`` behaviour).

    Args:
        data: DataFrame containing the columns ``person_home_ownership``,
            ``loan_intent``, ``loan_grade`` and ``cb_person_default_on_file``.

    Returns:
        The same DataFrame object, with those columns encoded.
    """
    # Resolved at call time so the tables are read exactly as the module
    # currently defines them.
    encodings = (
        ("person_home_ownership", person_home_ownership_values),
        ("loan_intent", loan_intent_values),
        ("loan_grade", loan_grade_values),
        ("cb_person_default_on_file", cb_person_default_on_file_values),
    )
    for column, codes in encodings:
        data[column] = data[column].map(codes)

    return data