"""Encode categorical credit-risk columns as integers and KNN-impute gaps.

Reads ``credit_risk_dataset.csv`` from the working directory, replaces the
four string-valued columns below with integer codes, fills every missing
cell with :class:`sklearn.impute.KNNImputer`, and writes the result to
``imputed_data.csv`` (no index column).
"""

import warnings

import pandas as pd
from sklearn.impute import KNNImputer

dataframe = pd.read_csv("credit_risk_dataset.csv")

# Feature Conversion Values
person_home_ownership_values = {
    "RENT": 1,
    "MORTGAGE": 2,
    "OWN": 3,
    "OTHER": 4,
}

loan_intent_values = {
    "EDUCATIONAL": 1,
    "MEDICAL": 2,
    "VENTURE": 3,
    "PERSONAL": 4,
    "DEBTCONSOLIDATION": 5,
}

loan_grade_values = {
    "A": 1,
    "B": 2,
    "C": 3,
    "D": 4,
    "E": 5,
}

cb_person_default_on_file_values = {
    "Y": 1,
    "N": 0,
}


def _encode_categories(frame, column, mapping):
    """Return ``frame[column]`` translated through ``mapping``.

    ``Series.map`` turns any label that is not a key of ``mapping`` into NaN
    without complaint; the imputer further down would then overwrite those
    cells with synthetic values. To keep that from happening unnoticed, a
    ``UserWarning`` listing the unmapped labels is emitted. The returned
    series is exactly what a plain ``.map(mapping)`` would produce.

    Args:
        frame: DataFrame holding the column to encode.
        column: Name of the column to translate.
        mapping: Dict from raw category label to numeric code.

    Returns:
        The encoded series (NaN where the input was missing or unmapped).
    """
    raw = frame[column]
    encoded = raw.map(mapping)

    # Cells that held a value before mapping but are NaN afterwards are the
    # ones the mapping table does not cover.
    lost = raw[raw.notna() & encoded.isna()]
    if not lost.empty:
        labels = ", ".join(sorted({repr(value) for value in lost.unique()}))
        warnings.warn(
            f"Column {column!r}: {len(lost)} row(s) contain categories with "
            f"no numeric code ({labels}); they are converted to NaN and "
            "will be filled in by the imputer.",
            stacklevel=2,
        )
    return encoded


# Column name -> conversion table, applied in this order.
_categorical_encodings = {
    "person_home_ownership": person_home_ownership_values,
    "loan_intent": loan_intent_values,
    "loan_grade": loan_grade_values,
    "cb_person_default_on_file": cb_person_default_on_file_values,
}

for _column, _mapping in _categorical_encodings.items():
    dataframe[_column] = _encode_categories(dataframe, _column, _mapping)

# NOTE(review): the imputer is fitted on every column of the frame, encoded
# categoricals included, and fills gaps with the mean of the neighbours'
# values, so imputed category codes may come out fractional. Confirm this is
# acceptable for downstream consumers.
imputer = KNNImputer(n_neighbors=9, weights="uniform", metric="nan_euclidean")
imputed_data = imputer.fit_transform(dataframe)

# fit_transform returns a bare array; restore the column labels before saving.
pd.DataFrame(imputed_data, columns=dataframe.columns).to_csv(
    "imputed_data.csv", index=False
)