# Python source (file-viewer metadata: 41 lines, 1.1 KiB)
import warnings

import pandas as pd
from sklearn.impute import KNNImputer

# Script: integer-encode the categorical columns of the credit-risk CSV,
# fill every missing cell with KNN imputation, and write the result to
# "imputed_data.csv".
dataframe = pd.read_csv("credit_risk_dataset.csv")

# Feature Conversion Values
# Each table maps a text label to the integer code that replaces it.
# NOTE(review): a label that is absent from its table becomes NaN when mapped
# and is then imputed like a genuinely missing value -- verify that the tables
# cover every category that occurs in the CSV.
person_home_ownership_values = {
    "RENT": 1,
    "MORTGAGE": 2,
    "OWN": 3,
    "OTHER": 4,
}
loan_intent_values = {
    "EDUCATIONAL": 1,
    "MEDICAL": 2,
    "VENTURE": 3,
    "PERSONAL": 4,
    "DEBTCONSOLIDATION": 5,
}
loan_grade_values = {
    "A": 1,
    "B": 2,
    "C": 3,
    "D": 4,
    "E": 5,
}
cb_person_default_on_file_values = {
    "Y": 1,
    "N": 0,
}

# Column name -> encoding table, so every column goes through the same
# map-and-check step instead of four copy-pasted statements.
_categorical_encodings = {
    "person_home_ownership": person_home_ownership_values,
    "loan_intent": loan_intent_values,
    "loan_grade": loan_grade_values,
    "cb_person_default_on_file": cb_person_default_on_file_values,
}

for _column, _codes in _categorical_encodings.items():
    _labels = dataframe[_column]
    _encoded = _labels.map(_codes)

    # Series.map turns labels that are not dict keys into NaN. Cells that were
    # populated before the mapping but are NaN afterwards are therefore
    # unmapped labels, not real gaps; report them so they are not silently
    # replaced by imputed values further down.
    _unmapped = _labels[_encoded.isna() & _labels.notna()].unique()
    if len(_unmapped) > 0:
        warnings.warn(
            f"{_column}: {len(_unmapped)} label(s) have no integer code and "
            f"will be treated as missing: {sorted(map(str, _unmapped))}"
        )

    dataframe[_column] = _encoded

# Fill every NaN using the 9 nearest rows (uniform weighting); the
# "nan_euclidean" metric computes distances while ignoring missing coordinates.
imputer = KNNImputer(n_neighbors=9, weights="uniform", metric="nan_euclidean")
imputed_data = imputer.fit_transform(dataframe)

# fit_transform returns a plain array, so re-attach the original column names
# before writing; index=False keeps the row index out of the output file.
pd.DataFrame(imputed_data, columns=dataframe.columns).to_csv(
    "imputed_data.csv", index=False
)