67 lines
2.4 KiB
Python
67 lines
2.4 KiB
Python
|
|
import matplotlib.pyplot as plt
|
||
|
|
import seaborn as sns
|
||
|
|
|
||
|
|
def missing_value_pairwise_plots(data_frame, null_column, save=False):
    """Plot every other column against ``null_column``, marking missing rows.

    For each column of ``data_frame`` other than ``null_column`` one figure is
    produced containing:

    * a scatter of the rows that have no missing values at all
      (``data_frame.dropna()``), with the column on x and ``null_column``
      on y;
    * a red vertical line at the column's value for every row where
      ``null_column`` is null (those rows have no y value to scatter).

    Args:
        data_frame: Table to plot; indexed by column label and expected to
            provide ``dropna``/``isnull`` (presumably a pandas DataFrame).
        null_column: Label of the column containing the missing values.
        save: When true, each figure is written to
            ``missing_values[<column>:<null_column>].png``; otherwise it is
            displayed with ``plt.show()``.
    """
    not_missing_data = data_frame.dropna()
    missing_data = data_frame[data_frame[null_column].isnull()]

    for column in data_frame.columns:
        if column == null_column:
            continue

        plt.figure()
        plt.title(f"Scatter Plot of {column} against {null_column}")
        # Pass the x-values of the rows whose null_column is missing, not the
        # column label itself (iterating a label would walk its characters).
        # NaN x-values cannot be positioned on the axis, so they are dropped.
        plot_missing_values(missing_data[column].dropna())
        plt.scatter(
            not_missing_data[column],
            not_missing_data[null_column],
            color=[[0.502, 0, 0.502, 0.4]],
            label="Existing Values",
        )
        plt.xlabel(column)
        plt.ylabel(null_column)
        plt.legend()

        if save:
            plt.savefig(f"missing_values[{column}:{null_column}].png")
        else:
            plt.show()
        # Close on both paths so figures do not pile up across iterations.
        plt.close()
|
||
|
|
|
||
|
|
def plot_missing_values(column):
    """Draw a translucent red vertical line for each value in ``column``.

    An empty line plot is added first purely so that a single
    "Missing Values" entry shows up in the legend; the vertical lines
    themselves carry no label.

    Args:
        column: Iterable of x positions at which to draw the lines.
    """
    marker_style = {"color": "red", "alpha": 0.4}

    # Legend proxy: plots no data, only registers the label.
    plt.plot([], [], label="Missing Values", **marker_style)

    for x_position in column:
        plt.axvline(x=x_position, **marker_style)
|
||
|
|
|
||
|
|
def correlation_matrix(data_frame, save=False):
    """Render the result of ``data_frame.corr()`` as an annotated heatmap.

    Args:
        data_frame: Object exposing ``corr()`` (presumably a pandas
            DataFrame); its result is handed to ``seaborn.heatmap``.
        save: When true the figure is written to ``correlation_matrix.png``;
            otherwise it is displayed with ``plt.show()``.
    """
    correlations = data_frame.corr()

    plt.figure()
    sns.heatmap(correlations, annot=True)
    plt.title("Correlation Matrix of Existing Features")

    if not save:
        plt.show()
    else:
        plt.savefig("correlation_matrix.png")
    # Release the figure whichever branch ran.
    plt.close()
|
||
|
|
|
||
|
|
def imputation_plots(data_frame, imputed_data, null_column, columns, save=False):
    """Compare imputed data with the original complete rows, one figure per column.

    For every label in ``columns`` a figure is drawn with two scatter layers,
    both using the column on x and ``null_column`` on y:

    * ``imputed_data`` in translucent green ("Imputed Data");
    * the rows of ``data_frame`` without any missing value
      (``data_frame.dropna()``) in translucent purple ("Original Data").

    Args:
        data_frame: Original table, indexed by column label and providing
            ``dropna`` (presumably a pandas DataFrame).
        imputed_data: Table after imputation, indexed by the same labels.
        null_column: Label of the column that was imputed; plotted on y.
        columns: Iterable of column labels to plot on x.
        save: When true, each figure is written to
            ``imputation_results[<column>:<null_column>].png``; otherwise it
            is displayed with ``plt.show()``.
    """
    not_missing_data = data_frame.dropna()

    for column in columns:
        plt.figure()
        plt.scatter(
            imputed_data[column],
            imputed_data[null_column],
            color=[[0, 0.502, 0, 0.4]],
            label="Imputed Data",
        )
        # Fixed: keyword was misspelled ("colot") and the RGBA alpha was
        # written "0,4", which produced a five-component colour.
        plt.scatter(
            not_missing_data[column],
            not_missing_data[null_column],
            color=[[0.502, 0, 0.502, 0.4]],
            label="Original Data",
        )
        plt.title(f"Scatter Plot of {column} against {null_column} after KNN(9) Imputation")
        plt.xlabel(column)
        # Fixed: the y axis carries null_column, not the x column.
        plt.ylabel(null_column)
        plt.legend()

        if save:
            plt.savefig(f"imputation_results[{column}:{null_column}].png")
        else:
            plt.show()
        # Close on both paths so figures do not pile up across iterations.
        plt.close()
|
||
|
|
|
||
|
|
def outlier_box_plots(data_frame, save=False):
    """Draw one box plot per column of ``data_frame``.

    Args:
        data_frame: Table whose columns are plotted; indexed by column label
            (presumably a pandas DataFrame).
        save: When true, each figure is written to
            ``outlier_box_plot[<column>].png``; otherwise it is displayed
            with ``plt.show()``.
    """
    for column in data_frame.columns:
        plt.figure()
        plt.title(f"Box Plot of {column}")
        # Matplotlib's box-plot statistics become NaN if any input value is
        # NaN, which leaves the box blank; plot only the observed values.
        plt.boxplot(data_frame[column].dropna())
        plt.ylabel(column)
        plt.xticks(rotation=45)

        if save:
            plt.savefig(f"outlier_box_plot[{column}].png")
        else:
            plt.show()
        # Close on both paths so figures do not pile up across iterations.
        plt.close()
|