import matplotlib.pyplot as plt
import seaborn as sns


def _save_or_show(file_name, save):
    """Write the current figure to *file_name* if *save*, else show it; then close it."""
    if save:
        plt.savefig(file_name)
    else:
        plt.show()
    # Close explicitly so figures do not accumulate when called in a loop.
    plt.close()


def missing_value_pairwise_plots(data_frame, null_column, save: bool = False):
    """Plot every other column against ``null_column`` and mark the missing rows.

    For each column of ``data_frame`` other than ``null_column`` one figure is
    produced containing:

    * a scatter of the column against ``null_column`` using only the rows
      that have no missing value in any column (``data_frame.dropna()``);
    * one red vertical line per row in which ``null_column`` is null, drawn
      at that row's value of the column being plotted.

    Args:
        data_frame: Frame holding the data; indexed by column label.
        null_column: Label of the column whose missing values are inspected.
        save: When true each figure is written to
            ``missing_values[<column>:<null_column>].png``; otherwise it is
            shown with ``plt.show()``.
    """
    not_missing_data = data_frame.dropna()
    missing_data = data_frame[data_frame[null_column].isnull()]

    for column in data_frame.columns:
        if column == null_column:
            continue

        plt.figure()
        plt.title(f"Scatter Plot of {column} against {null_column}")
        # Pass the *values* of the rows lacking ``null_column`` (not the
        # column label). Rows that also lack this column have no x position
        # to draw a line at, so they are dropped.
        plot_missing_values(missing_data[column].dropna())
        plt.scatter(
            not_missing_data[column],
            not_missing_data[null_column],
            # Single RGBA row: semi-transparent purple for every point.
            color=[[0.502, 0, 0.502, 0.4]],
            label="Existing Values",
        )
        plt.xlabel(column)
        plt.ylabel(null_column)
        plt.legend()
        # NOTE(review): ':' is not permitted in file names on Windows; the
        # name is kept unchanged so existing outputs keep their paths.
        _save_or_show(f"missing_values[{column}:{null_column}].png", save)


def plot_missing_values(column):
    """Draw a red vertical line on the current axes for each value in ``column``.

    Args:
        column: Iterable of x positions (for example a pandas Series of the
            values to mark).
    """
    # Empty plot whose only purpose is to create a single legend entry;
    # the ``axvline`` calls below are unlabeled and would not appear in it.
    plt.plot([], [], color="red", alpha=0.4, label="Missing Values")
    for value in column:
        plt.axvline(x=value, color="red", alpha=0.4)


def correlation_matrix(data_frame, save: bool = False):
    """Plot an annotated heatmap of ``data_frame.corr()``.

    Args:
        data_frame: Frame whose pairwise column correlations are plotted.
        save: When true the figure is written to ``correlation_matrix.png``;
            otherwise it is shown with ``plt.show()``.
    """
    matrix = data_frame.corr()
    plt.figure()
    sns.heatmap(matrix, annot=True)
    plt.title("Correlation Matrix of Existing Features")
    _save_or_show("correlation_matrix.png", save)


def imputation_plots(data_frame, imputed_data, null_column, columns, save: bool = False):
    """Overlay imputed and original data for each of ``columns``.

    For every column in ``columns`` one figure is produced with the column on
    the x axis and ``null_column`` on the y axis: ``imputed_data`` in green
    and, drawn on top of it, the rows of ``data_frame`` that have no missing
    values in purple.

    Args:
        data_frame: Frame before imputation; rows with any missing value are
            excluded via ``dropna()``.
        imputed_data: Data after imputation; indexed by the same column labels.
        null_column: Label of the column that was imputed (y axis).
        columns: Iterable of column labels to plot on the x axis.
        save: When true each figure is written to
            ``imputation_results[<column>:<null_column>].png``; otherwise it
            is shown with ``plt.show()``.
    """
    not_missing_data = data_frame.dropna()

    for column in columns:
        plt.figure()
        plt.scatter(
            imputed_data[column],
            imputed_data[null_column],
            color=[[0, 0.502, 0, 0.4]],
            label="Imputed Data",
        )
        plt.scatter(
            not_missing_data[column],
            not_missing_data[null_column],
            # Was ``colot=[[0.502, 0, 0.502, 0,4]]``: a misspelled keyword
            # and a five-element list instead of an RGBA quadruple.
            color=[[0.502, 0, 0.502, 0.4]],
            label="Original Data",
        )
        plt.title(f"Scatter Plot of {column} against {null_column} after KNN(9) Imputation")
        plt.xlabel(column)
        # The y axis carries ``null_column`` (see both scatter calls above).
        plt.ylabel(null_column)
        plt.legend()
        # NOTE(review): ':' is not permitted in file names on Windows; the
        # name is kept unchanged so existing outputs keep their paths.
        _save_or_show(f"imputation_results[{column}:{null_column}].png", save)


def outlier_box_plots(data_frame, save: bool = False):
    """Draw one box plot per column of ``data_frame``.

    Args:
        data_frame: Frame whose columns are plotted one figure each.
        save: When true each figure is written to
            ``outlier_box_plot[<column>].png``; otherwise it is shown with
            ``plt.show()``.
    """
    for column in data_frame.columns:
        plt.figure()
        plt.title(f"Box Plot of {column}")
        # Drop NaNs first: with NaNs present the box statistics come out as
        # NaN and the box is not drawn.
        plt.boxplot(data_frame[column].dropna())
        plt.ylabel(column)
        plt.xticks(rotation=45)
        _save_or_show(f"outlier_box_plot[{column}].png", save)