barclays_challenge_event_20.../visualisations.py

67 lines
2.4 KiB
Python
Raw Normal View History

2024-03-29 16:05:46 +00:00
import matplotlib.pyplot as plt
import seaborn as sns
def missing_value_pairwise_plots(data_frame, null_column, save=False):
    """Scatter every other column against ``null_column`` and mark the missing rows.

    For each column other than ``null_column`` a new figure is created showing:

    * a scatter of the rows that contain no nulls at all ("Existing Values");
    * vertical red lines at the x positions of the rows where
      ``null_column`` is null ("Missing Values").

    Args:
        data_frame: Frame holding the data; indexed by column name and
            expected to provide ``dropna``, ``columns`` and ``isnull``.
        null_column: Name of the column whose missing values are highlighted.
        save: When true each figure is written to
            ``missing_values[<column>:<null_column>].png``; otherwise it is
            displayed with ``plt.show()``.
    """
    not_missing_data = data_frame.dropna()
    missing_data = data_frame[data_frame[null_column].isnull()]
    for column in data_frame.columns:
        if column == null_column:
            continue
        plt.figure()
        plt.title(f"Scatter Plot of {column} against {null_column}")
        # Hand over the x values of the rows lacking null_column (not the
        # column's name); NaNs in this column have no position to draw.
        plot_missing_values(missing_data[column].dropna())
        plt.scatter(
            not_missing_data[column],
            not_missing_data[null_column],
            color=[[0.502, 0, 0.502, 0.4]],
            label="Existing Values",
        )
        plt.xlabel(column)
        plt.ylabel(null_column)
        plt.legend()
        if save:
            plt.savefig(f"missing_values[{column}:{null_column}].png")
        else:
            plt.show()
        plt.close()
def plot_missing_values(column):
    """Draw a translucent red vertical line on the current axes for each item.

    Args:
        column: Iterable of x positions; one ``plt.axvline`` is drawn per item.
    """
    line_style = {"color": "red", "alpha": 0.4}
    # Empty labelled line: the only artist here carrying a legend label, so
    # the unlabelled vertical lines are represented by a single entry.
    plt.plot([], [], label="Missing Values", **line_style)
    for x_position in column:
        plt.axvline(x=x_position, **line_style)
def correlation_matrix(data_frame, save=False):
    """Render the result of ``data_frame.corr()`` as an annotated heatmap.

    Args:
        data_frame: Object providing a ``corr()`` method whose result is
            passed to ``sns.heatmap``.
        save: When true the figure is written to ``correlation_matrix.png``;
            otherwise it is displayed with ``plt.show()``.
    """
    correlations = data_frame.corr()
    plt.figure()
    sns.heatmap(correlations, annot=True)
    plt.title("Correlation Matrix of Existing Features")
    if not save:
        plt.show()
    else:
        plt.savefig("correlation_matrix.png")
    plt.close()
def imputation_plots(data_frame, imputed_data, null_column, columns, save=False):
    """Overlay imputed and original data in one scatter plot per column.

    For each entry of ``columns`` a new figure is created with ``column`` on
    the x axis and ``null_column`` on the y axis, showing the imputed data in
    green and the fully non-null rows of ``data_frame`` in purple.

    Args:
        data_frame: Original frame; only its rows without any null are plotted.
        imputed_data: Frame after imputation, indexed by the same column names.
        null_column: Name of the column plotted on the y axis.
        columns: Iterable of column names to plot on the x axis.
        save: When true each figure is written to
            ``imputation_results[<column>:<null_column>].png``; otherwise it
            is displayed with ``plt.show()``.

    Note:
        The figure title text is fixed to "KNN(9) Imputation" regardless of
        how ``imputed_data`` was produced.
    """
    not_missing_data = data_frame.dropna()
    for column in columns:
        plt.figure()
        plt.scatter(
            imputed_data[column],
            imputed_data[null_column],
            color=[[0, 0.502, 0, 0.4]],
            label="Imputed Data",
        )
        # Same translucent purple RGBA as used for existing values elsewhere
        # in this file (keyword and alpha component were previously mistyped).
        plt.scatter(
            not_missing_data[column],
            not_missing_data[null_column],
            color=[[0.502, 0, 0.502, 0.4]],
            label="Original Data",
        )
        plt.title(f"Scatter Plot of {column} against {null_column} after KNN(9) Imputation")
        plt.xlabel(column)
        # The y data is null_column, so label the axis accordingly.
        plt.ylabel(null_column)
        plt.legend()
        if save:
            plt.savefig(f"imputation_results[{column}:{null_column}].png")
        else:
            plt.show()
        plt.close()
def outlier_box_plots(data_frame, save=False):
    """Produce one box plot figure per column of ``data_frame``.

    Args:
        data_frame: Frame whose ``columns`` are iterated; each column is
            passed to ``plt.boxplot`` in its own figure.
        save: When true each figure is written to
            ``outlier_box_plot[<column>].png``; otherwise it is displayed
            with ``plt.show()``.
    """
    for feature in data_frame.columns:
        plt.figure()
        plt.title(f"Box Plot of {feature}")
        plt.boxplot(data_frame[feature])
        plt.ylabel(feature)
        plt.xticks(rotation=45)
        if not save:
            plt.show()
        else:
            plt.savefig(f"outlier_box_plot[{feature}].png")
        plt.close()