init routes
This commit is contained in:
parent
2371f14c3c
commit
2b553d78e3
|
|
@ -2,4 +2,3 @@ __pycache__
|
||||||
build
|
build
|
||||||
barclays_credit_classifier.egg-info
|
barclays_credit_classifier.egg-info
|
||||||
.env
|
.env
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,17 @@
|
||||||
|
services:
|
||||||
|
|
||||||
|
mongo:
|
||||||
|
image: mongo
|
||||||
|
restart: always
|
||||||
|
environment:
|
||||||
|
MONGO_INITDB_ROOT_USERNAME: root
|
||||||
|
MONGO_INITDB_ROOT_PASSWORD: example
|
||||||
|
ports:
|
||||||
|
- 27017:27017
|
||||||
|
|
||||||
|
server:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: server.Dockerfile
|
||||||
|
ports:
|
||||||
|
- 12345:12345
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
# Image for the API server: Python 3 base with the project installed via pip.
FROM python:3

# WORKDIR only expands variables declared with ENV (here or in the base image).
# $HOME is not declared that way, so the previous "$HOME/projects/..." value
# collapsed to "/projects/...". Use an explicit absolute path instead.
WORKDIR /app

# COPY is the recommended instruction for plain local files; ADD's extra
# URL / archive-extraction behaviour is not needed for any of these sources.
COPY setup.py ./
# NOTE(review): copying "dir/ ./" places the directory *contents* directly in
# the working directory (the directory itself is not recreated). This matches
# the previous ADD behaviour; confirm it is what the imports expect.
COPY routes/ ./
COPY model/ ./
COPY main.py ./
COPY db.py ./
COPY credit_risk_dataset.csv ./

# Install the project and the dependencies declared in setup.py.
RUN pip install .
|
||||||
77
main.py
77
main.py
|
|
@ -1,77 +0,0 @@
|
||||||
import models
|
|
||||||
import pandas as pd
|
|
||||||
import preprocessing as pp
|
|
||||||
from sklearn.metrics import accuracy_score
|
|
||||||
|
|
||||||
# Load the raw credit-risk records
credit_risk = pd.read_csv("credit_risk_dataset.csv")

# Lookup tables translating each categorical label into a numeric code
person_home_ownership_values = {"RENT": 1, "MORTGAGE": 2, "OWN": 3, "OTHER": 4}
loan_intent_values = {
    "EDUCATIONAL": 1,
    "MEDICAL": 2,
    "VENTURE": 3,
    "PERSONAL": 4,
    "DEBTCONSOLIDATION": 5,
}
loan_grade_values = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5}
cb_person_default_on_file_values = {"Y": 1, "N": 0}

# Encode the categorical columns, one lookup table per column
for column_name, codes in (
    ("person_home_ownership", person_home_ownership_values),
    ("loan_intent", loan_intent_values),
    ("loan_grade", loan_grade_values),
    ("cb_person_default_on_file", cb_person_default_on_file_values),
):
    credit_risk[column_name] = credit_risk[column_name].map(codes)
print("Feature Conversion Complete")

# Fill in missing values
credit_risk = pp.impute_missing_values(credit_risk)
print("Missing Values handling Complete")

# Split into training and test sets, with "loan_status" as the target column
(
    training_features,
    training_target_value,
    test_features,
    test_target_value,
) = pp.training_test_split(credit_risk, "loan_status")
print("Training and Test features split Complete")

# Normalise both feature sets
training_features, test_features = pp.normalise(training_features, test_features)
print("Normalisation Complete")

# Build the random-forest model from the training split
rf_model = models.random_forest_classifier(training_features, training_target_value)
print("Model Init Complete")

# Predict on the held-out features
rf_predictions = rf_model.predict(test_features)
print("Predictions Complete")

# Report accuracy against the held-out targets, then the raw predictions
accuracy = accuracy_score(test_target_value, rf_predictions)
print(f"Accuracy: {accuracy}")
print(rf_predictions)
|
|
||||||
|
|
@ -0,0 +1,69 @@
|
||||||
|
import pandas as pd
|
||||||
|
import preprocessing as pp
|
||||||
|
from sklearn.ensemble import RandomForestClassifier
|
||||||
|
|
||||||
|
# Feature Conversion Values
|
||||||
|
categorical_columns = ["person_home_ownership",
|
||||||
|
"loan_intent",
|
||||||
|
"loan_grade",
|
||||||
|
"cb_person_default_on_file"]
|
||||||
|
person_home_ownership_values = {
|
||||||
|
"RENT": 1,
|
||||||
|
"MORTGAGE": 2,
|
||||||
|
"OWN": 3,
|
||||||
|
"OTHER": 4,
|
||||||
|
}
|
||||||
|
loan_intent_values = {
|
||||||
|
"EDUCATIONAL": 1,
|
||||||
|
"MEDICAL": 2,
|
||||||
|
"VENTURE": 3,
|
||||||
|
"PERSONAL": 4,
|
||||||
|
"DEBTCONSOLIDATION": 5
|
||||||
|
}
|
||||||
|
loan_grade_values = {
|
||||||
|
"A": 1,
|
||||||
|
"B": 2,
|
||||||
|
"C": 3,
|
||||||
|
"D": 4,
|
||||||
|
"E": 5
|
||||||
|
}
|
||||||
|
cb_person_default_on_file_values = {
|
||||||
|
"Y": 1,
|
||||||
|
"N": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_predictor():
    """Build the default predictor from ``credit_risk_dataset.csv``.

    Steps: load the CSV, encode the categorical columns with
    ``convert_categories``, impute missing values, split on the
    ``"loan_status"`` column, normalise the features and hand the training
    split to ``random_forest_classifier``.

    Returns:
        The model object returned by ``random_forest_classifier``.
    """
    # pandas exposes its CSV reader as ``read_csv``; ``pd.from_csv`` does not
    # exist and raised AttributeError on the first call.
    data = pd.read_csv("credit_risk_dataset.csv")
    data = convert_categories(data)

    # Imputation
    data = pp.impute_missing_values(data)

    # Training and testing preparation (the test target is not needed here)
    train_features, train_target, test_features, test_target = pp.training_test_split(
        data, "loan_status")

    # Normalise the data
    training_features, test_features = pp.normalise(train_features,
                                                    test_features)

    # Init model
    rf_model = random_forest_classifier(training_features,
                                        train_target)
    return rf_model
|
||||||
|
|
||||||
|
|
||||||
|
def random_forest_classifier(training_features, training_target):
    """Create a random-forest classifier and fit it to the training data.

    Args:
        training_features: Feature matrix passed to ``fit``.
        training_target: Target values passed to ``fit``.

    Returns:
        The fitted ``RandomForestClassifier`` (``max_features="log2"``,
        ``random_state=79``, ``n_jobs=-1``).
    """
    model = RandomForestClassifier(max_features="log2",
                                   random_state=79,
                                   n_jobs=-1)
    # Without this call both arguments were ignored and callers received an
    # unfitted estimator, which cannot be used for prediction.
    model.fit(training_features, training_target)
    return model
|
||||||
|
|
||||||
|
|
||||||
|
def convert_categories(data):
    """Replace the categorical columns of ``data`` with their numeric codes.

    Each listed column is overwritten in place with the result of mapping it
    through its module-level lookup table, and the same object is returned.

    Args:
        data: Table holding the four categorical columns
            (presumably a pandas DataFrame -- confirm against callers).

    Returns:
        ``data``, with the four columns re-assigned.
    """
    lookup_by_column = (
        ("person_home_ownership", person_home_ownership_values),
        ("loan_intent", loan_intent_values),
        ("loan_grade", loan_grade_values),
        ("cb_person_default_on_file", cb_person_default_on_file_values),
    )
    for column_name, lookup in lookup_by_column:
        data[column_name] = data[column_name].map(lookup)
    return data
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
from sklearn.ensemble import RandomForestClassifier
|
|
||||||
|
|
||||||
|
|
||||||
def random_forest_classifier(training_features, training_target):
    """Return a random-forest classifier fitted to the given training data.

    Args:
        training_features: Feature matrix passed to ``fit``.
        training_target: Target values passed to ``fit``.
    """
    forest_options = {
        "max_features": "log2",
        "random_state": 79,
        "n_jobs": -1,
    }
    forest = RandomForestClassifier(**forest_options)
    forest.fit(training_features, training_target)
    return forest
|
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
from flask import Blueprint
|
||||||
|
|
||||||
|
blueprint = Blueprint("user", __name__)
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["POST"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["PATCH"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["GET"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["OPTIONS"])
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
from flask import Blueprint, request
|
||||||
|
|
||||||
|
blueprint = Blueprint("application", __name__)
|
||||||
|
|
||||||
|
@blueprint.route("/api/application/apply", methods=["POST"])
def process_application():
    """Handle ``POST /api/application/apply``.

    The processing logic is not implemented yet. The view still returns an
    explicit response, because a view that returns ``None`` makes Flask raise
    and answer with a 500.

    Returns:
        ``(body, status)``: 400 when the body is not valid JSON, otherwise 501.
    """
    # silent=True yields None for a missing or malformed JSON body
    req = request.get_json(silent=True)
    if req is None:
        return {"error": "request body must be JSON"}, 400

    # TODO: process the application contained in ``req``
    return {"error": "not implemented"}, 501
|
||||||
|
|
||||||
|
|
||||||
|
@blueprint.route("/api/application", methods=["PATCH"])
@blueprint.route("/api/application", methods=["OPTIONS"])
@blueprint.route("/api/application", methods=["GET"])
def authenticate_user():
    """Handle ``PATCH``, ``OPTIONS`` and ``GET`` on ``/api/application``.

    The handler logic is not implemented yet. The view still returns an
    explicit response, because a view that returns ``None`` makes Flask raise
    and answer with a 500.

    Returns:
        ``(body, status)`` with status 501.
    """
    # GET and OPTIONS requests normally carry no JSON body; silent=True gives
    # None in that case instead of aborting the request.
    req = request.get_json(silent=True)

    # TODO: implement using ``req`` (may be None)
    return {"error": "not implemented"}, 501
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
from flask import Blueprint
|
||||||
|
|
||||||
|
blueprint = Blueprint("user", __name__)
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["GET"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["OPTIONS"])
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
from flask import Blueprint
|
||||||
|
|
||||||
|
blueprint = Blueprint("user", __name__)
|
||||||
|
|
||||||
|
@blueprint.route("/api/referral", methods=["POST"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/referral", methods=["PATCH"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/referral", methods=["DELETE"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/referral", methods=["GET"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/referral", methods=["OPTIONS"])
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
from flask import Blueprint
|
||||||
|
|
||||||
|
blueprint = Blueprint("user", __name__)
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["POST"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["PUT"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["PATCH"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["DELETE"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["GET"])
|
||||||
|
|
||||||
|
@blueprint.route("/api/user", methods=["OPTIONS"])
|
||||||
13
setup.py
13
setup.py
|
|
@ -1,18 +1,17 @@
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="barclays_credit_classifier",
|
name="Credit Assessment",
|
||||||
version="1.0.0",
|
version="1.0.0",
|
||||||
description="Predicts whether someone will default on their loan. Uses the Credit Risk Dataset from Kaggle",
|
description="ML Assisted Credit Assessment",
|
||||||
author="r0r-5chach",
|
author="r0r-5chach",
|
||||||
author_email="r0r-5chach.xyz@proton.me",
|
author_email="r0r-5chach.xyz@proton.me",
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"numpy",
|
"asyncio",
|
||||||
"scipy",
|
"Flask",
|
||||||
"matplotlib",
|
|
||||||
"pandas",
|
|
||||||
"scikit-learn",
|
"scikit-learn",
|
||||||
"seaborn"
|
"pandas",
|
||||||
|
"pymongo"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,66 +0,0 @@
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import seaborn as sns
|
|
||||||
|
|
||||||
def missing_value_pairwise_plots(data_frame, null_column, save=False):
    """Plot every other column against ``null_column``, marking missing rows.

    For each column except ``null_column`` a figure is drawn with:
    a scatter of the complete rows (``column`` vs ``null_column``), and
    vertical lines at the ``column`` values of rows where ``null_column``
    is null.

    Args:
        data_frame: Table to inspect (indexed by column name; supports
            ``dropna`` and boolean-mask selection).
        null_column: Name of the column whose missing values are studied.
        save: When true, write each figure to
            ``missing_values[<column>:<null_column>].png`` instead of showing it.
    """
    not_missing_data = data_frame.dropna()
    missing_data = data_frame[data_frame[null_column].isnull()]

    for column in data_frame.columns:
        if column == null_column:
            continue

        plt.figure()
        plt.title(f"Scatter Plot of {column} against {null_column}")
        # Pass the values of the rows with a missing target. Previously the
        # column *name* was passed, so the helper iterated over its characters.
        plot_missing_values(missing_data[column].dropna())
        plt.scatter(not_missing_data[column], not_missing_data[null_column],
                    color=[[0.502, 0, 0.502, 0.4]], label="Existing Values")
        plt.xlabel(column)
        plt.ylabel(null_column)
        plt.legend()

        if save:
            plt.savefig(f"missing_values[{column}:{null_column}].png")
        else:
            plt.show()
        plt.close()
|
|
||||||
|
|
||||||
def plot_missing_values(column):
    """Draw one translucent red vertical line per value in ``column``.

    An empty line plot is added first so the legend gets a single
    "Missing Values" entry.

    Args:
        column: Iterable of x positions.
    """
    line_style = {"color": "red", "alpha": 0.4}
    plt.plot([], [], label="Missing Values", **line_style)
    for x_position in column:
        plt.axvline(x=x_position, **line_style)
|
|
||||||
|
|
||||||
def correlation_matrix(data_frame, save=False):
    """Render an annotated heatmap of ``data_frame.corr()``.

    Args:
        data_frame: Table whose ``corr()`` result is plotted.
        save: When true, write ``correlation_matrix.png`` instead of showing
            the figure.
    """
    correlations = data_frame.corr()

    plt.figure()
    sns.heatmap(correlations, annot=True)
    plt.title("Correlation Matrix of Existing Features")

    if not save:
        plt.show()
    else:
        plt.savefig("correlation_matrix.png")
    plt.close()
|
|
||||||
|
|
||||||
def imputation_plots(data_frame, imputed_data, null_column, columns, save=False):
    """Compare imputed and original values of ``null_column`` per column.

    For each entry of ``columns`` one figure is drawn with two scatters of
    ``column`` (x) against ``null_column`` (y): the imputed data, and the
    complete rows of the original data.

    Args:
        data_frame: Original table; rows with any null are dropped first.
        imputed_data: Table after imputation.
        null_column: Name of the column that was imputed (y axis).
        columns: Names of the columns to plot on the x axis.
        save: When true, write each figure to
            ``imputation_results[<column>:<null_column>].png`` instead of
            showing it.
    """
    not_missing_data = data_frame.dropna()

    for column in columns:
        plt.figure()
        plt.scatter(imputed_data[column], imputed_data[null_column],
                    color=[[0, 0.502, 0, 0.4]], label="Imputed Data")
        # Fixed: the keyword was misspelled "colot", and the RGBA alpha was
        # written "0,4", which produced a five-element colour.
        plt.scatter(not_missing_data[column], not_missing_data[null_column],
                    color=[[0.502, 0, 0.502, 0.4]], label="Original Data")
        plt.title(f"Scatter Plot of {column} against {null_column} after KNN(9) Imputation")
        plt.xlabel(column)
        # The y axis holds null_column, not the x column
        plt.ylabel(null_column)
        plt.legend()

        if save:
            plt.savefig(f"imputation_results[{column}:{null_column}].png")
        else:
            plt.show()
        plt.close()
|
|
||||||
|
|
||||||
def outlier_box_plots(data_frame, save=False):
    """Draw one box plot per column of ``data_frame``.

    Args:
        data_frame: Table whose columns are plotted one figure at a time.
        save: When true, write each figure to
            ``outlier_box_plot[<column>].png`` instead of showing it.
    """
    for column_name in data_frame.columns:
        plt.figure()
        plt.title(f"Box Plot of {column_name}")
        plt.boxplot(data_frame[column_name])
        plt.ylabel(column_name)
        plt.xticks(rotation=45)

        if not save:
            plt.show()
        else:
            plt.savefig(f"outlier_box_plot[{column_name}].png")
        plt.close()
|
|
||||||
Loading…
Reference in New Issue