From 2b553d78e33d916efac46b32b441f584ec870605 Mon Sep 17 00:00:00 2001 From: r0r-5chach Date: Thu, 4 Apr 2024 03:47:57 +0100 Subject: [PATCH] init routes --- .gitignore | 1 - db.py | 0 docker/docker-compose.yml | 17 +++++ docker/server.Dockerfile | 11 ++++ main.py | 77 ---------------------- model/model.py | 69 +++++++++++++++++++ preprocessing.py => model/preprocessing.py | 0 models.py | 9 --- routes/application_routes.py | 11 ++++ routes/authentication_routes.py | 15 +++++ routes/bureau_routes.py | 7 ++ routes/referral_routes.py | 13 ++++ routes/user_routes.py | 15 +++++ setup.py | 15 ++--- visualisations.py | 66 ------------------- 15 files changed, 165 insertions(+), 161 deletions(-) create mode 100644 db.py create mode 100644 docker/docker-compose.yml create mode 100644 docker/server.Dockerfile delete mode 100644 main.py create mode 100644 model/model.py rename preprocessing.py => model/preprocessing.py (100%) delete mode 100644 models.py create mode 100644 routes/application_routes.py create mode 100644 routes/authentication_routes.py create mode 100644 routes/bureau_routes.py create mode 100644 routes/referral_routes.py create mode 100644 routes/user_routes.py delete mode 100644 visualisations.py diff --git a/.gitignore b/.gitignore index 8728440..6964186 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,3 @@ __pycache__ build barclays_credit_classifier.egg-info .env - diff --git a/db.py b/db.py new file mode 100644 index 0000000..e69de29 diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..d45e8b2 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,17 @@ +services: + + mongo: + image: mongo + restart: always + environment: + MONGO_INITDB_ROOT_USERNAME: root + MONGO_INITDB_ROOT_PASSWORD: example + ports: + - 27017:27017 + + server: + build: + context: . 
+ dockerfile: server.Dockerfile + ports: + - 12345:12345 diff --git a/docker/server.Dockerfile b/docker/server.Dockerfile new file mode 100644 index 0000000..be4acc4 --- /dev/null +++ b/docker/server.Dockerfile @@ -0,0 +1,11 @@ +FROM python:3 + +WORKDIR $HOME/projects/uni/barclays_challenge_event_2024/ + +ADD setup.py ./ +ADD routes/ ./ +ADD model/ ./ +ADD main.py ./ +ADD db.py ./ +ADD credit_risk_dataset.csv ./ +RUN pip install . diff --git a/main.py b/main.py deleted file mode 100644 index a24d9cd..0000000 --- a/main.py +++ /dev/null @@ -1,77 +0,0 @@ -import models -import pandas as pd -import preprocessing as pp -from sklearn.metrics import accuracy_score - -# Load Data -credit_risk = pd.read_csv("credit_risk_dataset.csv") - -# Feature Addition - - -# Feature Conversion - -person_home_ownership_values = { - "RENT": 1, - "MORTGAGE": 2, - "OWN": 3, - "OTHER": 4, -} -loan_intent_values = { - "EDUCATIONAL": 1, - "MEDICAL": 2, - "VENTURE": 3, - "PERSONAL": 4, - "DEBTCONSOLIDATION": 5 -} -loan_grade_values = { - "A": 1, - "B": 2, - "C": 3, - "D": 4, - "E": 5 -} -cb_person_default_on_file_values = { - "Y": 1, - "N": 0, -} - -# Convert categorical column to a numerical column -credit_risk["person_home_ownership"] = credit_risk["person_home_ownership"].map(person_home_ownership_values) -credit_risk["loan_intent"] = credit_risk["loan_intent"].map(loan_intent_values) -credit_risk["loan_grade"] = credit_risk["loan_grade"].map(loan_grade_values) -credit_risk["cb_person_default_on_file"] = credit_risk["cb_person_default_on_file"].map(cb_person_default_on_file_values) - -print("Feature Conversion Complete") - -# Feature Removal -# columns_for_removal = ["housing_median_age", "total_rooms", "total_bedrooms"] -# for column in columns_for_removal: -# housing.drop(column, axis=1, inplace=True) - -# Preprocessing -credit_risk = pp.impute_missing_values(credit_risk) # Handle missing values -print("Missing Values handling Complete") -# housing = pp.remove_outliers(housing) #Remove 
outliers - -# Training and Testing Preperation -training_features, training_target_value, test_features, test_target_value = pp.training_test_split(credit_risk, "loan_status") # Split the data into Training and Test sets -print("Training and Test features split Complete") - -# Normalise the data -training_features, test_features = pp.normalise(training_features, - test_features) -print("Normalisation Complete") -# Init Models -rf_model = models.random_forest_classifier(training_features, - training_target_value) -print("Model Init Complete") - -# Get Predictions -rf_predictions = rf_model.predict(test_features) -print("Predictions Complete") - -# Compare Results -accuracy = accuracy_score(test_target_value, rf_predictions) -print(f"Accuracy: {accuracy}") -print(rf_predictions) diff --git a/model/model.py b/model/model.py new file mode 100644 index 0000000..46151d4 --- /dev/null +++ b/model/model.py @@ -0,0 +1,69 @@ +import pandas as pd +import preprocessing as pp +from sklearn.ensemble import RandomForestClassifier + +# Feature Conversion Values +categorical_columns = ["person_home_ownership", + "loan_intent", + "loan_grade", + "cb_person_default_on_file"] +person_home_ownership_values = { + "RENT": 1, + "MORTGAGE": 2, + "OWN": 3, + "OTHER": 4, +} +loan_intent_values = { + "EDUCATIONAL": 1, + "MEDICAL": 2, + "VENTURE": 3, + "PERSONAL": 4, + "DEBTCONSOLIDATION": 5 + } +loan_grade_values = { + "A": 1, + "B": 2, + "C": 3, + "D": 4, + "E": 5 +} +cb_person_default_on_file_values = { + "Y": 1, + "N": 0, +} + + +def get_default_predictor(): + data = pd.read_csv("credit_risk_dataset.csv") + data = convert_categories(data) + + # Imputation + data = pp.impute_missing_values(data) + + # Training and Testing Preparation + train_features, train_target, test_features, test_target = pp.training_test_split(data, "loan_status") + + # Normalise the data + training_features, test_features = pp.normalise(train_features, + test_features) + # Init Models + rf_model = 
random_forest_classifier(training_features, + train_target) + return rf_model + + +def random_forest_classifier(training_features, training_target): + model = RandomForestClassifier(max_features="log2", + random_state=79, + n_jobs=-1) + return model.fit(training_features, training_target) # fit() returns the fitted estimator + + +def convert_categories(data): + data["person_home_ownership"] = data["person_home_ownership"].map( + person_home_ownership_values) + data["loan_intent"] = data["loan_intent"].map(loan_intent_values) + data["loan_grade"] = data["loan_grade"].map(loan_grade_values) + data["cb_person_default_on_file"] = data["cb_person_default_on_file"].map( + cb_person_default_on_file_values) + return data diff --git a/preprocessing.py b/model/preprocessing.py similarity index 100% rename from preprocessing.py rename to model/preprocessing.py diff --git a/models.py b/models.py deleted file mode 100644 index 52fb724..0000000 --- a/models.py +++ /dev/null @@ -1,9 +0,0 @@ -from sklearn.ensemble import RandomForestClassifier - - -def random_forest_classifier(training_features, training_target): - model = RandomForestClassifier(max_features="log2", - random_state=79, - n_jobs=-1) - model.fit(training_features, training_target) - return model diff --git a/routes/application_routes.py b/routes/application_routes.py new file mode 100644 index 0000000..a796f0a --- /dev/null +++ b/routes/application_routes.py @@ -0,0 +1,11 @@ +from flask import Blueprint + +blueprint = Blueprint("user", __name__) + +@blueprint.route("/api/user", methods=["POST"]) + +@blueprint.route("/api/user", methods=["PATCH"]) + +@blueprint.route("/api/user", methods=["GET"]) + +@blueprint.route("/api/user", methods=["OPTIONS"]) diff --git a/routes/authentication_routes.py b/routes/authentication_routes.py new file mode 100644 index 0000000..54b100f --- /dev/null +++ b/routes/authentication_routes.py @@ -0,0 +1,15 @@ +from flask import Blueprint, request + +blueprint = Blueprint("application", __name__) + +@blueprint.route("/api/application/apply", methods=["POST"]) +def 
process_application(): + req = request.json + + +@blueprint.route("/api/application", methods=["PATCH"]) +@blueprint.route("/api/application", methods=["OPTIONS"]) + +@blueprint.route("/api/application", methods=["GET"]) +def authenticate_user(): + req = request.json diff --git a/routes/bureau_routes.py b/routes/bureau_routes.py new file mode 100644 index 0000000..5ce8ddb --- /dev/null +++ b/routes/bureau_routes.py @@ -0,0 +1,7 @@ +from flask import Blueprint + +blueprint = Blueprint("user", __name__) + +@blueprint.route("/api/user", methods=["GET"]) + +@blueprint.route("/api/user", methods=["OPTIONS"]) diff --git a/routes/referral_routes.py b/routes/referral_routes.py new file mode 100644 index 0000000..7f333e9 --- /dev/null +++ b/routes/referral_routes.py @@ -0,0 +1,13 @@ +from flask import Blueprint + +blueprint = Blueprint("user", __name__) + +@blueprint.route("/api/referral", methods=["POST"]) + +@blueprint.route("/api/referral", methods=["PATCH"]) + +@blueprint.route("/api/referral", methods=["DELETE"]) + +@blueprint.route("/api/referral", methods=["GET"]) + +@blueprint.route("/api/referral", methods=["OPTIONS"]) diff --git a/routes/user_routes.py b/routes/user_routes.py new file mode 100644 index 0000000..a4c055d --- /dev/null +++ b/routes/user_routes.py @@ -0,0 +1,15 @@ +from flask import Blueprint + +blueprint = Blueprint("user", __name__) + +@blueprint.route("/api/user", methods=["POST"]) + +@blueprint.route("/api/user", methods=["PUT"]) + +@blueprint.route("/api/user", methods=["PATCH"]) + +@blueprint.route("/api/user", methods=["DELETE"]) + +@blueprint.route("/api/user", methods=["GET"]) + +@blueprint.route("/api/user", methods=["OPTIONS"]) diff --git a/setup.py b/setup.py index 74a9e19..59164c0 100644 --- a/setup.py +++ b/setup.py @@ -1,18 +1,17 @@ from setuptools import setup, find_packages setup( - name="barclays_credit_classifier", + name="Credit Assessment", version="1.0.0", - description="Predicts whether someone will default on their loan. 
Uses the Credit Risk Dataset from Kaggle", - author="r0r-5chach", + description="ML Assisted Credit Assessment", + author="r0r-5chach", author_email="r0r-5chach.xyz@proton.me", packages=find_packages(), install_requires=[ - "numpy", - "scipy", - "matplotlib", - "pandas", + "asyncio", + "Flask", "scikit-learn", - "seaborn" + "pandas", + "pymongo" ] ) diff --git a/visualisations.py b/visualisations.py deleted file mode 100644 index 9906c3f..0000000 --- a/visualisations.py +++ /dev/null @@ -1,66 +0,0 @@ -import matplotlib.pyplot as plt -import seaborn as sns - -def missing_value_pairwise_plots(data_frame, null_column, save=False): - not_missing_data = data_frame.dropna() - mising_data = data_frame[data_frame[null_column].isnull()] - for column in data_frame.columns: - if column != null_column: - plt.figure() - plt.title(f"Scatter Plot of {column} against {null_column}") - plot_missing_values(column) - plt.scatter(not_missing_data[column], not_missing_data[null_column], color=[[0.502, 0, 0.502, 0.4]], label="Existing Values") - plt.xlabel(column) - plt.ylabel(null_column) - plt.legend() - if save: - plt.savefig(f"missing_values[{column}:{null_column}].png") - else: - plt.show() - plt.close() - -def plot_missing_values(column): - plt.plot([], [], color="red", alpha=0.4, label="Missing Values") - for value in column: - plt.axvline(x=value, color="red", alpha=0.4) - -def correlation_matrix(data_frame, save=False): - matrix = data_frame.corr() - plt.figure() - sns.heatmap(matrix, annot=True) - plt.title("Correlation Matrix of Existing Features") - - if save: - plt.savefig("correlation_matrix.png") - else: - plt.show() - plt.close() - -def imputation_plots(data_frame, imputed_data, null_column, columns, save=False): - not_missing_data = data_frame.dropna() - for column in columns: - plt.figure() - plt.scatter(imputed_data[column], imputed_data[null_column], color=[[0, 0.502, 0, 0.4]], label="Imputed Data") - plt.scatter(not_missing_data[column], 
not_missing_data[null_column], colot=[[0.502, 0, 0.502, 0,4]], label="Original Data") - plt.title(f"Scatter Plot of {column} against {null_column} after KNN(9) Imputation") - plt.xlabel(column) - plt.ylabel(column) - plt.legend() - if save: - plt.savefig(f"imputation_results[{column}:{null_column}].png") - else: - plt.show() - plt.close() - -def outlier_box_plots(data_frame, save=False): - for column in data_frame.columns: - plt.figure() - plt.title(f"Box Plot of {column}") - plt.boxplot(data_frame[column]) - plt.ylabel(column) - plt.xticks(rotation=45) - if save: - plt.savefig(f"outlier_box_plot[{column}].png") - else: - plt.show() - plt.close()