From 2b553d78e33d916efac46b32b441f584ec870605 Mon Sep 17 00:00:00 2001
From: r0r-5chach <r0r-5chach.xyz@proton.me>
Date: Thu, 4 Apr 2024 03:47:57 +0100
Subject: [PATCH] init routes

---
 .gitignore                                 |  1 -
 db.py                                      |  0
 docker/docker-compose.yml                  | 17 +++++
 docker/server.Dockerfile                   | 11 ++++
 main.py                                    | 77 ----------------------
 model/model.py                             | 69 +++++++++++++++++++
 preprocessing.py => model/preprocessing.py |  0
 models.py                                  |  9 ---
 routes/application_routes.py               | 11 ++++
 routes/authentication_routes.py            | 15 +++++
 routes/bureau_routes.py                    |  7 ++
 routes/referral_routes.py                  | 13 ++++
 routes/user_routes.py                      | 15 +++++
 setup.py                                   | 15 ++---
 visualisations.py                          | 66 -------------------
 15 files changed, 165 insertions(+), 161 deletions(-)
 create mode 100644 db.py
 create mode 100644 docker/docker-compose.yml
 create mode 100644 docker/server.Dockerfile
 delete mode 100644 main.py
 create mode 100644 model/model.py
 rename preprocessing.py => model/preprocessing.py (100%)
 delete mode 100644 models.py
 create mode 100644 routes/application_routes.py
 create mode 100644 routes/authentication_routes.py
 create mode 100644 routes/bureau_routes.py
 create mode 100644 routes/referral_routes.py
 create mode 100644 routes/user_routes.py
 delete mode 100644 visualisations.py

diff --git a/.gitignore b/.gitignore
index 8728440..6964186 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,3 @@ __pycache__
 build
 barclays_credit_classifier.egg-info
 .env
-
diff --git a/db.py b/db.py
new file mode 100644
index 0000000..e69de29
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..d45e8b2
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,17 @@
+services:
+  # mongo: database container; server: image built from docker/server.Dockerfile.
+  mongo:
+    image: mongo
+    restart: always
+    environment:
+      MONGO_INITDB_ROOT_USERNAME: root
+      MONGO_INITDB_ROOT_PASSWORD: example  # NOTE(review): placeholder credential - inject a real secret outside local dev
+    ports:
+      - 27017:27017
+
+  server:
+    build:
+      context: ..  # repository root: the Dockerfile copies setup.py, routes/ and model/ from there
+      dockerfile: docker/server.Dockerfile  # path is resolved relative to the build context
+    ports:
+      - 12345:12345
diff --git a/docker/server.Dockerfile b/docker/server.Dockerfile
new file mode 100644
index 0000000..be4acc4
--- /dev/null
+++ b/docker/server.Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3
+# NOTE(review): HOME is not declared with ENV in this file, so $HOME presumably expands to empty - confirm.
+WORKDIR $HOME/projects/uni/barclays_challenge_event_2024/
+# Sources are relative to the build context (repository root); a directory source copies its contents only.
+COPY setup.py ./
+COPY routes/ ./
+COPY model/ ./
+# TODO: main.py is deleted by this commit, so copying it aborted the build; restore the COPY with the new entry point.
+COPY db.py ./
+COPY credit_risk_dataset.csv ./
+RUN pip install .
diff --git a/main.py b/main.py
deleted file mode 100644
index a24d9cd..0000000
--- a/main.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import models
-import pandas as pd
-import preprocessing as pp
-from sklearn.metrics import accuracy_score
-
-# Load Data
-credit_risk = pd.read_csv("credit_risk_dataset.csv")
-
-# Feature Addition
-
-
-# Feature Conversion
-
-person_home_ownership_values = {
-    "RENT": 1,
-    "MORTGAGE": 2,
-    "OWN": 3,
-    "OTHER": 4,
-}
-loan_intent_values = {
-    "EDUCATIONAL": 1,
-    "MEDICAL": 2,
-    "VENTURE": 3,
-    "PERSONAL": 4,
-    "DEBTCONSOLIDATION": 5
-}
-loan_grade_values = {
-    "A": 1,
-    "B": 2,
-    "C": 3,
-    "D": 4,
-    "E": 5
-}
-cb_person_default_on_file_values = {
-    "Y": 1,
-    "N": 0,
-}
-
-# Convert categorical column to a numerical column
-credit_risk["person_home_ownership"] = credit_risk["person_home_ownership"].map(person_home_ownership_values)
-credit_risk["loan_intent"] = credit_risk["loan_intent"].map(loan_intent_values)
-credit_risk["loan_grade"] = credit_risk["loan_grade"].map(loan_grade_values)
-credit_risk["cb_person_default_on_file"] = credit_risk["cb_person_default_on_file"].map(cb_person_default_on_file_values)
-
-print("Feature Conversion Complete")
-
-# Feature Removal
-# columns_for_removal = ["housing_median_age", "total_rooms", "total_bedrooms"]
-# for column in columns_for_removal:
-#    housing.drop(column, axis=1, inplace=True)
-
-# Preprocessing
-credit_risk = pp.impute_missing_values(credit_risk)  # Handle missing values
-print("Missing Values handling Complete")
-# housing = pp.remove_outliers(housing) #Remove outliers
-
-# Training and Testing Preperation
-training_features, training_target_value, test_features, test_target_value = pp.training_test_split(credit_risk, "loan_status")  # Split the data into Training and Test sets
-print("Training and Test features split Complete")
-
-# Normalise the data
-training_features, test_features = pp.normalise(training_features,
-                                                test_features)
-print("Normalisation Complete")
-# Init Models
-rf_model = models.random_forest_classifier(training_features,
-                                           training_target_value)
-print("Model Init Complete")
-
-# Get Predictions
-rf_predictions = rf_model.predict(test_features)
-print("Predictions Complete")
-
-# Compare Results
-accuracy = accuracy_score(test_target_value, rf_predictions)
-print(f"Accuracy: {accuracy}")
-print(rf_predictions)
diff --git a/model/model.py b/model/model.py
new file mode 100644
index 0000000..46151d4
--- /dev/null
+++ b/model/model.py
@@ -0,0 +1,69 @@
+import pandas as pd
+import preprocessing as pp
+from sklearn.ensemble import RandomForestClassifier
+
+# Numeric codes for the categorical dataset columns; convert_categories applies them via Series.map
+categorical_columns = ["person_home_ownership",  # NOTE(review): not referenced anywhere else in this module
+                       "loan_intent",
+                       "loan_grade",
+                       "cb_person_default_on_file"]
+person_home_ownership_values = {
+    "RENT": 1,
+    "MORTGAGE": 2,
+    "OWN": 3,
+    "OTHER": 4,
+}
+loan_intent_values = {
+    "EDUCATIONAL": 1,
+    "MEDICAL": 2,
+    "VENTURE": 3,
+    "PERSONAL": 4,
+    "DEBTCONSOLIDATION": 5
+    }
+loan_grade_values = {
+    "A": 1,
+    "B": 2,
+    "C": 3,
+    "D": 4,
+    "E": 5
+}
+cb_person_default_on_file_values = {
+    "Y": 1,
+    "N": 0,
+}
+
+
+def get_default_predictor():
+    """Load credit_risk_dataset.csv, preprocess it and return the model from random_forest_classifier."""
+    # pandas has no top-level from_csv (AttributeError); read_csv is the CSV loader.
+    data = pd.read_csv("credit_risk_dataset.csv")
+    data = convert_categories(data)
+
+    # Imputation
+    data = pp.impute_missing_values(data)
+
+    # Training and testing preparation; test_target is unpacked but not needed here
+    train_features, train_target, test_features, test_target = pp.training_test_split(data, "loan_status")
+
+    # Normalise the data
+    training_features, test_features = pp.normalise(train_features,
+                                                    test_features)
+    # Init model on the normalised training split
+    return random_forest_classifier(training_features, train_target)
+
+
+def random_forest_classifier(training_features, training_target):
+    """Return a RandomForestClassifier fitted on the given training features and target."""
+    model = RandomForestClassifier(max_features="log2", random_state=79,
+                                   n_jobs=-1)
+    return model.fit(training_features, training_target)  # fit() returns the estimator; it was never trained before
+
+
+def convert_categories(data):
+    """Replace each categorical column of data with its numeric codes (in place) and return data."""
+    for column, codes in (("person_home_ownership", person_home_ownership_values),
+                          ("loan_intent", loan_intent_values),
+                          ("loan_grade", loan_grade_values),
+                          ("cb_person_default_on_file", cb_person_default_on_file_values)):
+        data[column] = data[column].map(codes)
+    return data
diff --git a/preprocessing.py b/model/preprocessing.py
similarity index 100%
rename from preprocessing.py
rename to model/preprocessing.py
diff --git a/models.py b/models.py
deleted file mode 100644
index 52fb724..0000000
--- a/models.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from sklearn.ensemble import RandomForestClassifier
-
-
-def random_forest_classifier(training_features, training_target):
-    model = RandomForestClassifier(max_features="log2",
-                                   random_state=79,
-                                   n_jobs=-1)
-    model.fit(training_features, training_target)
-    return model
diff --git a/routes/application_routes.py b/routes/application_routes.py
new file mode 100644
index 0000000..a796f0a
--- /dev/null
+++ b/routes/application_routes.py
@@ -0,0 +1,11 @@
+from flask import Blueprint
+
+blueprint = Blueprint("user", __name__)  # NOTE(review): same name and URLs as user_routes.py - confirm intent
+
+@blueprint.route("/api/user", methods=["POST"])
+@blueprint.route("/api/user", methods=["PATCH"])
+@blueprint.route("/api/user", methods=["GET"])
+@blueprint.route("/api/user", methods=["OPTIONS"])
+def application_placeholder():
+    """Answer 501 until real handlers exist; decorators without a function were a SyntaxError."""
+    return "", 501
diff --git a/routes/authentication_routes.py b/routes/authentication_routes.py
new file mode 100644
index 0000000..54b100f
--- /dev/null
+++ b/routes/authentication_routes.py
@@ -0,0 +1,15 @@
+from flask import Blueprint, request
+
+blueprint = Blueprint("application", __name__)
+
+@blueprint.route("/api/application/apply", methods=["POST"])
+def process_application():
+    req = request.json  # parsed JSON body; not used yet
+    return "", 501  # TODO: implement - a view that returns None makes Flask raise TypeError
+
+@blueprint.route("/api/application", methods=["PATCH"])
+@blueprint.route("/api/application", methods=["OPTIONS"])
+@blueprint.route("/api/application", methods=["GET"])
+def authenticate_user():
+    req = request.json  # NOTE(review): GET/OPTIONS requests usually carry no JSON body - confirm
+    return "", 501  # TODO: implement - a view that returns None makes Flask raise TypeError
diff --git a/routes/bureau_routes.py b/routes/bureau_routes.py
new file mode 100644
index 0000000..5ce8ddb
--- /dev/null
+++ b/routes/bureau_routes.py
@@ -0,0 +1,7 @@
+from flask import Blueprint
+
+blueprint = Blueprint("bureau", __name__)  # was "user"; blueprint names must be unique per app
+
+@blueprint.route("/api/user", methods=["GET", "OPTIONS"])
+def bureau_placeholder():
+    return "", 501  # TODO: implement; the decorators previously had no function (SyntaxError)
diff --git a/routes/referral_routes.py b/routes/referral_routes.py
new file mode 100644
index 0000000..7f333e9
--- /dev/null
+++ b/routes/referral_routes.py
@@ -0,0 +1,13 @@
+from flask import Blueprint
+
+blueprint = Blueprint("referral", __name__)  # was "user"; blueprint names must be unique per app
+
+
+@blueprint.route("/api/referral", methods=["POST"])
+@blueprint.route("/api/referral", methods=["PATCH"])
+@blueprint.route("/api/referral", methods=["DELETE"])
+@blueprint.route("/api/referral", methods=["GET"])
+@blueprint.route("/api/referral", methods=["OPTIONS"])
+def referral_placeholder():
+    """Answer 501 until real handlers exist; decorators without a function were a SyntaxError."""
+    return "", 501
diff --git a/routes/user_routes.py b/routes/user_routes.py
new file mode 100644
index 0000000..a4c055d
--- /dev/null
+++ b/routes/user_routes.py
@@ -0,0 +1,15 @@
+from flask import Blueprint
+
+blueprint = Blueprint("user", __name__)
+
+
+# TODO: replace the shared placeholder with one handler per HTTP method.
+@blueprint.route("/api/user", methods=["POST"])
+@blueprint.route("/api/user", methods=["PUT"])
+@blueprint.route("/api/user", methods=["PATCH"])
+@blueprint.route("/api/user", methods=["DELETE"])
+@blueprint.route("/api/user", methods=["GET"])
+@blueprint.route("/api/user", methods=["OPTIONS"])
+def user_placeholder():
+    """Answer 501 until real handlers exist; decorators without a function were a SyntaxError."""
+    return "", 501
diff --git a/setup.py b/setup.py
index 74a9e19..59164c0 100644
--- a/setup.py
+++ b/setup.py
@@ -1,18 +1,17 @@
 from setuptools import setup, find_packages
 
 setup(
-        name="barclays_credit_classifier",
+        name="Credit Assessment",
         version="1.0.0",
-        description="Predicts whether someone will default on their loan. Uses the Credit Risk Dataset from Kaggle",
-        author="r0r-5chach", 
+        description="ML Assisted Credit Assessment",
+        author="r0r-5chach",
         author_email="r0r-5chach.xyz@proton.me",
         packages=find_packages(),
         install_requires=[
-            "numpy",
-            "scipy",
-            "matplotlib",
-            "pandas",
+            # asyncio is in the standard library; the PyPI "asyncio" package is an obsolete backport
+            "Flask",
             "scikit-learn",
-            "seaborn"
+            "pandas",
+            "pymongo"
             ]
         )
diff --git a/visualisations.py b/visualisations.py
deleted file mode 100644
index 9906c3f..0000000
--- a/visualisations.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-def missing_value_pairwise_plots(data_frame, null_column, save=False):
-    not_missing_data = data_frame.dropna()
-    mising_data = data_frame[data_frame[null_column].isnull()]
-    for column in data_frame.columns:
-        if column != null_column:
-            plt.figure()
-            plt.title(f"Scatter Plot of {column} against {null_column}")
-            plot_missing_values(column)
-            plt.scatter(not_missing_data[column], not_missing_data[null_column], color=[[0.502, 0, 0.502, 0.4]], label="Existing Values")
-            plt.xlabel(column)
-            plt.ylabel(null_column)
-            plt.legend()
-            if save:
-                plt.savefig(f"missing_values[{column}:{null_column}].png")
-            else:
-                plt.show()
-            plt.close()
-
-def plot_missing_values(column):
-    plt.plot([], [], color="red", alpha=0.4, label="Missing Values")
-    for value in column:
-        plt.axvline(x=value, color="red", alpha=0.4)
-
-def correlation_matrix(data_frame, save=False):
-    matrix = data_frame.corr()
-    plt.figure()
-    sns.heatmap(matrix, annot=True)
-    plt.title("Correlation Matrix of Existing Features")
-    
-    if save:
-        plt.savefig("correlation_matrix.png")
-    else:
-        plt.show()
-    plt.close()
-
-def imputation_plots(data_frame, imputed_data, null_column, columns, save=False):
-    not_missing_data = data_frame.dropna()
-    for column in columns:
-        plt.figure()
-        plt.scatter(imputed_data[column], imputed_data[null_column], color=[[0, 0.502, 0, 0.4]], label="Imputed Data")
-        plt.scatter(not_missing_data[column], not_missing_data[null_column], colot=[[0.502, 0, 0.502, 0,4]], label="Original Data")
-        plt.title(f"Scatter Plot of {column} against {null_column} after KNN(9) Imputation")
-        plt.xlabel(column)
-        plt.ylabel(column)
-        plt.legend()
-        if save:
-            plt.savefig(f"imputation_results[{column}:{null_column}].png")
-        else:
-            plt.show()
-        plt.close()
-
-def outlier_box_plots(data_frame, save=False):
-    for column in data_frame.columns:
-        plt.figure()
-        plt.title(f"Box Plot of {column}")
-        plt.boxplot(data_frame[column])
-        plt.ylabel(column)
-        plt.xticks(rotation=45)
-        if save:
-            plt.savefig(f"outlier_box_plot[{column}].png")
-        else:
-            plt.show()
-        plt.close()