end of challenge
This commit is contained in:
parent
ef79c30e8c
commit
26968d7545
|
|
@ -1 +1,5 @@
|
||||||
/target
|
/target
|
||||||
|
__pycache__
|
||||||
|
build
|
||||||
|
credit_risk_imputation.egg-info
|
||||||
|
.env
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,15 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "approx"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3f2a05fd1bd10b2527e20a2cd32d8873d115b8b39fe219ee25f42a8aca6ba278"
|
||||||
|
dependencies = [
|
||||||
|
"num-traits",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "approx"
|
name = "approx"
|
||||||
version = "0.5.1"
|
version = "0.5.1"
|
||||||
|
|
@ -94,7 +103,7 @@ checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80"
|
||||||
name = "barclays"
|
name = "barclays"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"lazy_static",
|
"linfa-preprocessing",
|
||||||
"ndarray",
|
"ndarray",
|
||||||
"polars",
|
"polars",
|
||||||
"smartcore",
|
"smartcore",
|
||||||
|
|
@ -295,6 +304,70 @@ version = "1.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
|
checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encoding"
|
||||||
|
version = "0.2.33"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
|
||||||
|
dependencies = [
|
||||||
|
"encoding-index-japanese",
|
||||||
|
"encoding-index-korean",
|
||||||
|
"encoding-index-simpchinese",
|
||||||
|
"encoding-index-singlebyte",
|
||||||
|
"encoding-index-tradchinese",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encoding-index-japanese"
|
||||||
|
version = "1.20141219.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
|
||||||
|
dependencies = [
|
||||||
|
"encoding_index_tests",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encoding-index-korean"
|
||||||
|
version = "1.20141219.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
|
||||||
|
dependencies = [
|
||||||
|
"encoding_index_tests",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encoding-index-simpchinese"
|
||||||
|
version = "1.20141219.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
|
||||||
|
dependencies = [
|
||||||
|
"encoding_index_tests",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encoding-index-singlebyte"
|
||||||
|
version = "1.20141219.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
|
||||||
|
dependencies = [
|
||||||
|
"encoding_index_tests",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encoding-index-tradchinese"
|
||||||
|
version = "1.20141219.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
|
||||||
|
dependencies = [
|
||||||
|
"encoding_index_tests",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encoding_index_tests"
|
||||||
|
version = "0.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "enum_dispatch"
|
name = "enum_dispatch"
|
||||||
version = "0.3.13"
|
version = "0.3.13"
|
||||||
|
|
@ -356,6 +429,12 @@ version = "0.3.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hashbrown"
|
||||||
|
version = "0.12.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hashbrown"
|
name = "hashbrown"
|
||||||
version = "0.14.3"
|
version = "0.14.3"
|
||||||
|
|
@ -411,6 +490,16 @@ dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indexmap"
|
||||||
|
version = "1.9.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"hashbrown 0.12.3",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indexmap"
|
name = "indexmap"
|
||||||
version = "2.2.6"
|
version = "2.2.6"
|
||||||
|
|
@ -418,7 +507,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
|
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"equivalent",
|
"equivalent",
|
||||||
"hashbrown",
|
"hashbrown 0.14.3",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.10.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -451,12 +549,6 @@ dependencies = [
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "lazy_static"
|
|
||||||
version = "1.4.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.153"
|
version = "0.2.153"
|
||||||
|
|
@ -469,6 +561,50 @@ version = "0.2.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
|
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "linfa"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1cab423110bc374e4cfa915da88952e2c6a4a5a6300ac0a0e68022bff2ace0b3"
|
||||||
|
dependencies = [
|
||||||
|
"approx 0.4.0",
|
||||||
|
"ndarray",
|
||||||
|
"num-traits",
|
||||||
|
"rand",
|
||||||
|
"sprs",
|
||||||
|
"thiserror",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "linfa-linalg"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "56e7562b41c8876d3367897067013bb2884cc78e6893f092ecd26b305176ac82"
|
||||||
|
dependencies = [
|
||||||
|
"ndarray",
|
||||||
|
"num-traits",
|
||||||
|
"thiserror",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "linfa-preprocessing"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9de00d503ab60e12428b77abeff006d1c4f1dba9d962bbd72bca591706c0a8ff"
|
||||||
|
dependencies = [
|
||||||
|
"approx 0.4.0",
|
||||||
|
"encoding",
|
||||||
|
"linfa",
|
||||||
|
"linfa-linalg",
|
||||||
|
"ndarray",
|
||||||
|
"ndarray-rand",
|
||||||
|
"ndarray-stats",
|
||||||
|
"regex",
|
||||||
|
"sprs",
|
||||||
|
"thiserror",
|
||||||
|
"unicode-normalization",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lock_api"
|
name = "lock_api"
|
||||||
version = "0.4.11"
|
version = "0.4.11"
|
||||||
|
|
@ -558,6 +694,7 @@ version = "0.15.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32"
|
checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"approx 0.4.0",
|
||||||
"matrixmultiply",
|
"matrixmultiply",
|
||||||
"num-complex",
|
"num-complex",
|
||||||
"num-integer",
|
"num-integer",
|
||||||
|
|
@ -565,6 +702,41 @@ dependencies = [
|
||||||
"rawpointer",
|
"rawpointer",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ndarray-rand"
|
||||||
|
version = "0.14.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "65608f937acc725f5b164dcf40f4f0bc5d67dc268ab8a649d3002606718c4588"
|
||||||
|
dependencies = [
|
||||||
|
"ndarray",
|
||||||
|
"rand",
|
||||||
|
"rand_distr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ndarray-stats"
|
||||||
|
version = "0.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "af5a8477ac96877b5bd1fd67e0c28736c12943aba24eda92b127e036b0c8f400"
|
||||||
|
dependencies = [
|
||||||
|
"indexmap 1.9.3",
|
||||||
|
"itertools",
|
||||||
|
"ndarray",
|
||||||
|
"noisy_float",
|
||||||
|
"num-integer",
|
||||||
|
"num-traits",
|
||||||
|
"rand",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "noisy_float"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "978fe6e6ebc0bf53de533cd456ca2d9de13de13856eda1518a285d7705a213af"
|
||||||
|
dependencies = [
|
||||||
|
"num-traits",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "now"
|
name = "now"
|
||||||
version = "0.1.3"
|
version = "0.1.3"
|
||||||
|
|
@ -800,7 +972,7 @@ dependencies = [
|
||||||
"fast-float",
|
"fast-float",
|
||||||
"foreign_vec",
|
"foreign_vec",
|
||||||
"getrandom",
|
"getrandom",
|
||||||
"hashbrown",
|
"hashbrown 0.14.3",
|
||||||
"itoa",
|
"itoa",
|
||||||
"itoap",
|
"itoap",
|
||||||
"lz4",
|
"lz4",
|
||||||
|
|
@ -856,8 +1028,8 @@ dependencies = [
|
||||||
"chrono-tz",
|
"chrono-tz",
|
||||||
"comfy-table",
|
"comfy-table",
|
||||||
"either",
|
"either",
|
||||||
"hashbrown",
|
"hashbrown 0.14.3",
|
||||||
"indexmap",
|
"indexmap 2.2.6",
|
||||||
"ndarray",
|
"ndarray",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
|
@ -954,9 +1126,9 @@ dependencies = [
|
||||||
"chrono",
|
"chrono",
|
||||||
"chrono-tz",
|
"chrono-tz",
|
||||||
"either",
|
"either",
|
||||||
"hashbrown",
|
"hashbrown 0.14.3",
|
||||||
"hex",
|
"hex",
|
||||||
"indexmap",
|
"indexmap 2.2.6",
|
||||||
"memchr",
|
"memchr",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"polars-arrow",
|
"polars-arrow",
|
||||||
|
|
@ -999,7 +1171,7 @@ dependencies = [
|
||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"crossbeam-queue",
|
"crossbeam-queue",
|
||||||
"enum_dispatch",
|
"enum_dispatch",
|
||||||
"hashbrown",
|
"hashbrown 0.14.3",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"polars-arrow",
|
"polars-arrow",
|
||||||
"polars-compute",
|
"polars-compute",
|
||||||
|
|
@ -1097,8 +1269,8 @@ checksum = "694656a7d2b0cd8f07660dbc8d0fb7a81066ff57a452264907531d805c1e58c4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash",
|
"ahash",
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"hashbrown",
|
"hashbrown 0.14.3",
|
||||||
"indexmap",
|
"indexmap 2.2.6",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"polars-error",
|
"polars-error",
|
||||||
|
|
@ -1325,7 +1497,7 @@ version = "0.3.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c42ca1fcd851ada8834d3dfcd088850dc8c703bde50c2baccd89181b74dc3ade"
|
checksum = "c42ca1fcd851ada8834d3dfcd088850dc8c703bde50c2baccd89181b74dc3ade"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"approx",
|
"approx 0.5.1",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"ndarray",
|
"ndarray",
|
||||||
"num",
|
"num",
|
||||||
|
|
@ -1344,6 +1516,18 @@ dependencies = [
|
||||||
"version_check",
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sprs"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "88bab60b0a18fb9b3e0c26e92796b3c3a278bf5fa4880f5ad5cc3bdfb843d0b1"
|
||||||
|
dependencies = [
|
||||||
|
"ndarray",
|
||||||
|
"num-complex",
|
||||||
|
"num-traits",
|
||||||
|
"smallvec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sqlparser"
|
name = "sqlparser"
|
||||||
version = "0.39.0"
|
version = "0.39.0"
|
||||||
|
|
@ -1461,12 +1645,36 @@ dependencies = [
|
||||||
"syn 2.0.58",
|
"syn 2.0.58",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinyvec"
|
||||||
|
version = "1.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
|
||||||
|
dependencies = [
|
||||||
|
"tinyvec_macros",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinyvec_macros"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-ident"
|
name = "unicode-ident"
|
||||||
version = "1.0.12"
|
version = "1.0.12"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-normalization"
|
||||||
|
version = "0.1.23"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5"
|
||||||
|
dependencies = [
|
||||||
|
"tinyvec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-reverse"
|
name = "unicode-reverse"
|
||||||
version = "1.0.9"
|
version = "1.0.9"
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ edition = "2021"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
lazy_static = "1.4.0"
|
linfa-preprocessing = "0.7.0"
|
||||||
ndarray = "0.15.6"
|
ndarray = "0.15.6"
|
||||||
polars = { version = "0.38.3", features = ["ndarray"] }
|
polars = { version = "0.38.3", features = ["ndarray"] }
|
||||||
smartcore = { version = "0.3.2", features = ["ndarray-bindings"] }
|
smartcore = { version = "0.3.2", features = ["ndarray-bindings"] }
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,40 @@
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.impute import KNNImputer
|
||||||
|
|
||||||
|
dataframe = pd.read_csv("credit_risk_dataset.csv")
|
||||||
|
|
||||||
|
# Feature Conversion Values
|
||||||
|
person_home_ownership_values = {
|
||||||
|
"RENT": 1,
|
||||||
|
"MORTGAGE": 2,
|
||||||
|
"OWN": 3,
|
||||||
|
"OTHER": 4,
|
||||||
|
}
|
||||||
|
loan_intent_values = {
|
||||||
|
"EDUCATIONAL": 1,
|
||||||
|
"MEDICAL": 2,
|
||||||
|
"VENTURE": 3,
|
||||||
|
"PERSONAL": 4,
|
||||||
|
"DEBTCONSOLIDATION": 5
|
||||||
|
}
|
||||||
|
loan_grade_values = {
|
||||||
|
"A": 1,
|
||||||
|
"B": 2,
|
||||||
|
"C": 3,
|
||||||
|
"D": 4,
|
||||||
|
"E": 5
|
||||||
|
}
|
||||||
|
cb_person_default_on_file_values = {
|
||||||
|
"Y": 1,
|
||||||
|
"N": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
dataframe["person_home_ownership"] = dataframe["person_home_ownership"].map(person_home_ownership_values)
|
||||||
|
dataframe["loan_intent"] = dataframe["loan_intent"].map(loan_intent_values)
|
||||||
|
dataframe["loan_grade"] = dataframe["loan_grade"].map(loan_grade_values)
|
||||||
|
dataframe["cb_person_default_on_file"] = dataframe["cb_person_default_on_file"].map(cb_person_default_on_file_values)
|
||||||
|
|
||||||
|
imputer = KNNImputer(n_neighbors=9, weights="uniform", metric="nan_euclidean")
|
||||||
|
imputed_data = imputer.fit_transform(dataframe)
|
||||||
|
pd.DataFrame(imputed_data,
|
||||||
|
columns=dataframe.columns).to_csv("imputed_data.csv", index=False)
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name="credit_risk_imputation",
|
||||||
|
version="1.0.0",
|
||||||
|
description="""
|
||||||
|
Credit Risk Assessment Dataset Cleaning using KNN Imputation""",
|
||||||
|
author="r0r-5chach",
|
||||||
|
author_email="r0r-5chach.xyz@proton.me",
|
||||||
|
packages=find_packages(),
|
||||||
|
install_requires=[
|
||||||
|
"pandas",
|
||||||
|
"scikit-learn"
|
||||||
|
]
|
||||||
|
)
|
||||||
58
src/model.rs
58
src/model.rs
|
|
@ -1,53 +1,13 @@
|
||||||
pub mod preprocessing;
|
mod preprocessing;
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use linfa_preprocessing::linear_scaling::LinearScaler;
|
||||||
use lazy_static::lazy_static;
|
use preprocessing::SplitData;
|
||||||
use polars::prelude::*;
|
use smartcore::ensemble::random_forest_classifier::{RandomForestClassifier, RandomForestClassifierParameters};
|
||||||
use smartcore::linalg::basic::matrix::DenseMatrix;
|
|
||||||
|
|
||||||
lazy_static! {
|
pub fn init_classifier() {
|
||||||
static ref CATEGORICAL_COLUMNS: Vec<&'static str> = vec![
|
let split_data = SplitData::new();
|
||||||
"person_home_ownership",
|
let features = split_data.features;
|
||||||
"loan_intent",
|
let target = split_data.target;
|
||||||
"loan_grade",
|
|
||||||
"cb_person_default_on_file",
|
|
||||||
];
|
|
||||||
|
|
||||||
static ref HOME_OWNERSHIP_VALUES: HashMap<&'static str, u32> = HashMap::from([
|
|
||||||
("RENT", 1),
|
|
||||||
("MORTGAGE", 2),
|
|
||||||
("OWN", 3),
|
|
||||||
("OTHER", 4),
|
|
||||||
]);
|
|
||||||
|
|
||||||
static ref INTENT_VALUES: HashMap<&'static str, u32> = HashMap::from([
|
|
||||||
("EDUCATIONAL", 1),
|
|
||||||
("MEDICAL", 2),
|
|
||||||
("VENTURE", 3),
|
|
||||||
("PERSONAL", 4),
|
|
||||||
("DEBTCONSOLIDATION", 5),
|
|
||||||
]);
|
|
||||||
|
|
||||||
static ref GRADE_VALUES: HashMap<&'static str, u32> = HashMap::from([
|
|
||||||
("A", 1),
|
|
||||||
("B", 2),
|
|
||||||
("C", 3),
|
|
||||||
("D", 4),
|
|
||||||
("E", 5),
|
|
||||||
]);
|
|
||||||
|
|
||||||
static ref DEFAULT_FILE_VALUES: HashMap<&'static str, u32> = HashMap::from([
|
|
||||||
("Y", 1),
|
|
||||||
("N", 2),
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn init_default_predictor() {
|
|
||||||
let data = CsvReader::from_path("credit_risk_dataset.csv")
|
|
||||||
.unwrap()
|
|
||||||
.finish()
|
|
||||||
.unwrap()
|
|
||||||
.to_ndarray::<Float64Type>(IndexOrder::Fortran)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
|
let classifier = RandomForestClassifier::fit(&features.train, &target.train, Default::default()).unwrap();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,80 @@
|
||||||
use smartcore::{metrics::distance::euclidian::Euclidian, neighbors::knn_regressor::{KNNRegressor, KNNRegressorParameters}};
|
|
||||||
use ndarray::{Array1,Array2};
|
|
||||||
|
|
||||||
pub struct KNNImputer {
|
use linfa_preprocessing::linear_scaling::{LinearScaler, LinearScalerParams};
|
||||||
model: KNNRegressor<f64,f64,Array2<f64>,Array1<f64>,Euclidian<f64>>
|
use ndarray::{Array1,Array2, Axis};
|
||||||
|
use polars::prelude::*;
|
||||||
|
use smartcore::model_selection::train_test_split;
|
||||||
|
|
||||||
|
const FILE: &str = "imputed_data.csv";
|
||||||
|
const TARGET: &str = "loan_status";
|
||||||
|
|
||||||
|
struct Data {
|
||||||
|
features: Array2<f64>,
|
||||||
|
target: Array1<f64>,
|
||||||
|
split_data: Option<SplitData>,
|
||||||
}
|
}
|
||||||
|
impl Data {
|
||||||
|
fn new() -> Self {
|
||||||
|
let dataframe = import_data();
|
||||||
|
let target_index = dataframe.get_column_index(TARGET).unwrap();
|
||||||
|
|
||||||
impl KNNImputer {
|
let mut dataframe = dataframe.to_ndarray::<Float64Type>(IndexOrder::Fortran).unwrap();
|
||||||
pub fn new(dataframe: ndarray::Array2<f64>, target_column: usize) -> Self {
|
|
||||||
let true_values: Array1<f64> = dataframe.column(target_column).to_vec().into();
|
let target = dataframe.index_axis(Axis(1), target_index).to_owned();
|
||||||
KNNImputer {
|
dataframe.remove_index(Axis(1), target_index);
|
||||||
model: KNNRegressor::fit(&dataframe, &true_values.into(), Default::default()).unwrap(),
|
|
||||||
}
|
Data {
|
||||||
|
features: dataframe,
|
||||||
|
target,
|
||||||
|
split_data: None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn impute(&self) -> bool {
|
fn split_data(&self) -> SplitData {
|
||||||
//TODO: Predict value,
|
let (features_train, features_test,
|
||||||
|
target_train, target_test) = train_test_split(&self.features, &self.target,
|
||||||
|
0.75, true, Some(79));
|
||||||
|
|
||||||
|
|
||||||
|
let features = Features{
|
||||||
|
train: scaler.transform(features_train),
|
||||||
|
test: scaler.transform(features_test)
|
||||||
|
};
|
||||||
|
let target = Target{
|
||||||
|
train: scaler.transform(target_train),
|
||||||
|
test: scaler.transform(target_test)
|
||||||
|
};
|
||||||
|
|
||||||
|
SplitData {
|
||||||
|
features,
|
||||||
|
target,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct SplitData {
|
||||||
|
pub features: Features,
|
||||||
|
pub target:Target,
|
||||||
|
}
|
||||||
|
impl SplitData {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
let data = Data::new();
|
||||||
|
data.split_data()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Features {
|
||||||
|
pub train: Array2<f64>,
|
||||||
|
pub test: Array2<f64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Target {
|
||||||
|
pub train: Array1<f64>,
|
||||||
|
pub test: Array1<f64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn import_data() -> DataFrame {
|
||||||
|
CsvReader::from_path(FILE)
|
||||||
|
.unwrap()
|
||||||
|
.finish()
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue