using Jchemo, JchemoData using JLD2, CairoMakie
path_jdat = dirname(dirname(pathof(JchemoData))) db = joinpath(path_jdat, "data/tecator.jld2") @load db dat @names dat
(:X, :Y)
X = dat.X @head X
... (178, 100)
Row | 850 | 852 | 854 | 856 | 858 | 860 | 862 | 864 | 866 | 868 | 870 | 872 | 874 | 876 | 878 | 880 | 882 | 884 | 886 | 888 | 890 | 892 | 894 | 896 | 898 | 900 | 902 | 904 | 906 | 908 | 910 | 912 | 914 | 916 | 918 | 920 | 922 | 924 | 926 | 928 | 930 | 932 | 934 | 936 | 938 | 940 | 942 | 944 | 946 | 948 | 950 | 952 | 954 | 956 | 958 | 960 | 962 | 964 | 966 | 968 | 970 | 972 | 974 | 976 | 978 | 980 | 982 | 984 | 986 | 988 | 990 | 992 | 994 | 996 | 998 | 1000 | 1002 | 1004 | 1006 | 1008 | 1010 | 1012 | 1014 | 1016 | 1018 | 1020 | 1022 | 1024 | 1026 | 1028 | 1030 | 1032 | 1034 | 1036 | 1038 | 1040 | 1042 | 1044 | 1046 | 1048 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | |
1 | 2.61776 | 2.61814 | 2.61859 | 2.61912 | 2.61981 | 2.62071 | 2.62186 | 2.62334 | 2.62511 | 2.62722 | 2.62964 | 2.63245 | 2.63565 | 2.63933 | 2.64353 | 2.64825 | 2.6535 | 2.65937 | 2.66585 | 2.67281 | 2.68008 | 2.68733 | 2.69427 | 2.70073 | 2.70684 | 2.71281 | 2.71914 | 2.72628 | 2.73462 | 2.74416 | 2.75466 | 2.76568 | 2.77679 | 2.7879 | 2.79949 | 2.81225 | 2.82706 | 2.84356 | 2.86106 | 2.87857 | 2.89497 | 2.90924 | 2.92085 | 2.93015 | 2.93846 | 2.94771 | 2.96019 | 2.97831 | 3.00306 | 3.03506 | 3.07428 | 3.11963 | 3.16868 | 3.21771 | 3.26254 | 3.29988 | 3.32847 | 3.34899 | 3.36342 | 3.37379 | 3.38152 | 3.38741 | 3.39164 | 3.39418 | 3.3949 | 3.39366 | 3.39045 | 3.38541 | 3.37869 | 3.37041 | 3.36073 | 3.34979 | 3.33769 | 3.32443 | 3.31013 | 3.29487 | 3.27891 | 3.26232 | 3.24542 | 3.22828 | 3.2108 | 3.19287 | 3.17433 | 3.15503 | 3.13475 | 3.11339 | 3.09116 | 3.0685 | 3.04596 | 3.02393 | 3.00247 | 2.98145 | 2.96072 | 2.94013 | 2.91978 | 2.89966 | 2.87964 | 2.8596 | 2.8394 | 2.8192 |
2 | 2.83454 | 2.83871 | 2.84283 | 2.84705 | 2.85138 | 2.85587 | 2.8606 | 2.86566 | 2.87093 | 2.87661 | 2.88264 | 2.88898 | 2.89577 | 2.90308 | 2.91097 | 2.91953 | 2.92873 | 2.93863 | 2.94929 | 2.96072 | 2.97272 | 2.98493 | 2.9969 | 3.00833 | 3.0192 | 3.0299 | 3.04101 | 3.05345 | 3.06777 | 3.08416 | 3.10221 | 3.12106 | 3.13983 | 3.1581 | 3.17623 | 3.19519 | 3.21584 | 3.23747 | 3.25889 | 3.27835 | 3.29384 | 3.30362 | 3.30681 | 3.30393 | 3.297 | 3.28925 | 3.28409 | 3.28505 | 3.29326 | 3.30923 | 3.33267 | 3.36251 | 3.39661 | 3.43188 | 3.46492 | 3.49295 | 3.51458 | 3.53004 | 3.54067 | 3.54797 | 3.55306 | 3.55675 | 3.55921 | 3.56045 | 3.56034 | 3.55876 | 3.55571 | 3.55132 | 3.54585 | 3.5395 | 3.53235 | 3.52442 | 3.51583 | 3.50668 | 3.497 | 3.48683 | 3.47626 | 3.46552 | 3.45501 | 3.44481 | 3.43477 | 3.42465 | 3.41419 | 3.40303 | 3.39082 | 3.37731 | 3.36265 | 3.34745 | 3.33245 | 3.31818 | 3.30473 | 3.29186 | 3.27921 | 3.26655 | 3.25369 | 3.24045 | 3.22659 | 3.21181 | 3.196 | 3.17942 |
3 | 2.58284 | 2.58458 | 2.58629 | 2.58808 | 2.58996 | 2.59192 | 2.59401 | 2.59627 | 2.59873 | 2.60131 | 2.60414 | 2.60714 | 2.61029 | 2.61361 | 2.61714 | 2.62089 | 2.62486 | 2.62909 | 2.63361 | 2.63835 | 2.6433 | 2.64838 | 2.65354 | 2.6587 | 2.66375 | 2.6688 | 2.67383 | 2.67892 | 2.68411 | 2.68937 | 2.6947 | 2.70012 | 2.70563 | 2.71141 | 2.71775 | 2.7249 | 2.73344 | 2.74327 | 2.75433 | 2.76642 | 2.77931 | 2.79272 | 2.80649 | 2.82064 | 2.83541 | 2.85121 | 2.86872 | 2.88905 | 2.91289 | 2.94088 | 2.97325 | 3.00946 | 3.0478 | 3.08554 | 3.11947 | 3.14696 | 3.16677 | 3.17938 | 3.18631 | 3.18924 | 3.1895 | 3.18801 | 3.18498 | 3.18039 | 3.17411 | 3.16611 | 3.15641 | 3.14512 | 3.13241 | 3.11843 | 3.10329 | 3.08714 | 3.07014 | 3.05237 | 3.03393 | 3.01504 | 2.99569 | 2.97612 | 2.95642 | 2.9366 | 2.91667 | 2.89655 | 2.87622 | 2.85563 | 2.83474 | 2.81361 | 2.79235 | 2.77113 | 2.75015 | 2.72956 | 2.70934 | 2.68951 | 2.67009 | 2.65112 | 2.63262 | 2.61461 | 2.59718 | 2.58034 | 2.56404 | 2.54816 |
Y = dat.Y @head Y
... (178, 4)
Row | water | fat | protein | typ |
---|---|---|---|---|
Float64 | Float64 | Float64 | String | |
1 | 60.5 | 22.5 | 16.7 | train |
2 | 46.0 | 40.1 | 13.5 | train |
3 | 71.0 | 8.4 | 20.5 | train |
typ = Y.typ tab(typ)
OrderedCollections.OrderedDict{String, Int64} with 3 entries: "test" => 31 "train" => 115 "val" => 32
wlst = names(X) wl = parse.(Float64, wlst) #plotsp(X, wl; xlabel = "Wavelength (nm)", ylabel = "Absorbance").f
100-element Vector{Float64}: 850.0 852.0 854.0 856.0 858.0 860.0 862.0 864.0 866.0 868.0 ⋮ 1032.0 1034.0 1036.0 1038.0 1040.0 1042.0 1044.0 1046.0 1048.0
Preprocessing
model1 = snv() model2 = savgol(npoint = 15, deriv = 2, degree = 3) model = pip(model1, model2) fit!(model, X) @head Xp = transf(model, X) #plotsp(Xp, wl; xlabel = "Wavelength (nm)", ylabel = "Absorbance").f
3×100 Matrix{Float64}: 0.000397076 0.000495203 0.000598623 … 0.00827138 0.00917311 0.00946072 0.00242055 0.00244366 0.00234233 0.00580631 0.00689249 0.00749316 0.0011927 0.00122721 0.00120098 0.0101019 0.0108142 0.0108444 ... (178, 100)
s = typ .== "train" Xtrain = Xp[s, :] Ytrain = Y[s, :] Xtest = rmrow(Xp, s) Ytest = rmrow(Y, s) ntrain = nro(Xtrain) ntest = nro(Xtest) ntot = ntrain + ntest (ntot = ntot, ntrain, ntest)
(ntot = 178, ntrain = 115, ntest = 63)
Working response y
namy = names(Y)[1:3] j = 2 nam = namy[j] # work on the j-th y-variable ytrain = Ytrain[:, nam] ytest = Ytest[:, nam]
63-element Vector{Float64}: 29.8 1.4 4.6 11.0 17.0 22.4 27.9 46.5 6.1 2.0 ⋮ 18.1 19.4 24.8 27.2 28.4 31.3 33.8 35.5 42.5
gamma = 100 nlv = 15 model = kplsr(; gamma, nlv) # For direct KPLSR (Bennett & Embrechts, 2003), use as below: #model = dkplsr(; gamma, nlv) fit!(model, Xtrain, ytrain) @names model fitm = model.fitm @names fitm
(:X, :Kt, :T, :C, :U, :R, :DKt, :vtot, :xscales, :ymeans, :yscales, :weights, :iter, :kwargs, :par)
@head pred = predict(model, Xtest).pred
3×1 Matrix{Float64}: 30.27263234256255 1.2491123455562665 4.4710390881702065 ... (63, 1)
@head predict(model, Xtest; nlv = 2).pred
3×1 Matrix{Float64}: 29.32135166200425 2.162005442116719 5.685860978803078 ... (63, 1)
predict(model, Xtest; nlv = 0:2).pred
3-element Vector{Matrix{Float64}}: [18.358260869565218; 18.358260869565218; … ; 18.358260869565218; 18.358260869565218;;] [26.864333587051988; 4.1217670557698955; … ; 36.33040026195786; 45.001339268847815;;] [29.32135166200425; 2.162005442116719; … ; 39.208838218241105; 46.42134811761902;;]
rmsep(pred, ytest)
1×1 Matrix{Float64}: 0.5511112753976203
bias(pred, ytest)
1×1 Matrix{Float64}: -0.15413101976451102
mse(pred, ytest)
Row | nam | rmsep | sep | bias | cor2 | r2 | rpd | rpdr | mean | n |
---|---|---|---|---|---|---|---|---|---|---|
String | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Int64 | |
1 | y1 | 0.551 | 0.529 | -0.154 | 0.998 | 0.998 | 22.995 | 29.673 | 18.414 | 63 |
@head r = residreg(pred, ytest) # residuals
3×1 Matrix{Float64}: -0.47263234256254805 0.15088765444373342 0.1289609118297932 ... (63, 1)
plotxy(pred, ytest; size = (500, 400), color = (:red, .5), bisect = true, title = string("Test set - variable ", nam), xlabel = "Prediction", ylabel = "Observed").f
Residuals
plotxy(ytest, r; size = (500, 400), color = (:red, .5), zeros = true, title = string("Test set - variable ", nam), xlabel = "Observed (Test)", ylabel = "Residuals").f
zpred = vec(pred) zr = vec(r)
63-element Vector{Float64}: -0.47263234256254805 0.15088765444373342 0.1289609118297932 -0.010875159198107553 1.4489775124476338 0.290389476158623 0.2844942468389391 -0.06310162831640298 0.22063704374842352 0.24732308729945274 ⋮ 0.12741438205125277 -0.1781680431607029 0.9736514052289138 -0.3613955588708784 -0.02422859343892725 1.34464900017079 0.09247884206364887 -0.5730256984462514 0.16607155085765157
Predictions
model_lo = loessr(span = 1/2) fit!(model_lo, zpred, ytest) pred_lo = predict(model_lo, sort(zpred)).pred f, ax = plotxy(zpred, ytest; size = (500, 400), bisect = true, title = string("Test set - variable ", nam), xlabel = "Predicted", ylabel = "Observed") lines!(ax, sort(zpred), vec(pred_lo); color = :red) f
Residuals
model_lo = loessr(span = 1/2) fit!(model_lo, zpred, zr) pred_lo = predict(model_lo, sort(zpred)).pred f, ax = plotxy(zpred, zr; size = (500, 400), title = string("Test set - variable ", nam), xlabel = "Predictions", ylabel = "Residuals") lines!(ax, sort(zpred), vec(pred_lo); color = :red) hlines!(ax, 0; color = :grey) f