using Jchemo, JchemoData using JLD2, CairoMakie using FreqTables
path_jdat = dirname(dirname(pathof(JchemoData))) db = joinpath(path_jdat, "data/forages2.jld2") @load db dat @names dat
(:X, :Y)
X = dat.X @head X
... (485, 700)
Row | 1100 | 1102 | 1104 | 1106 | 1108 | 1110 | 1112 | 1114 | 1116 | 1118 | 1120 | 1122 | 1124 | 1126 | 1128 | 1130 | 1132 | 1134 | 1136 | 1138 | 1140 | 1142 | 1144 | 1146 | 1148 | 1150 | 1152 | 1154 | 1156 | 1158 | 1160 | 1162 | 1164 | 1166 | 1168 | 1170 | 1172 | 1174 | 1176 | 1178 | 1180 | 1182 | 1184 | 1186 | 1188 | 1190 | 1192 | 1194 | 1196 | 1198 | 1200 | 1202 | 1204 | 1206 | 1208 | 1210 | 1212 | 1214 | 1216 | 1218 | 1220 | 1222 | 1224 | 1226 | 1228 | 1230 | 1232 | 1234 | 1236 | 1238 | 1240 | 1242 | 1244 | 1246 | 1248 | 1250 | 1252 | 1254 | 1256 | 1258 | 1260 | 1262 | 1264 | 1266 | 1268 | 1270 | 1272 | 1274 | 1276 | 1278 | 1280 | 1282 | 1284 | 1286 | 1288 | 1290 | 1292 | 1294 | 1296 | 1298 | ⋯ |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | ⋯ | |
1 | -0.000231591 | -0.000175945 | -8.48176e-5 | 2.05217e-5 | 0.000110094 | 0.000161757 | 0.000154953 | 0.000163754 | 0.000187602 | 0.00021499 | 0.000242479 | 0.000265498 | 0.000282141 | 0.000281442 | 0.000271025 | 0.000261075 | 0.000257284 | 0.000252177 | 0.00024293 | 0.000228295 | 0.000219097 | 0.000214136 | 0.000215612 | 0.000218982 | 0.000228004 | 0.000236081 | 0.000236017 | 0.000220327 | 0.000187096 | 0.000137138 | 7.68593e-5 | 1.13679e-5 | -5.00951e-5 | -9.54664e-5 | -0.000119199 | -0.000131897 | -0.000142349 | -0.000161489 | -0.00019387 | -0.000244808 | -0.000303259 | -0.000366904 | -0.000416738 | -0.000451535 | -0.00046995 | -0.000478637 | -0.000477348 | -0.000478142 | -0.000476719 | -0.000479701 | -0.000482037 | -0.000496769 | -0.000511959 | -0.000532094 | -0.000542661 | -0.000540188 | -0.000512715 | -0.00045798 | -0.000370395 | -0.000256256 | -0.000126907 | 1.13716e-6 | 0.000119047 | 0.000212745 | 0.000275685 | 0.000307863 | 0.000313547 | 0.000296977 | 0.000269661 | 0.000247818 | 0.000233944 | 0.000228773 | 0.000224567 | 0.000221256 | 0.000218893 | 0.000217741 | 0.000210144 | 0.00019664 | 0.000181949 | 0.000169774 | 0.000151691 | 0.00012385 | 9.23378e-5 | 5.9959e-5 | 2.58352e-5 | -4.77314e-6 | -3.21835e-5 | -5.53154e-5 | -6.71707e-5 | -6.54166e-5 | -5.16448e-5 | -2.43366e-5 | 1.12255e-5 | 4.68917e-5 | 7.773e-5 | 0.000106785 | 0.000133173 | 0.000153607 | 0.000168518 | 0.000182591 | ⋯ |
2 | -9.66352e-5 | -3.30928e-5 | 5.64966e-5 | 0.000154135 | 0.000237725 | 0.000295789 | 0.000319587 | 0.000357405 | 0.000404611 | 0.000447996 | 0.000479786 | 0.000488339 | 0.000465929 | 0.000402301 | 0.000313648 | 0.000220226 | 0.000138483 | 7.35084e-5 | 3.50018e-5 | 2.83293e-5 | 6.05478e-5 | 0.000118272 | 0.000187726 | 0.000249842 | 0.00029697 | 0.000315062 | 0.000298828 | 0.000251643 | 0.000187055 | 0.000118243 | 5.60849e-5 | 3.8727e-6 | -3.28778e-5 | -4.84688e-5 | -4.38912e-5 | -3.34954e-5 | -2.72637e-5 | -3.65483e-5 | -6.62949e-5 | -0.000121833 | -0.000193587 | -0.000280244 | -0.000362132 | -0.000434981 | -0.000494461 | -0.000546531 | -0.000590606 | -0.000638514 | -0.000684688 | -0.000734688 | -0.000783664 | -0.000842714 | -0.000892596 | -0.000930301 | -0.000938118 | -0.000913585 | -0.000846217 | -0.000737781 | -0.000588122 | -0.000410395 | -0.000220611 | -3.69382e-5 | 0.000131072 | 0.000266078 | 0.000358377 | 0.000408684 | 0.000424528 | 0.000412147 | 0.000383896 | 0.000357957 | 0.000338385 | 0.000326749 | 0.000315572 | 0.00030542 | 0.000293671 | 0.000280005 | 0.000259482 | 0.000233697 | 0.0002044 | 0.000177199 | 0.000147989 | 0.000112325 | 7.33317e-5 | 3.48779e-5 | -2.5229e-6 | -3.27922e-5 | -5.52233e-5 | -7.06412e-5 | -7.49675e-5 | -6.44041e-5 | -4.04393e-5 | -6.50489e-6 | 3.09196e-5 | 6.87358e-5 | 0.000105202 | 0.000142313 | 0.000177182 | 0.000206652 | 0.000230788 | 0.000253703 | ⋯ |
3 | -0.000131769 | -7.8398e-5 | 7.92223e-7 | 8.90044e-5 | 0.000160022 | 0.000198435 | 0.000196598 | 0.000212225 | 0.000241109 | 0.000271235 | 0.000301045 | 0.000324921 | 0.000337619 | 0.000325857 | 0.00029979 | 0.000277167 | 0.00027018 | 0.00027165 | 0.000277606 | 0.000287722 | 0.000308203 | 0.000324847 | 0.000328573 | 0.000310806 | 0.00027728 | 0.000226898 | 0.000160474 | 8.30948e-5 | 7.98825e-6 | -5.32827e-5 | -9.57157e-5 | -0.000123438 | -0.0001371 | -0.000134382 | -0.00011527 | -9.07963e-5 | -6.97458e-5 | -6.29138e-5 | -7.14491e-5 | -9.85941e-5 | -0.000137562 | -0.000192678 | -0.000248177 | -0.000303993 | -0.000356125 | -0.000407616 | -0.0004553 | -0.000507819 | -0.000555473 | -0.000603436 | -0.000647099 | -0.000701763 | -0.000754429 | -0.000806879 | -0.000838493 | -0.000842167 | -0.000803445 | -0.000720829 | -0.000592138 | -0.000428566 | -0.000245567 | -6.43964e-5 | 0.000101193 | 0.000232242 | 0.000322133 | 0.000373605 | 0.000391817 | 0.000379332 | 0.000347829 | 0.000316495 | 0.000292236 | 0.000278431 | 0.000264621 | 0.000250305 | 0.000239387 | 0.000234504 | 0.000224633 | 0.000205684 | 0.000180408 | 0.000157615 | 0.000135108 | 0.000106871 | 7.3258e-5 | 3.90321e-5 | 7.34127e-6 | -1.78231e-5 | -3.94282e-5 | -5.6427e-5 | -6.15935e-5 | -5.19038e-5 | -2.96367e-5 | 3.09722e-6 | 3.98752e-5 | 7.62892e-5 | 0.000108271 | 0.000137632 | 0.000165624 | 0.000191182 | 0.000211586 | 0.000229586 | ⋯ |
Y = dat.Y @head Y
... (485, 4)
Row | dm | ndf | typ | test |
---|---|---|---|---|
Float64? | Float64? | String | Int64 | |
1 | 92.23 | 37.58 | Legume forages | 1 |
2 | 93.26 | 49.6462 | Legume forages | 0 |
3 | 92.9 | 63.2939 | Forage trees | 0 |
y = Y.typ # the classes test = Y.test tab(y)
OrderedCollections.OrderedDict{String, Int64} with 3 entries: "Cereal and grass forages" => 160 "Forage trees" => 101 "Legume forages" => 224
freqtable(y, test)
3×2 Named Matrix{Int64} Dim1 ╲ Dim2 │ 0 1 ─────────────────────────┼───────── Cereal and grass forages │ 100 60 Forage trees │ 56 45 Legume forages │ 167 57
wlst = names(X) wl = parse.(Int, wlst) #plotsp(X, wl; xlabel = "Wavelength (nm)", ylabel = "Absorbance").f
700-element Vector{Int64}: 1100 1102 1104 1106 1108 1110 1112 1114 1116 1118 ⋮ 2482 2484 2486 2488 2490 2492 2494 2496 2498
Note:: X-data are already preprocessed (SNV + Savitsky-Golay 2nd deriv).
The model is fitted on Train, and a Test set is projected on the training space. In this example, Train is already defined in variable test
of the dataset, and Test is defined by the remaining samples. But Tot could also be split a posteriori, for instance by sampling (random, systematic or any other designs). See for instance functions samprand
, sampsys
, etc.
s = Bool.(test) Xtrain = rmrow(X, s) ytrain = rmrow(y, s) Xtest = X[s, :] ytest = y[s] ntot = nro(X) ntrain = nro(Xtrain) ntest = nro(Xtest) (ntot = ntot, ntrain, ntest)
(ntot = 485, ntrain = 323, ntest = 162)
Since X-columns are highly colinear, the Fda requires a regularization, for instance 1) by preliminary dimension reduction, or 2) by using a ridge regularization.
model0 = pcasvd(nlv = 10) fit!(model0, Xtrain) @names model0 @names model0.fitm
(:T, :V, :sv, :xmeans, :xscales, :weights, :niter, :par)
@head Ttrain_pca = model0.fitm.T
3×10 Matrix{Float64}: -0.00669306 -0.00650046 -0.00570717 … -0.000492699 0.000134327 -0.00387969 -0.0068879 -0.00728328 0.0003361 -0.000581481 0.0022825 0.00305238 0.00109557 -0.00164726 -0.0014889 ... (323, 10)
model = fda(nlv = 2) #model = fdasvd(nlv = 2) # alternative algorithm (same result) fit!(model, Ttrain_pca, ytrain) fitm = model.fitm @names fitm
(:T, :V, :Tcenters, :eig, :sstot, :W, :xmeans, :xscales, :weights, :lev, :ni, :par)
@head Ttrain = model.fitm.T
3×2 Matrix{Float64}: 2.41503 -1.46642 1.41666 -1.4342 -1.54614 -0.344225 ... (323, 2)
lev = fitm.lev nlev = length(lev)
3
ct = fitm.Tcenters
3×2 Matrix{Float64}: -2.1618 -0.223723 1.49851 -1.67024 0.791993 0.694047
f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; title = "Fda", ellipse = true) scatter!(ax, ct[:, 1], ct[:, 2], markersize = 10, color = :red) f
color = cgrad(:lightrainbow, nlev; categorical = true, alpha = .5) f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; color, title = "Fda", ellipse = true) scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red) f
Projection of Xtest
@head Ttest_pca = transf(model0, Xtest)
3×10 Matrix{Float64}: -0.00881891 -0.00355513 0.00143197 … 0.000140379 -0.000265216 0.00647425 0.00296032 -0.000761146 -0.00113056 -0.00160408 0.00351213 0.000177464 -0.000182268 -0.00140921 -0.00232458 ... (162, 10)
@head Ttest = transf(model, Ttest_pca)
3×2 Matrix{Float64}: 1.76619 -0.105807 -2.41847 -0.524465 -2.57702 -0.907449 ... (162, 2)
i = 1 # class "i" in test color = cgrad(:lightrainbow, nlev; categorical = true, alpha = .5) f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; color, title = "Fda", ellipse = true) scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red) s = ytest .== lev[i] zT = Ttest[s, :] scatter!(ax, zT[:, 1], zT[:, 2]; markersize = 10, color = :blue) f
Note: If the ridge parameter lb
is too small (e.g. here from around 1e-10), the model can be overfitted and predictions of new observations is misleading.
lb = 1e-5 model = fda(; nlv = 2, lb) #model = fdasvd(; nlv = 2, lb) fit!(model, Xtrain, ytrain) fitm = model.fitm
Jchemo.Fda([1.7829806394884713 -0.8923344270339282; 1.3203919349887605 -1.087112071286717; … ; -0.7042782276724117 0.523450171819936; -0.4631097344316628 1.0465334079722217], [5.171712666455356 -9.508299170939283; 5.153707863984817 -9.06078711679522; … ; 3.032245770823203 21.347000304208176; -2.8066707021640216 21.53541187066701], [-1.3213215617847378 -0.25815326934600685; 1.2872849511152809 -0.8352132611860519; 0.3595461012935211 0.43465430874861954], [0.8946608889469787, 0.2392545735461798, 8.113420527620786e-17, 7.522381965283366e-17, 1.2357532069459468e-17, 1.2357532069459468e-17, -5.640384069100635e-17, -5.640384069100635e-17, 3.953552944990857e-17, 3.953552944990857e-17 … 1.7581207434812442e-20, 2.101291977425629e-20, -1.8276419310707133e-20, 1.1596325005033801e-20, -3.75017290833722e-21, -3.75017290833722e-21, -9.096083385580036e-21, 8.960398722442791e-21, 5.4950099649394124e-21, 5.4950099649394124e-21], 1.1339154624931584, [1.0069729150340446e-5 6.666992825610627e-8 … 2.0918098869706948e-8 2.1162650478714127e-8; 6.666992825610627e-8 1.0063873539470998e-5 … 2.008643294430349e-8 2.0247774844963594e-8; … ; 2.0918098869706948e-8 2.008643294430349e-8 … 1.009178260196924e-5 9.092935251310007e-8; 2.1162650478714127e-8 2.0247774844963594e-8 … 9.092935251310007e-8 1.0095765380348832e-5], [-0.00019330623941241694, -0.0001415769474021643, -5.770661245116858e-5, 4.723558902506759e-5, 0.00014758462542250662, 0.00021809097323352518, 0.0002341317111996633, 0.000252799982295447, 0.0002796860235471441, 0.0003040280867910269 … -0.0018442000247695933, -0.0019172866156863932, -0.0019496441042941074, -0.0019473165883640694, -0.0018947013042759696, -0.0016716820169876765, -0.0013441905794144805, -0.0009918653793508437, -0.0006005900823885629, -0.00025528401464108526], [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 … 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], Jchemo.Weight{Float64}([0.0030959752321981413, 0.0030959752321981417, 0.0030959752321981417, 0.0030959752321981413, 0.0030959752321981413, 0.0030959752321981417, 0.0030959752321981417, 0.0030959752321981417, 0.0030959752321981417, 0.0030959752321981417 … 0.0030959752321981417, 0.0030959752321981417, 0.0030959752321981417, 0.0030959752321981417, 0.0030959752321981417, 0.0030959752321981413, 0.0030959752321981413, 0.0030959752321981413, 0.0030959752321981413, 0.0030959752321981413]), ["Cereal and grass forages", "Forage trees", "Legume forages"], [100, 56, 167], Jchemo.ParFda(2, 1.0e-5, :prop, false))
lev = fitm.lev nlev = length(lev)
3
ct = fitm.Tcenters
3×2 Matrix{Float64}: -1.32132 -0.258153 1.28728 -0.835213 0.359546 0.434654
@head Ttrain = fitm.T
3×2 Matrix{Float64}: 1.78298 -0.892334 1.32039 -1.08711 -0.881565 -0.00540248 ... (323, 2)
f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; title = "Fda", ellipse = true) scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red) f
color = cgrad(:lightrainbow, nlev; categorical = true, alpha = .7) f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; color, title = "Fda", ellipse = true) scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red) f
Projection of Xtest
@head Ttest = transf(model, Xtest)
3×2 Matrix{Float64}: 1.47243 0.224802 -1.39627 -0.2853 -0.995825 -0.334509 ... (162, 2)
i = 1 # class "i" in test color = cgrad(:lightrainbow, nlev; categorical = true, alpha = .7) f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; color, title = string("Projection test-class ", lev[i], " (blue points)"), ellipse = true) scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red) s = ytest .== lev[i] zT = Ttest[s, :] scatter!(ax, zT[:, 1], zT[:, 2]; markersize = 10, color = :blue) f