using Jchemo, JchemoData using JLD2, CairoMakie using FreqTables
path_jdat = dirname(dirname(pathof(JchemoData))) db = joinpath(path_jdat, "data/forages2.jld2") @load db dat @names dat
(:X, :Y)
X = dat.X @head X
... (485, 700)
Row | 1100 | 1102 | 1104 | 1106 | 1108 | 1110 | 1112 | 1114 | 1116 | 1118 | 1120 | 1122 | 1124 | 1126 | 1128 | 1130 | 1132 | 1134 | 1136 | 1138 | 1140 | 1142 | 1144 | 1146 | 1148 | 1150 | 1152 | 1154 | 1156 | 1158 | 1160 | 1162 | 1164 | 1166 | 1168 | 1170 | 1172 | 1174 | 1176 | 1178 | 1180 | 1182 | 1184 | 1186 | 1188 | 1190 | 1192 | 1194 | 1196 | 1198 | 1200 | 1202 | 1204 | 1206 | 1208 | 1210 | 1212 | 1214 | 1216 | 1218 | 1220 | 1222 | 1224 | 1226 | 1228 | 1230 | 1232 | 1234 | 1236 | 1238 | 1240 | 1242 | 1244 | 1246 | 1248 | 1250 | 1252 | 1254 | 1256 | 1258 | 1260 | 1262 | 1264 | 1266 | 1268 | 1270 | 1272 | 1274 | 1276 | 1278 | 1280 | 1282 | 1284 | 1286 | 1288 | 1290 | 1292 | 1294 | 1296 | 1298 | ⋯ |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | ⋯ | |
1 | -0.000231591 | -0.000175945 | -8.48176e-5 | 2.05217e-5 | 0.000110094 | 0.000161757 | 0.000154953 | 0.000163754 | 0.000187602 | 0.00021499 | 0.000242479 | 0.000265498 | 0.000282141 | 0.000281442 | 0.000271025 | 0.000261075 | 0.000257284 | 0.000252177 | 0.00024293 | 0.000228295 | 0.000219097 | 0.000214136 | 0.000215612 | 0.000218982 | 0.000228004 | 0.000236081 | 0.000236017 | 0.000220327 | 0.000187096 | 0.000137138 | 7.68593e-5 | 1.13679e-5 | -5.00951e-5 | -9.54664e-5 | -0.000119199 | -0.000131897 | -0.000142349 | -0.000161489 | -0.00019387 | -0.000244808 | -0.000303259 | -0.000366904 | -0.000416738 | -0.000451535 | -0.00046995 | -0.000478637 | -0.000477348 | -0.000478142 | -0.000476719 | -0.000479701 | -0.000482037 | -0.000496769 | -0.000511959 | -0.000532094 | -0.000542661 | -0.000540188 | -0.000512715 | -0.00045798 | -0.000370395 | -0.000256256 | -0.000126907 | 1.13716e-6 | 0.000119047 | 0.000212745 | 0.000275685 | 0.000307863 | 0.000313547 | 0.000296977 | 0.000269661 | 0.000247818 | 0.000233944 | 0.000228773 | 0.000224567 | 0.000221256 | 0.000218893 | 0.000217741 | 0.000210144 | 0.00019664 | 0.000181949 | 0.000169774 | 0.000151691 | 0.00012385 | 9.23378e-5 | 5.9959e-5 | 2.58352e-5 | -4.77314e-6 | -3.21835e-5 | -5.53154e-5 | -6.71707e-5 | -6.54166e-5 | -5.16448e-5 | -2.43366e-5 | 1.12255e-5 | 4.68917e-5 | 7.773e-5 | 0.000106785 | 0.000133173 | 0.000153607 | 0.000168518 | 0.000182591 | ⋯ |
2 | -9.66352e-5 | -3.30928e-5 | 5.64966e-5 | 0.000154135 | 0.000237725 | 0.000295789 | 0.000319587 | 0.000357405 | 0.000404611 | 0.000447996 | 0.000479786 | 0.000488339 | 0.000465929 | 0.000402301 | 0.000313648 | 0.000220226 | 0.000138483 | 7.35084e-5 | 3.50018e-5 | 2.83293e-5 | 6.05478e-5 | 0.000118272 | 0.000187726 | 0.000249842 | 0.00029697 | 0.000315062 | 0.000298828 | 0.000251643 | 0.000187055 | 0.000118243 | 5.60849e-5 | 3.8727e-6 | -3.28778e-5 | -4.84688e-5 | -4.38912e-5 | -3.34954e-5 | -2.72637e-5 | -3.65483e-5 | -6.62949e-5 | -0.000121833 | -0.000193587 | -0.000280244 | -0.000362132 | -0.000434981 | -0.000494461 | -0.000546531 | -0.000590606 | -0.000638514 | -0.000684688 | -0.000734688 | -0.000783664 | -0.000842714 | -0.000892596 | -0.000930301 | -0.000938118 | -0.000913585 | -0.000846217 | -0.000737781 | -0.000588122 | -0.000410395 | -0.000220611 | -3.69382e-5 | 0.000131072 | 0.000266078 | 0.000358377 | 0.000408684 | 0.000424528 | 0.000412147 | 0.000383896 | 0.000357957 | 0.000338385 | 0.000326749 | 0.000315572 | 0.00030542 | 0.000293671 | 0.000280005 | 0.000259482 | 0.000233697 | 0.0002044 | 0.000177199 | 0.000147989 | 0.000112325 | 7.33317e-5 | 3.48779e-5 | -2.5229e-6 | -3.27922e-5 | -5.52233e-5 | -7.06412e-5 | -7.49675e-5 | -6.44041e-5 | -4.04393e-5 | -6.50489e-6 | 3.09196e-5 | 6.87358e-5 | 0.000105202 | 0.000142313 | 0.000177182 | 0.000206652 | 0.000230788 | 0.000253703 | ⋯ |
3 | -0.000131769 | -7.8398e-5 | 7.92223e-7 | 8.90044e-5 | 0.000160022 | 0.000198435 | 0.000196598 | 0.000212225 | 0.000241109 | 0.000271235 | 0.000301045 | 0.000324921 | 0.000337619 | 0.000325857 | 0.00029979 | 0.000277167 | 0.00027018 | 0.00027165 | 0.000277606 | 0.000287722 | 0.000308203 | 0.000324847 | 0.000328573 | 0.000310806 | 0.00027728 | 0.000226898 | 0.000160474 | 8.30948e-5 | 7.98825e-6 | -5.32827e-5 | -9.57157e-5 | -0.000123438 | -0.0001371 | -0.000134382 | -0.00011527 | -9.07963e-5 | -6.97458e-5 | -6.29138e-5 | -7.14491e-5 | -9.85941e-5 | -0.000137562 | -0.000192678 | -0.000248177 | -0.000303993 | -0.000356125 | -0.000407616 | -0.0004553 | -0.000507819 | -0.000555473 | -0.000603436 | -0.000647099 | -0.000701763 | -0.000754429 | -0.000806879 | -0.000838493 | -0.000842167 | -0.000803445 | -0.000720829 | -0.000592138 | -0.000428566 | -0.000245567 | -6.43964e-5 | 0.000101193 | 0.000232242 | 0.000322133 | 0.000373605 | 0.000391817 | 0.000379332 | 0.000347829 | 0.000316495 | 0.000292236 | 0.000278431 | 0.000264621 | 0.000250305 | 0.000239387 | 0.000234504 | 0.000224633 | 0.000205684 | 0.000180408 | 0.000157615 | 0.000135108 | 0.000106871 | 7.3258e-5 | 3.90321e-5 | 7.34127e-6 | -1.78231e-5 | -3.94282e-5 | -5.6427e-5 | -6.15935e-5 | -5.19038e-5 | -2.96367e-5 | 3.09722e-6 | 3.98752e-5 | 7.62892e-5 | 0.000108271 | 0.000137632 | 0.000165624 | 0.000191182 | 0.000211586 | 0.000229586 | ⋯ |
Y = dat.Y @head Y
... (485, 4)
Row | dm | ndf | typ | test |
---|---|---|---|---|
Float64? | Float64? | String | Int64 | |
1 | 92.23 | 37.58 | Legume forages | 1 |
2 | 93.26 | 49.6462 | Legume forages | 0 |
3 | 92.9 | 63.2939 | Forage trees | 0 |
y = Y.typ test = Y.test tab(y)
OrderedCollections.OrderedDict{String, Int64} with 3 entries: "Cereal and grass forages" => 160 "Forage trees" => 101 "Legume forages" => 224
freqtable(y, test)
3×2 Named Matrix{Int64} Dim1 ╲ Dim2 │ 0 1 ─────────────────────────┼───────── Cereal and grass forages │ 100 60 Forage trees │ 56 45 Legume forages │ 167 57
wlst = names(X) wl = parse.(Int, wlst) #plotsp(X, wl; xlabel = "Wavelength (nm)", ylabel = "Absorbance").f
700-element Vector{Int64}: 1100 1102 1104 1106 1108 1110 1112 1114 1116 1118 ⋮ 2482 2484 2486 2488 2490 2492 2494 2496 2498
Note:: X-data are already preprocessed (SNV + Savitsky-Golay 2nd deriv).
The model is fitted on Train, and the generalization error is estimated on Test. In this example, Train is already defined in variable typ
of the dataset, and Test is defined by the remaining samples. But Tot could also be split a posteriori, for instance by sampling (random, systematic or any other designs). See for instance functions samprand
, sampsys
, etc.
s = Bool.(test) Xtrain = rmrow(X, s) ytrain = rmrow(y, s) Xtest = X[s, :] ytest = y[s] ntot = nro(X) ntrain = nro(Xtrain) ntest = nro(Xtest) (ntot = ntot, ntrain, ntest)
(ntot = 485, ntrain = 323, ntest = 162)
tab(ytrain)
OrderedCollections.OrderedDict{String, Int64} with 3 entries: "Cereal and grass forages" => 100 "Forage trees" => 56 "Legume forages" => 167
tab(ytest)
OrderedCollections.OrderedDict{String, Int64} with 3 entries: "Cereal and grass forages" => 60 "Forage trees" => 45 "Legume forages" => 57
'PLSDA' can represent different methods, e.g. PLS-MLR-DA (or PLSR-DA), PLS-LDA, PLS-QDA or PLS-KDA-DA.
Note:
In the functions below, the default value for argument prior
is set to :prop
. If the classes are highly unbalanced, it is recommended to set prior = :unif
to avoid bias in the predictions (especially when using function plsrda
). See the help of the respective functions for more details.
For unbalanced classes, it is also recommended to use merrp
instead of errp
(see the respectve help pages) to compute the global prediction error rates in CV processes or on test sets.
nlv = 15 model = plsrda(; nlv) #model = plslda(; nlv) #model = plsqda(; nlv) #model = plsqda(; nlv, alpha = 0.5) # 'alpha' = continuum parameter #model = plskdeda(; nlv) #model = plskdeda(; nlv, a = .5) # 'a' = bandwidth parameter (see also parameter 'h')
Jchemo.JchemoModel{typeof(Jchemo.plsrda), Base.Pairs{Symbol, Int64, Tuple{Symbol}, @NamedTuple{nlv::Int64}}}(Jchemo.plsrda, nothing, Base.Pairs(:nlv => 15))
fit!(model, Xtrain, ytrain) @names model
(:algo, :fitm, :kwargs)
fitm = model.fitm @names fitm
(:fitm, :lev, :ni, :par)
typeof(fitm.fitm)
Jchemo.Plsr
@names fitm.fitm
(:T, :V, :R, :W, :C, :TT, :xmeans, :xscales, :ymeans, :yscales, :weights, :niter, :par)
res = predict(model, Xtest) @names res
(:pred, :posterior)
@head pred = res.pred
3×1 Matrix{String}: "Legume forages" "Cereal and grass forages" "Cereal and grass forages" ... (162, 1)
@head res.posterior # predicted posterior probabilities
3×3 Matrix{Float64}: -0.00450279 0.0717177 0.932785 0.912764 -0.0344078 0.121644 1.07725 -0.00150365 -0.075744 ... (162, 3)
@head predict(model, Xtest; nlv = 2).pred
3×1 Matrix{String}: "Legume forages" "Cereal and grass forages" "Cereal and grass forages" ... (162, 1)
predict(model, Xtest; nlv = 0:2).pred
3-element Vector{Matrix{String}}: ["Legume forages"; "Legume forages"; … ; "Legume forages"; "Legume forages";;] ["Legume forages"; "Cereal and grass forages"; … ; "Cereal and grass forages"; "Cereal and grass forages";;] ["Legume forages"; "Cereal and grass forages"; … ; "Cereal and grass forages"; "Legume forages";;]
errp(pred, ytest)
1×1 Matrix{Float64}: 0.1111111111111111
merrp(pred, ytest)
1×1 Matrix{Float64}: 0.11910331384015593
cf = conf(pred, ytest) @names cf
(:cnt, :pct, :A, :Apct, :diagpct, :accpct, :lev)
cf.cnt
Row | y | pred_Cereal and grass forages | pred_Forage trees | pred_Legume forages |
---|---|---|---|---|
String | Int64 | Int64 | Int64 | |
1 | Cereal and grass forages | 54 | 1 | 5 |
2 | Forage trees | 0 | 35 | 10 |
3 | Legume forages | 0 | 2 | 55 |
cf.pct
Row | levels | pred_Cereal and grass forages | pred_Forage trees | pred_Legume forages |
---|---|---|---|---|
String | Float64 | Float64 | Float64 | |
1 | Cereal and grass forages | 90.0 | 1.7 | 8.3 |
2 | Forage trees | 0.0 | 77.8 | 22.2 |
3 | Legume forages | 0.0 | 3.5 | 96.5 |
cf.diagpct
Row | lev | errp_pct |
---|---|---|
String | Float64 | |
1 | Cereal and grass forages | 10.0 |
2 | Forage trees | 22.2 |
3 | Legume forages | 3.5 |
cf.accpct
Row | typ | accuracy_pct |
---|---|---|
String | Float64 | |
1 | Overall | 88.9 |
2 | Mean by class | 88.1 |
plotconf(cf).f
plotconf(cf; cnt = false).f
plotconf(cf; ptext = false).f