Prediction - forages2 - Rrda

using Jchemo, JchemoData
using JLD2, CairoMakie
using FreqTables

RR-DA is a alternative to PLSR-DA, where ridge regression is used instead of PLSR to predict the Y-dummy table.

Data importation

path_jdat = dirname(dirname(pathof(JchemoData)))
db = joinpath(path_jdat, "data/forages2.jld2") 
@load db dat
@names dat
(:X, :Y)
X = dat.X 
@head X
... (485, 700)
3×700 DataFrame
600 columns omitted
Row1100110211041106110811101112111411161118112011221124112611281130113211341136113811401142114411461148115011521154115611581160116211641166116811701172117411761178118011821184118611881190119211941196119812001202120412061208121012121214121612181220122212241226122812301232123412361238124012421244124612481250125212541256125812601262126412661268127012721274127612781280128212841286128812901292129412961298
Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64
1-0.000231591-0.000175945-8.48176e-52.05217e-50.0001100940.0001617570.0001549530.0001637540.0001876020.000214990.0002424790.0002654980.0002821410.0002814420.0002710250.0002610750.0002572840.0002521770.000242930.0002282950.0002190970.0002141360.0002156120.0002189820.0002280040.0002360810.0002360170.0002203270.0001870960.0001371387.68593e-51.13679e-5-5.00951e-5-9.54664e-5-0.000119199-0.000131897-0.000142349-0.000161489-0.00019387-0.000244808-0.000303259-0.000366904-0.000416738-0.000451535-0.00046995-0.000478637-0.000477348-0.000478142-0.000476719-0.000479701-0.000482037-0.000496769-0.000511959-0.000532094-0.000542661-0.000540188-0.000512715-0.00045798-0.000370395-0.000256256-0.0001269071.13716e-60.0001190470.0002127450.0002756850.0003078630.0003135470.0002969770.0002696610.0002478180.0002339440.0002287730.0002245670.0002212560.0002188930.0002177410.0002101440.000196640.0001819490.0001697740.0001516910.000123859.23378e-55.9959e-52.58352e-5-4.77314e-6-3.21835e-5-5.53154e-5-6.71707e-5-6.54166e-5-5.16448e-5-2.43366e-51.12255e-54.68917e-57.773e-50.0001067850.0001331730.0001536070.0001685180.000182591
2-9.66352e-5-3.30928e-55.64966e-50.0001541350.0002377250.0002957890.0003195870.0003574050.0004046110.0004479960.0004797860.0004883390.0004659290.0004023010.0003136480.0002202260.0001384837.35084e-53.50018e-52.83293e-56.05478e-50.0001182720.0001877260.0002498420.000296970.0003150620.0002988280.0002516430.0001870550.0001182435.60849e-53.8727e-6-3.28778e-5-4.84688e-5-4.38912e-5-3.34954e-5-2.72637e-5-3.65483e-5-6.62949e-5-0.000121833-0.000193587-0.000280244-0.000362132-0.000434981-0.000494461-0.000546531-0.000590606-0.000638514-0.000684688-0.000734688-0.000783664-0.000842714-0.000892596-0.000930301-0.000938118-0.000913585-0.000846217-0.000737781-0.000588122-0.000410395-0.000220611-3.69382e-50.0001310720.0002660780.0003583770.0004086840.0004245280.0004121470.0003838960.0003579570.0003383850.0003267490.0003155720.000305420.0002936710.0002800050.0002594820.0002336970.00020440.0001771990.0001479890.0001123257.33317e-53.48779e-5-2.5229e-6-3.27922e-5-5.52233e-5-7.06412e-5-7.49675e-5-6.44041e-5-4.04393e-5-6.50489e-63.09196e-56.87358e-50.0001052020.0001423130.0001771820.0002066520.0002307880.000253703
3-0.000131769-7.8398e-57.92223e-78.90044e-50.0001600220.0001984350.0001965980.0002122250.0002411090.0002712350.0003010450.0003249210.0003376190.0003258570.000299790.0002771670.000270180.000271650.0002776060.0002877220.0003082030.0003248470.0003285730.0003108060.000277280.0002268980.0001604748.30948e-57.98825e-6-5.32827e-5-9.57157e-5-0.000123438-0.0001371-0.000134382-0.00011527-9.07963e-5-6.97458e-5-6.29138e-5-7.14491e-5-9.85941e-5-0.000137562-0.000192678-0.000248177-0.000303993-0.000356125-0.000407616-0.0004553-0.000507819-0.000555473-0.000603436-0.000647099-0.000701763-0.000754429-0.000806879-0.000838493-0.000842167-0.000803445-0.000720829-0.000592138-0.000428566-0.000245567-6.43964e-50.0001011930.0002322420.0003221330.0003736050.0003918170.0003793320.0003478290.0003164950.0002922360.0002784310.0002646210.0002503050.0002393870.0002345040.0002246330.0002056840.0001804080.0001576150.0001351080.0001068717.3258e-53.90321e-57.34127e-6-1.78231e-5-3.94282e-5-5.6427e-5-6.15935e-5-5.19038e-5-2.96367e-53.09722e-63.98752e-57.62892e-50.0001082710.0001376320.0001656240.0001911820.0002115860.000229586
Y = dat.Y
@head Y
... (485, 4)
3×4 DataFrame
Rowdmndftyptest
Float64?Float64?StringInt64
192.2337.58Legume forages1
293.2649.6462Legume forages0
392.963.2939Forage trees0
y = Y.typ
test = Y.test
tab(y)
OrderedCollections.OrderedDict{String, Int64} with 3 entries:
  "Cereal and grass forages" => 160
  "Forage trees"             => 101
  "Legume forages"           => 224
freqtable(y, test)
3×2 Named Matrix{Int64}
             Dim1 ╲ Dim2 │   0    1
─────────────────────────┼─────────
Cereal and grass forages │ 100   60
Forage trees             │  56   45
Legume forages           │ 167   57
wlst = names(X)
wl = parse.(Int, wlst)
#plotsp(X, wl; xlabel = "Wavelength (nm)", ylabel = "Absorbance").f
700-element Vector{Int64}:
 1100
 1102
 1104
 1106
 1108
 1110
 1112
 1114
 1116
 1118
    ⋮
 2482
 2484
 2486
 2488
 2490
 2492
 2494
 2496
 2498

Note:: X-data are already preprocessed (SNV + Savitsky-Golay 2nd deriv).

Split Tot to Train/Test

s = Bool.(test)
Xtrain = rmrow(X, s)
ytrain = rmrow(y, s)
Xtest = X[s, :]
ytest = y[s]
ntot = nro(X)
ntrain = nro(Xtrain)
ntest = nro(Xtest)
(ntot = ntot, ntrain, ntest)
(ntot = 485, ntrain = 323, ntest = 162)
tab(ytrain)
OrderedCollections.OrderedDict{String, Int64} with 3 entries:
  "Cereal and grass forages" => 100
  "Forage trees"             => 56
  "Legume forages"           => 167
tab(ytest)
OrderedCollections.OrderedDict{String, Int64} with 3 entries:
  "Cereal and grass forages" => 60
  "Forage trees"             => 45
  "Legume forages"           => 57

Model fitting

model = rrda(; lb = 1e-5)
fit!(model, Xtrain, ytrain)
@names model
(:algo, :fitm, :kwargs)
fitm = model.fitm
@names fitm
(:fitm, :lev, :ni, :par)
typeof(fitm.fitm)
Jchemo.Rr
@names fitm.fitm
(:V, :TtDY, :sv, :xmeans, :xscales, :ymeans, :weights, :par)
res = predict(model, Xtest)
@names res
@head pred = res.pred
@head res.posterior   # predicted posterior probabilities
3×1 Matrix{String}:
 "Legume forages"
 "Cereal and grass forages"
 "Cereal and grass forages"
... (162, 1)
 
3×3 Matrix{Float64}:
 0.277612  -0.212171   0.934559
 0.647358   0.072594   0.280048
 0.611353  -0.0854871  0.474134
... (162, 3)
@head predict(model, Xtest).pred
3×1 Matrix{String}:
 "Legume forages"
 "Cereal and grass forages"
 "Cereal and grass forages"
... (162, 1)
predict(model, Xtest; lb = [.1, .01]).pred
2-element Vector{Matrix{String}}:
 ["Legume forages"; "Legume forages"; … ; "Legume forages"; "Legume forages";;]
 ["Legume forages"; "Legume forages"; … ; "Legume forages"; "Legume forages";;]
errp(pred, ytest)
1×1 Matrix{Float64}:
 0.08641975308641975
merrp(pred, ytest)
1×1 Matrix{Float64}:
 0.096588693957115

Confusion matrix

cf = conf(pred, ytest)
@names cf
(:cnt, :pct, :A, :Apct, :diagpct, :accpct, :lev)
cf.cnt
3×4 DataFrame
Rowypred_Cereal and grass foragespred_Forage treespred_Legume forages
StringInt64Int64Int64
1Cereal and grass forages5712
2Forage trees03510
3Legume forages1056
cf.pct
3×4 DataFrame
Rowlevelspred_Cereal and grass foragespred_Forage treespred_Legume forages
StringFloat64Float64Float64
1Cereal and grass forages95.01.73.3
2Forage trees0.077.822.2
3Legume forages1.80.098.2
plotconf(cf).f