using Jchemo, JchemoData using JLD2, CairoMakie
path_jdat = dirname(dirname(pathof(JchemoData))) db = joinpath(path_jdat, "data", "iris.jld2") @load db dat @names dat
(:X,)
summ(dat.X)
(res = 5×7 DataFrame Row │ variable mean std min max n nmissing │ Symbol Union… Union… Any Any Int64 Int64 ─────┼────────────────────────────────────────────────────────────────── 1 │ sepal_length 5.843 0.828 4.3 7.9 150 0 2 │ sepal_width 3.057 0.436 2.0 4.4 150 0 3 │ petal_length 3.758 1.765 1.0 6.9 150 0 4 │ petal_width 1.199 0.762 0.1 2.5 150 0 5 │ species setosa virginica 150 0, ntot = 150)
@head X = dat.X[:, 1:4]
... (150, 4)
Row | sepal_length | sepal_width | petal_length | petal_width |
---|---|---|---|---|
Float64 | Float64 | Float64 | Float64 | |
1 | 5.1 | 3.5 | 1.4 | 0.2 |
2 | 4.9 | 3.0 | 1.4 | 0.2 |
3 | 4.7 | 3.2 | 1.3 | 0.2 |
@head y = dat.X[:, 5] # the classes (species)
3-element Vector{String}: "setosa" "setosa" "setosa" ... (150,)
tab(y)
OrderedCollections.OrderedDict{String, Int64} with 3 entries: "setosa" => 50 "versicolor" => 50 "virginica" => 50
lev = mlev(y) nlev = length(lev)
3
ntot = nro(X) ntest = 30 s = samprand(ntot, ntest) Xtrain = X[s.train, :] ytrain = y[s.train] Xtest = X[s.test, :] ytest = y[s.test] ntrain = ntot - ntest (ntot = ntot, ntrain, ntest)
(ntot = 150, ntrain = 120, ntest = 30)
tab(ytrain)
OrderedCollections.OrderedDict{String, Int64} with 3 entries: "setosa" => 41 "versicolor" => 41 "virginica" => 38
tab(ytest)
OrderedCollections.OrderedDict{String, Int64} with 3 entries: "setosa" => 9 "versicolor" => 9 "virginica" => 12
model = fda(nlv = 2) #model = fdasvd(nlv = 2) # alternative algorithm (same result) fit!(model, Xtrain, ytrain) fitm = model.fitm @names fitm
(:T, :V, :Tcenters, :eig, :sstot, :W, :xmeans, :xscales, :weights, :lev, :ni, :par)
lev = fitm.lev nlev = length(lev)
3
Fda scores
@head Ttrain = fitm.T
3×2 Matrix{Float64}: -7.67227 0.280642 -6.72478 -0.799262 -7.09482 -0.198632 ... (120, 2)
Class centers projected on the score space
ct = fitm.Tcenters
3×2 Matrix{Float64}: -7.14852 0.233595 1.92642 -0.805302 5.63437 0.616841
f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; title = "Fda", ellipse = true) scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red) f
color = cgrad(:lightrainbow, nlev; categorical = true, alpha = .7) f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; color, title = "Fda", ellipse = true) scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red) f
Columns of matrix V
are coefficients of the linear discriminant function, corresponding to object LD
of function lda
of the R package MASS
.
V = fitm.V
4×2 Matrix{Float64}: -0.788234 -0.263803 -1.57967 2.26533 2.11753 -0.948042 2.68483 3.15521
V' * V # not orthogonal
2×2 Matrix{Float64}: 14.8089 3.09316 3.09316 16.0554
fitm.eig
4-element Vector{Float64}: 28.780516708141555 0.36070779568823824 4.454486492572078e-15 -7.97918739539233e-16
fitm.sstot
29.141224503829797
Explained variance is computed from the Pca of the class centers in the transformed scale.
summary(model)
(explvarx = 2×4 DataFrame Row │ nlv var pvar cumpvar │ Int64 Float64 Float64 Float64 ─────┼─────────────────────────────────────── 1 │ 1 28.7805 0.987622 0.987622 2 │ 2 0.360708 0.0123779 1.0,)
@head Ttest = transf(model, Xtest)
3×2 Matrix{Float64}: -6.93948 -0.983054 -6.91444 -1.0884 -8.81819 2.69739 ... (30, 2)
i = 1 # class "i" in test color = cgrad(:lightrainbow, nlev; categorical = true, alpha = .7) f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; color, title = string("Projection test-class ", lev[i], " (blue points)"), ellipse = true) scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red) s = ytest .== lev[i] zT = Ttest[s, :] scatter!(ax, zT[:, 1], zT[:, 2]; markersize = 10, color = :blue) f