Fda - iris

using Jchemo, JchemoData
using JLD2, CairoMakie

Data importation

path_jdat = dirname(dirname(pathof(JchemoData)))
db = joinpath(path_jdat, "data", "iris.jld2") 
@load db dat
@names dat
(:X,)
summ(dat.X)
(res = 5×7 DataFrame
 Row │ variable      mean    std     min     max        n      nmissing
     │ Symbol        Union…  Union…  Any     Any        Int64  Int64
─────┼──────────────────────────────────────────────────────────────────
   1 │ sepal_length  5.843   0.828   4.3     7.9          150         0
   2 │ sepal_width   3.057   0.436   2.0     4.4          150         0
   3 │ petal_length  3.758   1.765   1.0     6.9          150         0
   4 │ petal_width   1.199   0.762   0.1     2.5          150         0
   5 │ species                       setosa  virginica    150         0, ntot = 150)
@head X = dat.X[:, 1:4]
... (150, 4)
3×4 DataFrame
Rowsepal_lengthsepal_widthpetal_lengthpetal_width
Float64Float64Float64Float64
15.13.51.40.2
24.93.01.40.2
34.73.21.30.2
@head y = dat.X[:, 5]    # the classes (species)
3-element Vector{String}:
 "setosa"
 "setosa"
 "setosa"
... (150,)
tab(y)
OrderedCollections.OrderedDict{String, Int64} with 3 entries:
  "setosa"     => 50
  "versicolor" => 50
  "virginica"  => 50
lev = mlev(y)
nlev = length(lev)
3

Split Tot to Train/Test

ntot = nro(X)
ntest = 30
s = samprand(ntot, ntest)
Xtrain = X[s.train, :]
ytrain = y[s.train]
Xtest = X[s.test, :]
ytest = y[s.test]
ntrain = ntot - ntest
(ntot = ntot, ntrain, ntest)
(ntot = 150, ntrain = 120, ntest = 30)
tab(ytrain)
OrderedCollections.OrderedDict{String, Int64} with 3 entries:
  "setosa"     => 41
  "versicolor" => 41
  "virginica"  => 38
tab(ytest)
OrderedCollections.OrderedDict{String, Int64} with 3 entries:
  "setosa"     => 9
  "versicolor" => 9
  "virginica"  => 12

Model fitting

model = fda(nlv = 2)
#model = fdasvd(nlv = 2)     # alternative algorithm (same result)
fit!(model, Xtrain, ytrain) 
fitm = model.fitm
@names fitm
(:T, :V, :Tcenters, :eig, :sstot, :W, :xmeans, :xscales, :weights, :lev, :ni, :par)
lev = fitm.lev
nlev = length(lev)
3

Fda scores

@head Ttrain = fitm.T
3×2 Matrix{Float64}:
 -7.67227   0.280642
 -6.72478  -0.799262
 -7.09482  -0.198632
... (120, 2)

Class centers projected on the score space

ct = fitm.Tcenters
3×2 Matrix{Float64}:
 -7.14852   0.233595
  1.92642  -0.805302
  5.63437   0.616841
f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; title = "Fda", ellipse = true)
scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red)
f
color = cgrad(:lightrainbow, nlev; categorical = true, alpha = .7)
f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; color, title = "Fda", ellipse = true)
scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red)
f

X-loadings matrix

Columns of matrix V are coefficients of the linear discriminant function, corresponding to object LD of function lda of the R package MASS.

V = fitm.V
4×2 Matrix{Float64}:
 -0.788234  -0.263803
 -1.57967    2.26533
  2.11753   -0.948042
  2.68483    3.15521
V' * V    # not orthogonal
2×2 Matrix{Float64}:
 14.8089    3.09316
  3.09316  16.0554

Explained variance

fitm.eig
4-element Vector{Float64}:
 28.780516708141555
  0.36070779568823824
  4.454486492572078e-15
 -7.97918739539233e-16
fitm.sstot
29.141224503829797

Explained variance is computed from the Pca of the class centers in the transformed scale.

summary(model)
(explvarx = 2×4 DataFrame
 Row │ nlv    var        pvar       cumpvar
     │ Int64  Float64    Float64    Float64
─────┼───────────────────────────────────────
   1 │     1  28.7805    0.987622   0.987622
   2 │     2   0.360708  0.0123779  1.0,)

Projection of Xtest to the score space

@head Ttest = transf(model, Xtest)
3×2 Matrix{Float64}:
 -6.93948  -0.983054
 -6.91444  -1.0884
 -8.81819   2.69739
... (30, 2)
i = 1  # class "i" in test
color = cgrad(:lightrainbow, nlev; categorical = true, alpha = .7)
f, ax = plotxy(Ttrain[:, 1], Ttrain[:, 2], ytrain; color, 
    title = string("Projection test-class ", lev[i], " (blue points)"), ellipse = true)
scatter!(ax, ct[:, 1], ct[:, 2]; markersize = 10, color = :red)
s = ytest .== lev[i]
zT = Ttest[s, :]
scatter!(ax, zT[:, 1], zT[:, 2]; markersize = 10, color = :blue)
f