PCA - cassav - with groups
using Jchemo, JchemoData
using JLD2, CairoMakie, GLMakie
Data import
using JchemoData, JLD2, CairoMakie, GLMakie
# Root directory of the installed JchemoData package
# (`pathof` points at the package's main source file, two levels below the root).
path_jdat = dirname(dirname(pathof(JchemoData)))
# Path components are passed separately so `joinpath` inserts the platform's separator.
db = joinpath(path_jdat, "data", "cassav.jld2")
# Read the object saved under the name `dat` into a variable of the same name.
@load db dat
# List the names available in `dat` (the output below shows `:X` and `:Y`).
@names dat
(:X, :Y)
# `X` table of the dataset; `@head` previews its first rows and prints its dimensions.
X = dat.X
@head X
... (280, 1050)
3×1050 DataFrame
950 columns omitted
1 | 0.399996 | 0.406522 | 0.413008 | 0.41958 | 0.426073 | 0.43219 | 0.438007 | 0.443949 | 0.44995 | 0.455081 | 0.45934 | 0.463535 | 0.467663 | 0.4711 | 0.473688 | 0.475809 | 0.477585 | 0.479175 | 0.480602 | 0.481708 | 0.482613 | 0.483457 | 0.484222 | 0.484873 | 0.485359 | 0.485729 | 0.486114 | 0.486469 | 0.486478 | 0.48599 | 0.485258 | 0.484175 | 0.482594 | 0.481078 | 0.48004 | 0.478911 | 0.477338 | 0.475769 | 0.474313 | 0.472419 | 0.470243 | 0.468227 | 0.465727 | 0.461779 | 0.456278 | 0.44993 | 0.442413 | 0.433093 | 0.422563 | 0.411268 | 0.399469 | 0.387154 | 0.374109 | 0.360391 | 0.346233 | 0.331832 | 0.317179 | 0.302623 | 0.288342 | 0.274052 | 0.260325 | 0.247976 | 0.236827 | 0.226326 | 0.216817 | 0.208673 | 0.201658 | 0.195395 | 0.189823 | 0.185099 | 0.181055 | 0.177544 | 0.174485 | 0.171748 | 0.169348 | 0.167329 | 0.165411 | 0.163188 | 0.161034 | 0.159611 | 0.158424 | 0.15692 | 0.155245 | 0.153579 | 0.151826 | 0.150128 | 0.148504 | 0.14687 | 0.145472 | 0.144396 | 0.143197 | 0.141824 | 0.140615 | 0.13959 | 0.138668 | 0.137875 | 0.137149 | 0.136432 | 0.135721 | 0.134834 | ⋯ |
2 | 0.460896 | 0.46706 | 0.475677 | 0.483438 | 0.490809 | 0.49877 | 0.506465 | 0.513268 | 0.519421 | 0.525145 | 0.530744 | 0.536144 | 0.540957 | 0.545328 | 0.549367 | 0.552727 | 0.555333 | 0.557708 | 0.560185 | 0.562357 | 0.563894 | 0.564909 | 0.565277 | 0.565188 | 0.565042 | 0.564877 | 0.564355 | 0.563649 | 0.563257 | 0.563012 | 0.562394 | 0.561379 | 0.560341 | 0.559398 | 0.558006 | 0.555778 | 0.553171 | 0.550812 | 0.548678 | 0.546616 | 0.544579 | 0.542149 | 0.538448 | 0.533893 | 0.529018 | 0.523422 | 0.516682 | 0.509148 | 0.501018 | 0.491444 | 0.480527 | 0.469045 | 0.456873 | 0.443933 | 0.430479 | 0.416739 | 0.402484 | 0.387522 | 0.372858 | 0.359546 | 0.347254 | 0.334975 | 0.322734 | 0.311426 | 0.301126 | 0.291658 | 0.282998 | 0.27483 | 0.266987 | 0.259567 | 0.25244 | 0.245352 | 0.238349 | 0.23153 | 0.224645 | 0.217789 | 0.211007 | 0.204099 | 0.197368 | 0.191349 | 0.185842 | 0.180374 | 0.17498 | 0.169971 | 0.165402 | 0.161085 | 0.156995 | 0.153294 | 0.150104 | 0.147443 | 0.145052 | 0.142827 | 0.140707 | 0.138663 | 0.136762 | 0.13506 | 0.133552 | 0.132191 | 0.130964 | 0.129819 | ⋯ |
3 | 0.464731 | 0.471416 | 0.47828 | 0.48733 | 0.497117 | 0.503004 | 0.505579 | 0.509316 | 0.514649 | 0.519317 | 0.523192 | 0.527309 | 0.531434 | 0.534748 | 0.537076 | 0.539033 | 0.540811 | 0.541883 | 0.542183 | 0.54232 | 0.542744 | 0.543293 | 0.543675 | 0.54372 | 0.543589 | 0.543868 | 0.544487 | 0.544648 | 0.544239 | 0.543642 | 0.542987 | 0.542122 | 0.541004 | 0.539853 | 0.538904 | 0.537862 | 0.536159 | 0.534021 | 0.532138 | 0.530054 | 0.527327 | 0.524462 | 0.521329 | 0.51703 | 0.511295 | 0.504519 | 0.496551 | 0.487221 | 0.476865 | 0.464969 | 0.451786 | 0.438225 | 0.424616 | 0.41058 | 0.395809 | 0.380595 | 0.365225 | 0.349705 | 0.334253 | 0.319378 | 0.305204 | 0.291255 | 0.277619 | 0.265214 | 0.25445 | 0.244907 | 0.236255 | 0.228498 | 0.221774 | 0.216081 | 0.210953 | 0.206254 | 0.202226 | 0.198769 | 0.195577 | 0.192592 | 0.189974 | 0.187627 | 0.185356 | 0.183314 | 0.181477 | 0.179274 | 0.176502 | 0.174001 | 0.172465 | 0.171099 | 0.169275 | 0.167275 | 0.165531 | 0.164022 | 0.162529 | 0.161149 | 0.160061 | 0.159114 | 0.157931 | 0.156577 | 0.155341 | 0.154278 | 0.153273 | 0.152306 | ⋯ |
# `Y` table of the dataset (it holds the `year` column used below for grouping).
Y = dat.Y
@head Y
... (280, 2)
1 | 2009 | 1.58068 |
2 | 2009 | 7.85516 |
3 | 2009 | 1.77595 |
# Grouping variable: the year attached to each observation.
year = Y.year
# Tabulate the number of observations per year.
tab(year)
OrderedCollections.OrderedDict{Int64, Int64} with 5 entries:
2009 => 42
2010 => 47
2011 => 40
2012 => 71
2013 => 80
Model fitting and scores T
# Declare the model with 10 latent variables, then fit it on X.
model = pcasvd(; nlv = 10)
fit!(model, X)
# Fitted object stored in the model.
fitm = model.fitm
# Score matrix: one row per observation, one column per latent variable.
T = fitm.T
@head T
3×10 Matrix{Float64}:
-1.74085 -1.25003 -0.0913075 … -0.0120857 0.0234582 -0.0483602
-2.17482 -1.49341 0.591863 -0.0216646 -0.0135722 -0.0162003
-1.58075 -1.37 0.35223 -0.0694361 -0.0282525 -0.0334348
... (280, 10)
2-D score spaces
CairoMakie.activate!()
# Scores on the first two components, with observations grouped by year.
plotxy(
    T[:, 1], T[:, 2], year;
    zeros = true, xlabel = "PC1", ylabel = "PC2",
).f
# Categorical palette with one semi-transparent color per level of `year`
# (`mlev` presumably returns the distinct levels; check the Jchemo documentation).
lev = mlev(year)
nlev = length(lev)
color = cgrad(:Dark2_5, nlev; categorical = true, alpha = 0.7)
# Multi-panel score plots built from the first six score columns.
plotlv(
    T[:, 1:6], year;
    size = (750, 400), shape = (2, 3), color, zeros = true,
    xlabel = "PC", ylabel = "PC",
).f
3-D score spaces
CairoMakie.activate!()
# GLMakie.activate!()  # alternative backend, for interactive axis rotation
# Index of the first of the three consecutive score columns to display.
i = 1
# Named `figsize` rather than `size`: a global called `size` would shadow `Base.size`
# and break any later `size(...)` call in the same session.
figsize = (600, 350)
# `color` is the palette defined for the 2-D plots.
plotxyz(
    T[:, i], T[:, i + 1], T[:, i + 2], year;
    size = figsize, color, markersize = 10,
    xlabel = string("PC", i), ylabel = string("PC", i + 1), zlabel = string("PC", i + 2),
    title = "Pca score space",
).f