generated from Pakillo/quarto-course-website-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add Olivetti pca note
- Loading branch information
Showing
13 changed files
with
828 additions
and
0 deletions.
There are no files selected for viewing
11 changes: 11 additions & 0 deletions
11
_freeze/category/dimension-reduction/2-olivetti-face/execute-results/html.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"hash": "b3e9fbdefd68e585282e16606555fadb", | ||
"result": { | ||
"markdown": "---\ntitle: \"2-olivetti-face-project\"\ncode-fold: true\n---\n\n## 1. load package\n\n::: {.cell execution_count=1}\n``` {.julia .cell-code}\nusing MLJ,DataFrames,CSV,Random\ninclude(\"./olivetti-face-code/1-dataprocessing.jl\")\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```\nload_olivetti_faces (generic function with 1 method)\n```\n:::\n:::\n\n\n## 2. load data\n\n::: {.cell execution_count=2}\n``` {.julia .cell-code}\n(Xtrain, Xtest), (ytrain, ytest)=load_olivetti_faces()\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n\n::: {.ansi-escaped-output}\n```{=html}\n<pre>((<span class=\"ansi-bold\">320×4096 DataFrame</span>\n<span class=\"ansi-bold\"> Row </span>│<span class=\"ansi-bold\"> x1 </span><span class=\"ansi-bold\"> x2 </span><span class=\"ansi-bold\"> x3 </span><span class=\"ansi-bold\"> x4 </span><span class=\"ansi-bold\"> x5 </span><span class=\"ansi-bold\"> x6 </span><span class=\"ansi-bold\"> x7 </span> ⋯\n │<span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span> ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n 1 │ 0.595041 0.640496 0.615702 0.644628 0.68595 0.72314 0.731405 ⋯\n 2 │ 0.103306 0.219008 0.177686 0.219008 0.392562 0.57438 0.669422\n 3 │ 0.289256 0.338843 0.417355 0.504132 0.553719 0.561983 0.582645\n 4 │ 0.528926 0.797521 0.826446 0.822314 0.822314 0.818182 0.805785\n 5 │ 0.161157 0.202479 0.268595 0.334711 0.384298 0.392562 0.396694 ⋯\n 6 │ 0.169422 0.293388 0.561983 0.677686 0.727273 0.756198 0.768595\n 7 │ 0.136364 0.177686 0.235537 0.289256 0.334711 0.363636 0.396694\n 8 │ 0.768595 0.756198 0.743802 0.743802 0.752066 0.747934 
0.735537\n 9 │ 0.144628 0.210744 0.285124 0.342975 0.392562 0.404959 0.409091 ⋯\n 10 │ 0.566116 0.595041 0.603306 0.619835 0.636364 0.640496 0.661157\n 11 │ 0.524793 0.53719 0.578512 0.628099 0.669422 0.690083 0.68595\n ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱\n 311 │ 0.181818 0.338843 0.355372 0.404959 0.438017 0.458678 0.471074\n 312 │ 0.454545 0.528926 0.644628 0.747934 0.780992 0.780992 0.801653 ⋯\n 313 │ 0.553719 0.607438 0.636364 0.64876 0.652893 0.64876 0.673554\n 314 │ 0.0909091 0.136364 0.177686 0.231405 0.363636 0.504132 0.541322\n 315 │ 0.252066 0.252066 0.252066 0.252066 0.256198 0.239669 0.235537\n 316 │ 0.326446 0.483471 0.524793 0.599174 0.665289 0.702479 0.702479 ⋯\n 317 │ 0.53719 0.57438 0.553719 0.615702 0.38843 0.487603 0.690083\n 318 │ 0.128099 0.18595 0.247934 0.31405 0.38843 0.46281 0.520661\n 319 │ 0.586777 0.702479 0.731405 0.731405 0.743802 0.772727 0.793388\n 320 │ 0.136364 0.107438 0.0909091 0.115702 0.115702 0.119835 0.181818 ⋯\n<span class=\"ansi-cyan-fg\"> 4089 columns and 299 rows omitted</span>, <span class=\"ansi-bold\">80×4096 DataFrame</span>\n<span class=\"ansi-bold\"> Row </span>│<span class=\"ansi-bold\"> x1 </span><span class=\"ansi-bold\"> x2 </span><span class=\"ansi-bold\"> x3 </span><span class=\"ansi-bold\"> x4 </span><span class=\"ansi-bold\"> x5 </span><span class=\"ansi-bold\"> x6 </span><span class=\"ansi-bold\"> x7 </span><span class=\"ansi-bold\"> x</span> ⋯\n │<span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> Float64 </span><span class=\"ansi-bright-black-fg\"> F</span> ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n 1 │ 0.219008 0.235537 0.252066 0.326446 0.392562 0.553719 0.714876 0 ⋯\n 2 │ 
0.194215 0.268595 0.367769 0.487603 0.545455 0.561983 0.578512 0\n 3 │ 0.210744 0.206612 0.194215 0.181818 0.219008 0.239669 0.256198 0\n 4 │ 0.392562 0.475207 0.661157 0.590909 0.471074 0.545455 0.673554 0\n 5 │ 0.719008 0.727273 0.72314 0.714876 0.72314 0.731405 0.739669 0 ⋯\n 6 │ 0.429752 0.458678 0.549587 0.623967 0.673554 0.714876 0.72314 0\n 7 │ 0.289256 0.157025 0.14876 0.190083 0.169422 0.194215 0.404959 0\n 8 │ 0.628099 0.665289 0.68595 0.694215 0.719008 0.731405 0.752066 0\n 9 │ 0.479339 0.549587 0.628099 0.690083 0.677686 0.652893 0.640496 0 ⋯\n 10 │ 0.677686 0.677686 0.681818 0.706612 0.731405 0.739669 0.756198 0\n 11 │ 0.123967 0.132231 0.11157 0.11157 0.119835 0.136364 0.136364 0\n ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱\n 71 │ 0.123967 0.128099 0.115702 0.136364 0.115702 0.107438 0.115702 0\n 72 │ 0.384298 0.22314 0.22314 0.305785 0.429752 0.508265 0.557851 0 ⋯\n 73 │ 0.409091 0.590909 0.657025 0.681818 0.694215 0.731405 0.760331 0\n 74 │ 0.487603 0.330578 0.252066 0.38843 0.785124 0.789256 0.780992 0\n 75 │ 0.285124 0.285124 0.272727 0.214876 0.169422 0.165289 0.264463 0\n 76 │ 0.136364 0.132231 0.123967 0.119835 0.11157 0.128099 0.132231 0 ⋯\n 77 │ 0.243802 0.243802 0.247934 0.247934 0.252066 0.256198 0.256198 0\n 78 │ 0.603306 0.586777 0.541322 0.603306 0.603306 0.607438 0.64876 0\n 79 │ 0.772727 0.764463 0.752066 0.764463 0.785124 0.793388 0.797521 0\n 80 │ 0.475207 0.491736 0.5 0.512397 0.524793 0.528926 0.545455 0 ⋯\n<span class=\"ansi-cyan-fg\"> 4089 columns and 59 rows omitted</span>), (CategoricalArrays.CategoricalValue{Int64, UInt32}[27, 3, 39, 10, 23, 24, 23, 30, 23, 14 … 21, 26, 5, 35, 28, 9, 31, 0, 15, 37], CategoricalArrays.CategoricalValue{Int64, UInt32}[6, 35, 6, 31, 5, 4, 3, 2, 19, 4 … 32, 29, 22, 36, 12, 32, 16, 14, 3, 20]))</pre>\n```\n:::\n\n:::\n:::\n\n\n", | ||
"supporting": [ | ||
"2-olivetti-face_files/figure-html" | ||
], | ||
"filters": [], | ||
"includes": {} | ||
} | ||
} |
15 changes: 15 additions & 0 deletions
15
_freeze/category/dimension-reduction/3-olivetti-face/execute-results/html.json
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file added
BIN
+912 KB
...ze/category/dimension-reduction/3-olivetti-face/figure-html/cell-6-output-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
--- | ||
title: "2-olivetti-face-project" | ||
code-fold: true | ||
--- | ||
|
||
|
||
## olivetti face recognition 的项目 | ||
|
||
## 1. dataprocessing.jl | ||
|
||
读取 csv 文件, 返回 olivetti face 训练数据,测试数据和标签 | ||
`load_olivetti_faces` 函数返回训练,测试数据集和对应标签 | ||
```julia | ||
(Xtrain, Xtest), (ytrain, ytest)=load_olivetti_faces() | ||
``` | ||
|
||
## 2. train&save-model.jl | ||
|
||
导入数据,返回一个高阶函数`make_model` | ||
`make_model` 函数首先接受训练数据集, 然后等待 `dim` 需要缩减到的维度参数 | ||
|
||
最终训练的模型保存到对应的 jlso 文件中:`JLSO.save("$(pwd())/models/of-model-$(dim)pcs.jlso",:pca=>mach)` | ||
|
||
|
||
```julia | ||
# Build a PCA trainer bound to the training table `Xtr`.
# The returned closure takes the target dimension `dim`, fits a PCA machine on
# `Xtr` and saves it as `models/of-model-<dim>pcs.jlso` under the current
# working directory. The closure itself returns `nothing`.
function make_model(Xtr)
    return (dim) -> begin
        model = PCA(maxoutdim=dim)
        mach = machine(model, Xtr) |> fit!
        path = "$(pwd())/models/of-model-$(dim)pcs.jlso"
        try
            # Make sure the target directory exists before writing, so the
            # save also works on a fresh checkout without a `models/` folder.
            mkpath(dirname(path))
            JLSO.save(path, :pca => mach)
            @info "$(dim) dimension pca model saved"
        catch e
            # Saving stays best-effort, but keep the exception and its
            # backtrace in the log record instead of only its text.
            @warn "$(e) has problem" exception = (e, catch_backtrace())
        end
    end
end

make_ol_model = make_model(Xtrain)
#make_ol_model.([10,20,150])
``` | ||
|
||
|
||
## 3-transform-reconstruct-methods | ||
在 `MLJ.jl`的 pca 方法和 `MultivariateStats.jl` 的方法稍有不同, 使用的是 `transform`函数,而不是`project`方法 | ||
|
||
主要函数为`transform_to_pcadata1`,为高阶函数 | ||
输入参数`dim` 为需要缩减到的维度 | ||
函数内部调用第2 步获得的模型 | ||
返回一个函数 等待 df 参数 | ||
内部调用`transform`函数对数据做降维处理 | ||
返回数据 | ||
|
||
```julia | ||
# Higher-order helper: given the target dimension `dim`, load the saved PCA
# machine for that dimension once and return a function that waits for a
# DataFrame of images and projects it with `transform`.
function transform_to_pcadata(dim::Int)
    mach = JLSO.load("$(pwd())/models/of-model-$(dim)pcs.jlso")[:pca]
    return (imgs::DataFrame) -> begin
        @info "$dim pca proceeding..."
        pcaX = transform(mach, imgs)  # dimension-reduced data
        return pcaX
    end
end

# The accompanying text and the calling notebook refer to this function as
# `transform_to_pcadata1`; expose that name as an alias so both spellings work.
const transform_to_pcadata1 = transform_to_pcadata
``` | ||
|
||
|
||
`transform_to_pcadata2` 与`transform_to_pcadata1` 一样是高阶函数 | ||
但是参数输入的顺序不同, `transform_to_pcadata2`中先输入dataframe, 然后等待维度参数 | ||
```julia | ||
# Higher-order helper with the argument order flipped: bind the image table
# first, then wait for the target dimension. The saved PCA machine for that
# dimension is loaded on every call of the returned function.
function transform_to_pcadata2(imgs::DataFrame)
    function reduce_to(dim::Int)
        @info "$dim pca proceeding..."
        stored = JLSO.load("$(pwd())/models/of-model-$(dim)pcs.jlso")
        # Project the bound images with the stored machine and hand back the result.
        return transform(stored[:pca], imgs)
    end
    return reduce_to
end
``` | ||
|
||
### 重建数据方法 | ||
从低维度数据恢复原始维度数据, 在`MLJ`中使用的方法是`inverse_transform` | ||
`reconstruct_data` 方法首先从数据 dataframe 中获取维度`column`数据 | ||
从存储模型中调用训练模型 | ||
执行重建变换 | ||
|
||
```julia | ||
""" | ||
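    reconstruct_data(imgs::DataFrame)

Reconstruct approximate original-dimension images from dimension-reduced data.
The number of columns of `imgs` selects which saved PCA model is loaded.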
""" | ||
function reconstruct_data(imgs::DataFrame)
    # The column count of the reduced table is the PCA dimension, which in
    # turn names the saved model file to load.
    ndims_reduced = size(imgs)[2]
    @info "imgs reconstructing from $(ndims_reduced) dimension"
    stored = JLSO.load("$(pwd())/models/of-model-$(ndims_reduced)pcs.jlso")
    # Map the reduced data back to an approximation in the original space.
    return inverse_transform(stored[:pca], imgs)
end
``` | ||
|
||
## 维度缩减到 1d 的数据 | ||
|
||
以 faces 为例,如果缩减到一个维度, 获得的数据是所有图片共用的最大元素,也就是所有人面部共用的特征 | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
--- | ||
title: "3-olivetti-face" | ||
author: math4mad | ||
code-fold: true | ||
--- | ||
|
||
## 1. load package | ||
```{julia} | ||
include("./olivetti-face-code/1-dataprocessing.jl") | ||
import MLJ: transform, inverse_transform | ||
using MLJ,DataFrames,CSV,Random,JLSO,GLMakie | ||
Random.seed!(4545343) | ||
``` | ||
|
||
## 2. import data | ||
```{julia} | ||
(Xtrain, Xtest), (ytrain, ytest)=load_olivetti_faces() | ||
``` | ||
|
||
## 3. train&save model | ||
```{julia} | ||
PCA = @load PCA pkg=MultivariateStats

# Build a PCA trainer bound to the training table `Xtr`.
# The returned closure takes the target dimension `dim`, fits a PCA machine on
# `Xtr` and saves it under `./olivetti-face-code/models/`. The closure itself
# returns `nothing`.
function make_model(Xtr)
    return (dim) -> begin
        model = PCA(maxoutdim=dim)
        mach = machine(model, Xtr) |> fit!
        path = "./olivetti-face-code/models/of-model-$(dim)pcs.jlso"
        try
            # Make sure the target directory exists before writing, so the
            # save also works on a fresh checkout without a `models/` folder.
            mkpath(dirname(path))
            JLSO.save(path, :pca => mach)
            @info "$(dim) dimension pca model saved"
        catch e
            # Saving stays best-effort, but keep the exception and its
            # backtrace in the log record instead of only its text.
            @warn "$(e) has problem" exception = (e, catch_backtrace())
        end
    end
end

make_ol_model = make_model(Xtrain)
make_ol_model.([1,2,3,100])
``` | ||
|
||
## 4. imgs project to low dimension feature space | ||
第三行是降维到 100的图片, 最后一行是原始图片 | ||
```{julia} | ||
include("./olivetti-face-code/3-transform-reconstruct-methods.jl")

# NOTE(review): `cat` shadows `Base.cat` in this session and is not used again
# in the visible cells - confirm whether it is still needed.
cat = levels(Array(ytrain))
rows, cols = size(Xtrain)

# Draw 20 row indices at random (sampling with replacement) and take the
# matching images and labels.
pick20 = rand(1:rows, 20)
pickXtrain = Xtrain[pick20, :]
pickytrain = ytrain[pick20]

# 2-d projection and reconstruction.
# `transform_to_2d` / `transform_to_3d` are presumably defined in the included file.
pcaData = transform_to_2d(pickXtrain)
reconstructImgs = reconstruct_data(pcaData)

# 3-d projection and reconstruction.
pcaData3 = transform_to_3d(pickXtrain)
reconstructImgs3 = reconstruct_data(pcaData3)

# 100-d projection and reconstruction.
transform_to_100d = transform_to_pcadata1(100)
pcaData100 = transform_to_100d(pickXtrain)
reconstructImgs100 = reconstruct_data(pcaData100)

# Stack the three reconstructions on top of the original images, 20 rows each.
df = vcat(reconstructImgs, reconstructImgs3, reconstructImgs100, pickXtrain)
``` | ||
plot reconstruct imgs | ||
```{julia} | ||
# Draw the rows of `df` as a grid of images.
#
# Each row of `df` is reshaped to a `w`×`h` image (`w` and `h` are globals
# defined outside this cell) and placed row by row into an `nrows`×`ncols`
# grid. `cellsize` is the pixel size reserved per grid cell. The defaults
# reproduce the original fixed 4×20 layout with 130-pixel cells.
# Returns the figure.
function plot_img(df; nrows=4, ncols=20, cellsize=130)
    needed = nrows * ncols
    size(df, 1) >= needed ||
        throw(ArgumentError("df has $(size(df, 1)) rows, need at least $(needed)"))

    fig = Figure(resolution=(cellsize * ncols, cellsize * nrows))
    for i in 0:(nrows - 1), j in 1:ncols
        idx = i * ncols + j
        # yreversed flips the y axis of each image cell.
        ax = Axis(fig[i + 1, j], yreversed=true)
        img = df[idx, :] |> Array |> d -> reshape(d, w, h)
        image!(ax, img)
        hidespines!(ax)
        hidedecorations!(ax)
    end
    # To write the figure to disk: save("./imgs/reconstruct-of-face.png", fig)
    return fig
end
plot_img(df)
``` | ||
|
||
|
||
|
||
|
||
|
Binary file added
BIN
+2.37 MB
category/dimension-reduction/data/scikit_fetch_olivetti_faces.csv.zip
Binary file not shown.
29 changes: 29 additions & 0 deletions
29
category/dimension-reduction/olivetti-face-code/1-dataprocessing.jl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
|
||
|
||
using MLJ,DataFrames,CSV,Random | ||
|
||
# Image width and height in pixels.
const w = 64
const h = 64
# NOTE(review): this constant shadows `Base.length` in the including module, so
# later `length(x)` calls there will fail. Kept under its original name so that
# existing users of the constant keep working - consider renaming.
const length = 4096
const str = "olivetti_faces"

# Resolve the CSV next to this source file rather than relative to the current
# working directory, so the file can be included from any directory (for
# example from `2-train&save-model.jl`, which includes it by bare file name).
of = olivetti_faces = CSV.File(joinpath(@__DIR__, "olivetti_faces.csv")) |> DataFrame
# Treat the label column as a categorical (Multiclass) target.
coerce!(of, :label => Multiclass)
# Separate the label column from the remaining columns, using a fixed seed.
label, X = unpack(of, ==(:label), rng=123);
|
||
|
||
""" | ||
    load_olivetti_faces()

Return the Olivetti faces training data, test data and labels as
`(Xtrain, Xtest), (ytrain, ytest)`.
A fraction of 0.8 of the rows goes to the training split.
""" | ||
function load_olivetti_faces()
    # Split features and labels together with a fixed seed; 0.8 of the rows
    # go to the training part.
    splits = partition((X, label), 0.8; multi=true, rng=123)
    (trainX, testX), (trainy, testy) = splits
    return (trainX, testX), (trainy, testy)
end
|
||
return load_olivetti_faces |
39 changes: 39 additions & 0 deletions
39
category/dimension-reduction/olivetti-face-code/2-train&save-model.jl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
|
||
using MLJ,DataFrames,CSV,Random,JLSO | ||
|
||
include("1-dataprocessing.jl") | ||
|
||
|
||
(Xtrain, Xtest), (ytrain, ytest)=load_olivetti_faces() | ||
#df=vcat(Xtrain,Xtest) | ||
|
||
|
||
PCA = @load PCA pkg=MultivariateStats | ||
|
||
""" | ||
    make_model(Xtr)

Take the training data and return a function that waits for the target dimension.

# Arguments
- `Xtr`: training data set

# Returns
A new function whose argument is the reduced dimension `dim`.
""" | ||
function make_model(Xtr)
    return (dim) -> begin
        model = PCA(maxoutdim=dim)
        mach = machine(model, Xtr) |> fit!
        path = "$(pwd())/models/of-model-$(dim)pcs.jlso"
        try
            # Make sure the target directory exists before writing, so the
            # save also works on a fresh checkout without a `models/` folder.
            mkpath(dirname(path))
            JLSO.save(path, :pca => mach)
            @info "$(dim) dimension pca model saved"
        catch e
            # Saving stays best-effort, but keep the exception and its
            # backtrace in the log record instead of only its text.
            @warn "$(e) has problem" exception = (e, catch_backtrace())
        end
    end
end
|
||
#make_ol_model=make_model(Xtrain) | ||
#make_ol_model.([10,20,150]) |
Oops, something went wrong.