44import numpy as np
55from matplotlib import pyplot as plt
66from scipy import io as spio
7+ from sklearn .decomposition import pca
78
89'''
9- 主成分分析运行方法
10+ 主成分分析_2维数据降维1维演示函数
1011'''
11- def PCA ():
12+ def PCA_2D ():
1213 data_2d = spio .loadmat ("data.mat" )
1314 X = data_2d ['X' ]
1415 m = X .shape [0 ]
15- plt = plot_data_2d (X ) # 显示二维的数据
16+ plt = plot_data_2d (X , 'bo' ) # 显示二维的数据
1617 plt .show ()
1718
1819 X_copy = X .copy ()
1920 X_norm ,mu ,sigma = featureNormalize (X_copy ) # 归一化数据
20- plot_data_2d (X_norm ) # 显示归一化后的数据
21+ #plot_data_2d(X_norm) # 显示归一化后的数据
22+ #plt.show()
2123
22- Sigma = np .dot (np .transpose (X_norm ),X_norm )/ m
23- U ,S ,V = np .linalg .svd (Sigma )
24+ Sigma = np .dot (np .transpose (X_norm ),X_norm )/ m # 求Sigma
25+ U ,S ,V = np .linalg .svd (Sigma ) # 求Sigma的奇异值分解
2426
25- plt = plot_data_2d (X_norm )
26- plt .plot (mu .reshape (- 1 ,1 ),mu .reshape (- 1 ,1 )+ S [0 ]* (U [:,0 ].reshape (- 1 ,1 )),'k-' )
27+ plt = plot_data_2d (X ,'bo' ) # 显示原本数据
28+ drawline (plt , mu , mu + S [0 ]* (U [:,0 ]), 'r-' ) # 线,为投影的方向
29+
30+ plt .axis ('square' )
2731 plt .show ()
2832
29- K = 1
30- Z = projectData (X_norm ,U ,K )
31- print Z [0 ]
32-
33+ K = 1 # 定义降维多少维(本来是2维的,这里降维1维)
34+ '''投影之后数据(降维之后)'''
35+ Z = projectData (X_norm ,U ,K ) # 投影
36+ '''恢复数据'''
37+ X_rec = recoverData (Z ,U ,K ) # 恢复
38+ '''作图-----原数据与恢复的数据'''
39+ plt = plot_data_2d (X_norm ,'bo' )
40+ plot_data_2d (X_rec ,'ro' )
41+ for i in range (X_norm .shape [0 ]):
42+ drawline (plt , X_norm [i ,:], X_rec [i ,:], '--k' )
43+ plt .axis ('square' )
44+ plt .show ()
3345
3446
47+ '''主成分分析_PCA图像数据降维'''
48+ def PCA_faceImage ():
49+ print (u'加载图像数据.....' )
50+ data_image = spio .loadmat ('data_faces.mat' )
51+ X = data_image ['X' ]
52+ display_imageData (X [0 :100 ,:])
53+ m = X .shape [0 ] # 数据条数
54+
55+ print (u'运行PCA....' )
56+ X_norm ,mu ,sigma = featureNormalize (X ) # 归一化
57+
58+ Sigma = np .dot (np .transpose (X_norm ),X_norm )/ m # 求Sigma
59+ U ,S ,V = np .linalg .svd (Sigma ) # 奇异值分解
60+ display_imageData (np .transpose (U [:,0 :36 ])) # 显示U的数据
61+
62+ print (u'对face数据降维.....' )
63+ K = 100 # 降维100维(原先是32*32=1024维的)
64+ Z = projectData (X_norm , U , K )
65+ print (u'投影之后Z向量的大小:%d %d' % Z .shape )
66+
67+ print (u'显示降维之后的数据......' )
68+ X_rec = recoverData (Z , U , K ) # 恢复数据
69+ display_imageData (X_rec [0 :100 ,:])
70+
71+
72+
73+
3574# 可视化二维数据
36- def plot_data_2d (X ):
37- plt .plot (X [:,0 ],X [:,1 ],'bo' )
75+ def plot_data_2d (X , marker ):
76+ plt .plot (X [:,0 ],X [:,1 ],marker )
3877 return plt
3978
4079# 归一化数据
@@ -55,10 +94,48 @@ def featureNormalize(X):
5594def projectData (X_norm ,U ,K ):
5695 Z = np .zeros ((X_norm .shape [0 ],K ))
5796
58- U_reduce = U [:,0 :K ]
59- Z = np .dot (X_norm ,U_reduce )
97+ U_reduce = U [:,0 :K ] # 取前K个
98+ Z = np .dot (X_norm ,U_reduce )
6099 return Z
61100
101+ # 画一条线
102+ def drawline (plt ,p1 ,p2 ,line_type ):
103+ plt .plot (np .array ([p1 [0 ],p2 [0 ]]),np .array ([p1 [1 ],p2 [1 ]]),line_type )
104+
105+
106+ # 恢复数据
107+ def recoverData (Z ,U ,K ):
108+ X_rec = np .zeros ((Z .shape [0 ],U .shape [0 ]))
109+ U_recude = U [:,0 :K ]
110+ X_rec = np .dot (Z ,np .transpose (U_recude )) # 还原数据(近似)
111+ return X_rec
112+
113+ # 显示图片
114+ def display_imageData (imgData ):
115+ sum = 0
116+ '''
117+ 显示100个数(若是一个一个绘制将会非常慢,可以将要画的图片整理好,放到一个矩阵中,显示这个矩阵即可)
118+ - 初始化一个二维数组
119+ - 将每行的数据调整成图像的矩阵,放进二维数组
120+ - 显示即可
121+ '''
122+ m ,n = imgData .shape
123+ width = np .int32 (np .round (np .sqrt (n )))
124+ height = np .int32 (n / width );
125+ rows_count = np .int32 (np .floor (np .sqrt (m )))
126+ cols_count = np .int32 (np .ceil (m / rows_count ))
127+ pad = 1
128+ display_array = - np .ones ((pad + rows_count * (height + pad ),pad + cols_count * (width + pad )))
129+ for i in range (rows_count ):
130+ for j in range (cols_count ):
131+ max_val = np .max (np .abs (imgData [sum ,:]))
132+ display_array [pad + i * (height + pad ):pad + i * (height + pad )+ height ,pad + j * (width + pad ):pad + j * (width + pad )+ width ] = imgData [sum ,:].reshape (height ,width ,order = "F" )/ max_val # order=F指定以列优先,在matlab中是这样的,python中需要指定,默认以行
133+ sum += 1
134+
135+ plt .imshow (display_array ,cmap = 'gray' ) #显示灰度图像
136+ plt .axis ('off' )
137+ plt .show ()
62138
63139if __name__ == "__main__" :
64- PCA ()
140+ PCA_2D ()
141+ PCA_faceImage ()
0 commit comments