generated at
10. 線形代数の応用と発展

最小2乗法
lstsqr.py
from numpy import array, linspace, sqrt, random, linalg from numpy. import matplotlib.pyplot as plt n, m = 30, 1000 random.seed(2020) x = linspace(0.0, 1.0, m) w = random.normal(0.0, sqrt(1.0/m), m) y = w.cumsum() tA = array([x**j for j in range(n + 1)]) A = tA.T S = linalg.solve(tA.dot(A), tA.dot(y)) L = linalg.lstsq(A, y, rcond=None)[0] fig, axs = plt.subplots(1, 2, figsize=(15, 5)) for ax, B in zip(axs, [S, L]): z = B.dot(tA) ax.plot(x, y), ax.plot(x, z) plt.show()
ソースコード: lstsqr.py

左はlinalg.solve、右はlinalg.islsqr

複素平面に値をとる関数


最小2乗法(その2)
moji.py
from numpy import array, linspace, identity, exp, pi, linalg, ones from numpy.polynomial.legendre import Legendre import matplotlib.pyplot as plt with open('moji.txt', 'r') as fd: y = eval(fd.read()) m = len(y) x = linspace(0.0, 1.0, m) def phi1(n): return array([(x0 >= x).astype('int') for x0 in linspace(0, 1, n)]).T def phi2(n): return array([exp(2 * pi * k * 1j * x) for k in range(-n // 2, n // 2 + 1)]).T def phi3(n): return array([Legendre.basis(j, domain=[0, 1])(x) for j in range(n)]).T fig, axs = plt.subplots(3, 5, figsize=(16, 8)) for i, f in enumerate([phi1, phi2, phi3]): for j, n in enumerate([8, 16, 32, 64, 128]): ax = axs[i, j] c = linalg.lstsq(f(n), y, rcond=None)[0] z = f(n).dot(c) ax.scatter(z.real, z.imag, s=5), ax.plot(z.real, z.imag) ax.axis('scaled'), ax.set_xlim(-1, 1), ax.set_ylim(-1, 1) ax.tick_params(labelbottom=False, labelleft=False, color='white') plt.show()
ソースコード: moji.py
データファイル: moji.txt

上から、2値関数、フーリエ級数、多項式
左から、n=8, 16, 32, 64, 128


ベクトル値確率変数
probab1.py
from numpy import array from numpy.random import randint N = [1, 2, 3, 4, 5, 6] Omega = [(w1, w2) for w1 in N for w2 in N] def omega(): return Omega[randint(len(Omega))] def P(w): return 1 / len(Omega) def X(w): return array([w[0] + w[1], w[0] - w[1]]) def E(X): return sum([X(w) * P(w) for w in Omega]) for n in range(5): w = omega() print(X(w), end=' ') print() print(E(X))
ソースコード: probab1.py

probab2.py
from numpy.random import choice W1 = W2 = [1, 2, 3, 4, 5, 6] def X(w): return w[0] + w[1] def X1(w1): return X((w1, choice(W2))) for n in range(20): w1 = choice(W1) print(X1(w1), end=' ')
ソースコード: probab2.py

主成分分析
scatter.py
import numpy as np import vpython as vp import matplotlib.pyplot as plt with open('data.csv', 'r') as fd: lines = fd.readlines() data = np.array([eval(line) for line in lines[1:]]) def scatter3d(data): o = vp.vec(0, 0, 0) vp.curve(pos=[o, vp.vec(100, 0, 0)], color=vp.color.red) vp.curve(pos=[o, vp.vec(0, 100, 0)], color=vp.color.green) vp.curve(pos=[o, vp.vec(0, 0, 100)], color=vp.color.blue) vp.points(pos=[vp.vec(*a) for a in data], radius=3) def scatter2d(data): A = data.T fig, axs = plt.subplots(1, 3, figsize=(15, 5)) for n, B in enumerate([A[[0, 1]], A[[0, 2]], A[[1, 2]]]): s = B.dot(B.T) cor = s[0, 1] / np.sqrt(s[0, 0]) / np.sqrt(s[1, 1]) print(f'{cor:.3}') axs[n].scatter(B[0], B[1]) plt.show() if __name__ == '__main__': scatter3d(data) scatter2d(data)
ソースプログラム: scatter.py
データファイル: data.csv

実行結果: 相関係数
python
0.966 0.954 0.972



principal.py
from numpy.linalg import eigh from scatter import data, scatter2d, scatter3d n = len(data) mean = sum(data) / n C = data - mean A = C.T AAt = A.dot(C) / n E, U = eigh(AAt) print(E) scatter3d(C.dot(U)) scatter2d(C.dot(U))
ソースコード: principal.py

実行結果: 固有値と相関係数
python
[ 90.10130467 164.99627521 571.24846403] 9.91e-17 -7.87e-17 -3.56e-16



KL展開
KL2.py
import numpy as np import matplotlib.pyplot as plt s = 123 tmax, N = 100, 1000 dt = tmax / N np.random.seed(s) W = np.random.normal(0, dt, (2, N)) Noise = np.random.normal(0, 0.25, (4, N)) B = W.cumsum(axis=1) P = np.array([[1, 0], [1, 1], [0, 1], [1, -1]]) A = P.dot(B) + Noise U, S, V = np.linalg.svd(A) print(S) C = U[:, :2].dot(np.diag(S[:2]).dot(V[:2, :])) fig, axs = plt.subplots(1, 3, figsize=(20, 5)) T = np.linspace(0, tmax, N) for i in range(4): axs[0].plot(T, A[i]) for i in range(2): axs[1].plot(T, V[i]) for i in range(4): axs[2].plot(T, C[i]) plt.show()
ソースコード: KL2.py



手書き数字のKL展開
mean.py
import numpy as np import matplotlib.pyplot as plt N = 60000 with open('train-images.bin', 'rb') as fd: X = np.fromfile(fd, 'uint8', -1)[16:] X = X.reshape((N, 28, 28)) with open('train-labels.bin', 'rb') as fd: Y = np.fromfile(fd, 'uint8', -1)[8:] D = {y: [] for y in set(Y)} for x, y in zip(X, Y): D[y].append(x) print([len(D[y]) for y in sorted(D)]) fig, ax = plt.subplots(1, 10, figsize=(10, 1)) for y in D: A = 255 - sum([x.astype('float') for x in D[y]]) / len(D[y]) ax[y].imshow(A, 'gray') ax[y].tick_params(labelbottom=False, labelleft=False, color='white') plt.show()
ソースコード: mean.py


mean2.py
import numpy as np import matplotlib.pyplot as plt N = 60000 with open('train-images.bin', 'rb') as fd: X = np.fromfile(fd, 'uint8', -1)[16:] X = X.reshape((N, 28, 28)) with open('train-labels.bin', 'rb') as fd: Y = np.fromfile(fd, 'uint8', -1)[8:] D = {y: [] for y in set(Y)} for x, y in zip(X, Y): D[y].append(x) print([len(D[y]) for y in sorted(D)]) A = 255 - sum([x.astype('float') for x in X]) / N plt.imshow(A, 'gray') plt.tick_params(labelbottom=False, labelleft=False, color='white') plt.show()
ソースコード: mean2.py


mean_KL2.py
import numpy as np import matplotlib.pyplot as plt cutoff = 14 N = 60000 with open('train-images.bin', 'rb') as fd: X = np.fromfile(fd, 'uint8', -1)[16:] X = X.reshape((N, 28, 28)) with open('train-labels.bin', 'rb') as fd: Y = np.fromfile(fd, 'uint8', -1)[8:] D = {y: [] for y in set(Y)} for x, y in zip(X, Y): D[y].append(x) A = sum([x.astype('float') for x in X]) / N U, Sigma, V = np.linalg.svd(A) print(Sigma) def proj(X, U, V, k): U1, V1 = U[:, :k], V[:k, :] P, Q = U1.dot(U1.T), V1.T.dot(V1) return P.dot(X.dot(Q)) fig, axs = plt.subplots(10, 10) for y in D: for k in range(10): ax = axs[y][k] A = D[y][k] B = proj(A, U, V, cutoff) ax.imshow(255 - B, 'gray') ax.tick_params(labelbottom=False, labelleft=False, color='white') plt.show()
ソーシコード: mean_KL2.py




線形回帰による確率変数の実現値の推定
estimate1.py
from numpy import array, random, linalg import matplotlib.pyplot as plt n = 10 x1 = [random.uniform(-0.5, 0.5) for n in range(n)] x2 = [random.uniform(-0.5, 0.5) for n in range(n)] x3 = [random.normal(0, 1) for n in range(n)] x4 = [random.normal(0, 1) for n in range(n)] A = array([[1, 0, 1, 0], [0, 1, 0, 1]]) y1, y2 = A.dot([x1, x2, x3, x4]) B = linalg.pinv(A) z1, z2, z3, z4 = B.dot([y1, y2]) plt.scatter(x1, x2, s=50, marker='o') plt.scatter(y1, y2, s=50, marker='x') plt.scatter(z1, z2, s=50, marker='s') xy = xz = 0 for u1, u2, v1, v2, w1, w2 in zip(x1, x2, y1, y2, z1, z2): plt.plot([u1, v1], [u2, v2], color='black') plt.plot([u1, w1], [u2, w2], color='black') xy += (u1 - v1)**2 + (u2 - v2)**2 xz += (u1 - w1)**2 + (u2 - w2)**2 print(f'||x-y||^2 = {xy / n}') print(f'||x-z||^2 = {xz / n}') plt.show()
ソースコード: estimate1.py


estimate2.py
from numpy import zeros, arange, random, linalg import matplotlib.pyplot as plt N, rho, sigma, tau = 100, 1.0, 0.1, 0.1 random.seed(123) x, y = zeros(N), zeros(N) for i in range(N): x[i] = rho*x[i - 1] + sigma*random.normal(0, 1) y[i] = x[i] + tau*random.normal(0, 1) A = zeros((N, 2 * N)) for i in range(N): for j in range(i + 1): A[i, j] = rho**(i - j) * sigma A[i, N + i] = tau B = linalg.pinv(A) v = B.dot(y) z = zeros(N) for i in range(N): z[i] = rho*z[i - 1] + sigma*v[i] print(f'(y-x)^2 = {sum((y-x) ** 2)}') print(f'(z-x)^2 = {sum((z-x) ** 2)}') plt.figure(figsize=(20, 5)) t = arange(N) plt.plot(t, x, color='red') plt.plot(t, y, color='green') plt.plot(t, z, color='blue') plt.show()
ソースコード: estimate2.py

実行結果: 2乗誤差
python
(y-x)^2 = 1.2353743601911984 (z-x)^2 = 0.43958177113082514

赤が真値、緑が観測値、青が推定値

カルマン・フィルタ
kalman2.py
from numpy import zeros, random import matplotlib.pyplot as plt random.seed(123) N, r, s, t = 100, 1.0, 0.1, 0.1 T = range(N) x, y, z = zeros(N), zeros(N), zeros(N) a = s**2 for i in range(N): x[i] = r * x[i - 1] + s * random.normal(0, 1) y[i] = x[i] + t * random.normal(0, 1) z[i] = r * z[i - 1] + a / (t**2 + a) * (y[i] - r * z[i - 1]) c = a - a**2 / (t**2 + a) a = r * c + s**2 print(f'(y-x)^2 = {sum((y-x)**2)}') print(f'(z-x)^2 = {sum((z-x)**2)}') plt.figure(figsize=(20, 5)) plt.plot(T, x, color='red') plt.plot(T, y, color='green') plt.plot(T, z, color='blue') plt.show()
ソースコード: kalman2.py

実行結果: 2乗誤差
python
(y-x)^2 = 1.2353743601911977 (z-x)^2 = 0.6170319148795381

赤が真値、緑が観測値、青が推定値