# 神经网络向量化


## 正向传播

\begin{align} z^{(2)} &= W^{(1)} x + b^{(1)} \\ a^{(2)} &= f(z^{(2)}) \\ z^{(3)} &= W^{(2)} a^{(2)} + b^{(2)} \\ h_{W,b}(x) &= a^{(3)} = f(z^{(3)}) \end{align}

% Non-vectorized forward propagation: processes one training example per
% loop iteration. x(:,i) is the i-th input column; W1,b1 and W2,b2 are the
% layer parameters, f is the elementwise activation, and column i of h
% receives the network output for example i.
for i=1:m,
z2 = W1 * x(:,i) + b1;
a2 = f(z2);
z3 = W2 * a2 + b2;
h(:,i) = f(z3);
end;

% Vectorized forward propagation (bias terms b1, b2 omitted for clarity).
% x holds all m examples as columns, so each line handles the whole batch
% with a single matrix operation.
z2 = W1 * x;
a2 = f(z2);
z3 = W2 * a2;
h = f(z3);   % semicolon added: suppress echoing the full output matrix

% Inefficient, non-vectorized sigmoid activation.
% Visits every element of z with a double loop and applies the logistic
% function 1/(1+e^(-z)) one scalar at a time.
function output = unvectorized_f(z)
output = zeros(size(z));   % semicolon added: avoid echoing the zero matrix on every call
for i=1:size(z,1),
for j=1:size(z,2),
output(i,j) = 1/(1+exp(-z(i,j)));
end;
end;
end

% Efficient, vectorized sigmoid activation.
% "./" in MATLAB/Octave divides elementwise, so the entire matrix is
% transformed in one expression with no explicit loops.
function output = vectorized_f(z)
expnegz = exp(-z);            % elementwise exponential of -z
output = 1 ./ (1 + expnegz);  % logistic function applied to every element
end

% Vectorized forward propagation including the bias terms.
% repmat tiles the bias column vector m times side by side so it can be
% added to every example's pre-activation in one matrix addition.
z2 = W1 * x + repmat(b1,1,m);
a2 = f(z2);
z3 = W2 * a2 + repmat(b2,1,m);
h = f(z3);   % semicolon added: suppress echoing the full output matrix

`repmat(b1,1,m)` 的运算效果是：它把列向量 b1 拷贝 m 份，然后堆叠成如下矩阵：

$\begin{bmatrix} | & | & & | \\ {\rm b1} & {\rm b1} & \cdots & {\rm b1} \\ | & | & & | \end{bmatrix}.$

## 反向传播

\begin{align} \delta^{(3)} &= - (y - a^{(3)}) \bullet f'(z^{(3)}), \\ \delta^{(2)} &= ((W^{(2)})^T\delta^{(3)}) \bullet f'(z^{(2)}), \\ \nabla_{W^{(2)}} J(W,b;x,y) &= \delta^{(3)} (a^{(2)})^T, \\ \nabla_{W^{(1)}} J(W,b;x,y) &= \delta^{(2)} (a^{(1)})^T. \end{align}

% Backpropagation, one example at a time (cf. the delta equations above).
% delta3 is the output-layer error for example i; delta2 propagates it
% backwards through W2; the per-example outer products accumulate the
% weight gradients.
gradW1 = zeros(size(W1));
gradW2 = zeros(size(W2));   % added: the W2 gradient must be accumulated too
for i=1:m,
delta3 = -(y(:,i) - h(:,i)) .* fprime(z3(:,i));
% fixed: delta3 is already a single column here, so indexing delta3(:,i)
% would fail for i > 1
delta2 = (W2'*delta3) .* fprime(z2(:,i));

gradW2 = gradW2 + delta3*a2(:,i)';   % restored: nabla_{W2} J = delta3 * (a2)'
gradW1 = gradW1 + delta2*x(:,i)';    % restored: nabla_{W1} J = delta2 * (a1)' with a1 = x
end;

## 稀疏自编码网络

\begin{align} \delta^{(2)}_i = \left( \left( \sum_{j=1}^{s_{2}} W^{(2)}_{ji} \delta^{(3)}_j \right) + \beta \left( - \frac{\rho}{\hat\rho_i} + \frac{1-\rho}{1-\hat\rho_i} \right) \right) f'(z^{(2)}_i) . \end{align}

% Sparsity penalty term (derivative of the KL-divergence penalty) for the
% sparse autoencoder. It depends only on rho and rho_hat, which are shared
% by all m examples, so it is computed once outside the loop.
sparsity_delta = - rho ./ rho_hat + (1 - rho) ./ (1 - rho_hat);
for i=1:m,
...
% beta*sparsity_delta is added to every example's backpropagated error
% before multiplying by f'(z2). The "..." lines elide unchanged code.
% NOTE(review): if delta3 is a single column (as in the loop above),
% delta3(:,i) fails for i > 1 — likely should be just delta3; confirm.
delta2 = (W2'*delta3(:,i) + beta*sparsity_delta).* fprime(z2(:,i));
...
end;

## 中文译者

Language : English