# 熵的定義

(1)
\begin{align} H(X) = \operatorname{E}(I(X)). \end{align}

(2)
\begin{align} H(X) = \sum_{i=1}^n {p(x_i)\,I(x_i)} = -\sum_{i=1}^n {p(x_i) \log p(x_i)} \end{align}

H(X) 乃是一個凹函數 (Concave Function)，因此具有許多良好的數學特性，這些特性讓熵在數學上成為一個相當有價值的工具。圖一顯示了一個只有兩個可能性 $p(x=1), p(x=0)$ 的機率分布，您可以從中觀察到這個凹函數（曲線向上拱起）的特性。

# 條件熵

(3)
\begin{eqnarray} H(Y|X)\ &\equiv& \sum_{x\in\mathcal X}\,p(x)\,H(Y|X=x)\\ &=& -\sum_{x\in\mathcal X}p(x)\sum_{y\in\mathcal Y}\,p(y|x)\,\log\,p(y|x)\\ &=& -\sum_{x\in\mathcal X}\sum_{y\in\mathcal Y}\,p(y,x)\,\log\,p(y|x)\\ &=& -\sum_{x\in\mathcal X, y\in\mathcal Y}p(x,y)\log\,p(y|x). \end{eqnarray}

(4)
\begin{align} H(Y|X)\,=\,H(Y,X)-H(X) \, . \end{align}

(5)
\begin{eqnarray} H(X,Y) &=& -\sum_{x\in\mathcal X, y\in\mathcal Y}p(x,y)\log\,p(x,y)\\ &=& -\sum_{x\in\mathcal X, y\in\mathcal Y}p(x,y)\log\left(p(y|x)p(x)\right)\\ &=& -\sum_{x\in\mathcal X, y\in\mathcal Y}p(x,y)\log\,p(y|x) - \sum_{x\in\mathcal X, y\in\mathcal Y} p(x,y) \log\,p(x)\\ &=& H(Y|X)-\sum_{x\in\mathcal X, y\in\mathcal Y}p(x,y)\log\,p(x)\\ &=& H(Y|X)-\sum_{x\in\mathcal X}\sum_{y\in\mathcal Y}p(x,y)\log\,p(x)\\ &=& H(Y|X)-\sum_{x\in\mathcal X}\log\,p(x)\sum_{y\in\mathcal Y}p(x,y)\\ &=& H(Y|X)-\sum_{x\in\mathcal X}(\log\,p(x))p(x)\\ &=& H(Y|X)-\sum_{x\in\mathcal X}p(x)\log\,p(x)\\ &=& H(Y|X)+H(X)\\ &=& H(X)+H(Y|X) \end{eqnarray}

# 互資訊

(6)
\begin{align} I(X;Y) = \sum_{y \in Y} \sum_{x \in X} p(x,y) \log{ \left( \frac{p(x,y)}{p_1(x)\,p_2(y)} \right) }, \,\! \end{align}

(7)
\begin{align} \log{ \left( \frac{p(x,y)}{p_1(x)\,p_2(y)} \right) } = \log{1} = 0 \end{align}

(8)
\begin{eqnarray} I(X;Y) &=& H(X) - H(X|Y) \\ &=& H(Y) - H(Y|X) \\ &=& H(X) + H(Y) - H(X,Y) \\ &=& H(X,Y) - H(X|Y) - H(Y|X) \end{eqnarray}

(9)
$$d(X,Y) = H(X,Y)-I(X;Y)$$

(10)
\begin{align} D(X,Y) = d(X,Y)/H(X,Y) \le 1 \end{align}

# 條件互資訊

(11)
\begin{align} I(X;Y|Z) &\equiv \mathbb E_Z \big(I(X;Y)|Z\big) \\ &= \sum_{z\in Z} \sum_{y\in Y} \sum_{x\in X} p_Z(z) p_{X,Y|Z}(x,y|z) \log \frac{p_{X,Y|Z}(x,y|z)}{p_{X|Z}(x|z)p_{Y|Z}(y|z)} \end{align}

(12)
\begin{align} I(X;Y|Z) = \sum_{z\in Z} \sum_{y\in Y} \sum_{x\in X} p_{X,Y,Z}(x,y,z) \log \frac{p_Z(z)p_{X,Y,Z}(x,y,z)}{p_{X,Z}(x,z)p_{Y,Z}(y,z)}. \end{align}

(13)
\begin{align} I(X_1;\,...\,;X_n) = I(X_1;\,...\,;X_{n-1}) - I(X_1;\,...\,;X_{n-1}|X_n) \end{align}

(14)
\begin{align} I(X_1;\,...\,;X_{n-1}|X_n) = \mathbb E_{X_n} \big(I(X_1;\,...\,;X_{n-1})|X_n\big) \end{align}