从最小化KL散度到最大化ELBO

$$ \begin{aligned} \text{solve } q_\phi(z) \rightarrow p(z|x) \\ \text{KL}(q_\phi(z) \parallel p(z|x)) &= \int q_\phi(z) \ln \frac{q_\phi(z)}{p(z|x)} \, dz \\ &= \int q_\phi(z) \ln q_\phi(z) \, dz - \int q_\phi(z) \ln p(z|x) \, dz \\ &= \mathbb{E}_q [\ln q_\phi(z)] - \mathbb{E}_q [\ln p(z|x)] \\ &= \mathbb{E}_q [\ln q_\phi(z)] - \mathbb{E}_q [\ln p(z,x) - \ln p(x)] \\ &= \mathbb{E}_q [\ln q_\phi(z)] - \mathbb{E}_q [\ln p(z,x)] + \mathbb{E}_q [\ln p(x)] \\ &= \mathbb{E}_q [\ln q_\phi(z)] - \mathbb{E}_q [\ln p(z,x)] + \ln p(x) \\ &= \mathbb{E}_q [\ln q_\phi(z)] - \mathbb{E}_q [\ln p(z,x)] + \text{const} \quad (\ln p(x) \text{ does not depend on } \phi) \\ \arg\min_\phi \text{KL}(q_\phi(z) \parallel p(z|x)) & = \arg\min_\phi \mathbb{E}_q \left[ \ln \frac{q_\phi(z)}{p(z,x)} \right]\\ & = \arg\max_\phi \mathcal{L}(\phi) \end{aligned} $$

ELBO梯度转为期望

$$ \begin{aligned} \mathcal{L}(\phi) & =\mathrm{E}_{q}\left[\ln{\frac{p(x,z)}{q_\phi(z)} }\right] \\ & = \int q_\phi(z)\left(\ln{p(x,z)}-\ln{q_\phi(z)}\right)dz \\\\ \text{note } &q_\phi(z)=q,\ p(x,z)=p,\ \nabla_\phi=\nabla\\\\ \nabla\mathcal{L}(\phi) &= \nabla \int q_\phi(z)\left(\ln{p(x,z)}-\ln{q_\phi(z)}\right)dz = \int \left( (\ln{p}-\ln{q})\nabla{q} - \frac{\nabla{q}}{q}\cdot q \right) dz = \int q\cdot \frac{\nabla{q}}{q} (\ln{p}-\ln q - 1 )dz \\ &= \mathrm{E}_q \left[\frac{\nabla q}{q} (\ln{p}-\ln q - 1)\right] = \mathrm{E}_q \left[\nabla{\ln{q}} (\ln{p}-\ln q) \right] - \mathrm{E}_q\left[\nabla \ln{q}\right] = \mathrm{E}_q \left[\frac{\nabla q}{q} (\ln{p}-\ln q) \right] - \mathrm{E}_q\left[ \frac{\nabla q}{q}\right] \\\\ \mathrm{E}_q\left[ \frac{\nabla q}{q}\right] &= \int \nabla q\cdot dz = \nabla \int q\cdot dz = \nabla 1 = 0 \\\\ \nabla_\phi \mathcal{L}(\phi) &= \mathrm{E}_q \left[\nabla_\phi{\ln q_\phi(z)} (\ln{p(x,z)}-\ln q_\phi(z)) \right] \end{aligned} $$