diff --git a/doc/LectureNotes/chapteroptimization.ipynb b/doc/LectureNotes/chapteroptimization.ipynb
index 5d83c5b50..fea66ed89 100644
--- a/doc/LectureNotes/chapteroptimization.ipynb
+++ b/doc/LectureNotes/chapteroptimization.ipynb
@@ -2727,7 +2727,7 @@
    },
    "source": [
     "$$\n",
-    "\\boldsymbol{\\mathbf{m}}_t={\\mathbf{m}_t \\over 1-\\beta_1^t} \\nonumber\n",
+    "\\hat{\\mathbf{m}}_t={\\mathbf{m}_t \\over 1-\\beta_1^t} \\nonumber\n",
     "$$"
    ]
   },
@@ -2739,7 +2739,7 @@
    },
    "source": [
     "$$\n",
-    "\\boldsymbol{\\mathbf{s}}_t ={\\mathbf{s}_t \\over1-\\beta_2^t} \\nonumber\n",
+    "\\hat{\\mathbf{s}}_t ={\\mathbf{s}_t \\over 1-\\beta_2^t} \\nonumber\n",
     "$$"
    ]
   },
@@ -2751,7 +2751,7 @@
    },
    "source": [
     "$$\n",
-    "\\boldsymbol{\\theta}_{t+1}=\\boldsymbol{\\theta}_t - \\eta_t { \\boldsymbol{\\mathbf{m}}_t \\over \\sqrt{\\boldsymbol{\\mathbf{s}}_t} +\\epsilon}, \\nonumber\n",
+    "\\boldsymbol{\\theta}_{t+1}=\\boldsymbol{\\theta}_t - \\eta_t { \\hat{\\mathbf{m}}_t \\over \\sqrt{\\hat{\\mathbf{s}}_t} +\\epsilon}, \\nonumber\n",
     "$$"
    ]
   },
@@ -2786,8 +2786,8 @@
     "Like in RMSprop, the effective step size of a parameter depends on the\n",
     "magnitude of its gradient squared.  To understand this better, let us\n",
     "rewrite this expression in terms of the variance\n",
-    "$\\boldsymbol{\\sigma}_t^2 = \\boldsymbol{\\mathbf{s}}_t -\n",
-    "(\\boldsymbol{\\mathbf{m}}_t)^2$. Consider a single parameter $\\theta_t$. The\n",
+    "$\\hat{\\boldsymbol{\\sigma}}_t^2 = \\hat{\\mathbf{s}}_t -\n",
+    "(\\hat{\\mathbf{m}}_t)^2$. Consider a single parameter $\\theta_t$. The\n",
     "update rule for this parameter is given by"
    ]
   },
@@ -2799,7 +2799,7 @@
    },
    "source": [
     "$$\n",
-    "\\Delta \\theta_{t+1}= -\\eta_t { \\boldsymbol{m}_t \\over \\sqrt{\\sigma_t^2 +  m_t^2 }+\\epsilon}.\n",
+    "\\Delta \\theta_{t+1}= -\\eta_t { \\hat{m}_t \\over \\sqrt{\\hat{\\sigma}_t^2 +  \\hat{m}_t^2 }+\\epsilon}.\n",
     "$$"
    ]
   },