\begin{eqnarray*} y & = & f(t) \\ t & = & g(x) \\ y & =& f(g(x)) \\ \frac {dy}{dx} & = & \frac {dy}{dt} * \frac {dt}{dx} \\ & & \frac {dy}{dt} = f'(t) = f'(g(x)) \;\; \text{and } \\ & & \frac {dt}{dx} = g'(x) \\ \therefore{ \;\; } \frac {dy}{dx} & = & f'(g(x)) * g'(x) \\ \end{eqnarray*}
\begin{eqnarray*} y & = & (2x^2 + 1)^2 \\ t & = & 2x^2 + 1 \\ y & = & t^2 \\ t & = & 2x^2 + 1 \\ \\ &\phantom{=}\, \frac{dy}{dt} & = 2t \\ &\phantom{=}\, & = 2 (2x^2 + 1) \\ &\phantom{=}\, & = (4x^2 + 2) \\ \\ &\phantom{=}\, \frac{dt}{dx} & = 4x \\ \\ \frac{dy}{dx} & = & \frac{dy}{dt}*\frac{dt}{dx} \\ & = & (4x^2 + 2) * 4x \\ & = & 16x^3 + 8x \\ \end{eqnarray*}
see gradient descent
\begin{eqnarray*}
\because{ \;\; } \text{predicted value } \; \hat{y} & = & a + b x \\
\text{and }\;\; \text{residual} & = & y - \hat{y} \\
\therefore{} \;\; \text{residual}^2 & = & (y - (a + b x))^2 \\
\therefore{} \sum{\text{residual}^2} & = & \sum{(y - (a + b x))^2} \\
& = & \text{SSE, sum of square residuals} \\
\\
\dfrac{\text{dSSE}}{\text{da}} & = & -2 \sum{(y - (a + b x))} \\
\end{eqnarray*}
y.hat = a + b * x
a = intercept
residuals = (y - y.hat)
d.sum.of.residuals^2 / d.intercept
= d.sum.of.residuals^2 / d.sum.of.residuals * d.sum.of.residuals / d.intercept
= (2 * residual) * d(y - y.hat)/d.intercept
= (2 * residual) * d(y - (a + bx))/d.intercept
= (2 * residual) * d(y - a - bx)/d.intercept
= (2 * residual) * -1
= -2 * residual
y.hat = a + b * x
b = slope
d.sum.of.square.res / d.slope
= d.sum.of.square.res / d.sum.of.res * d.sum.of.res / d.slope
= (2 * residual) * d(y - y.hat)/d.slope
= (2 * residual) * d(y - a - bx)/d.slope
= (2 * residual) * - x
= - 2 * x * residual