Key Word(s): Neural Networks, Gradient Descent
Set a learning rate between 1 and 0.001 and max_iter (maximum number of iterations) to a number between 5 and 50. Then fill in the blanks to:
- Find the derivative, delta, of f(x) where x = cur_x
- Update the current value of x, cur_x
- Create the boolean expression has_converged that ends the algorithm if True

You can experiment with how different values for max_iter and learning_rate affect your results. Change the numbers in figsize() so your figure grid is more visible. A minimal, generic sketch of this update-and-check loop is shown below for reference.
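To make the update step and convergence test concrete, here is a small, self-contained sketch of plain gradient descent on a stand-alone quadratic. The function g, its derivative der_g, and the specific numbers are illustrative assumptions, not the notebook's solution, but the blanks in the exercise follow the same pattern.
# minimal gradient descent on g(x) = (x - 2)**2  (illustrative only)
def g(x):
    return (x - 2)**2

def der_g(x):
    '''derivative of g(x)'''
    return 2*(x - 2)

x_val = 5.0          # starting point
learning_rate = 0.1  # step size
epsilon = 1e-4       # stop once the last update is this small

for _ in range(100):
    prev = x_val
    x_val = x_val - learning_rate*der_g(x_val)  # step against the derivative
    if abs(x_val - prev) <= epsilon:            # the update barely moved x_val
        break

print(x_val)  # approaches the minimizer x = 2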
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
Here is our function of interest and its derivative.
def f(x):
    return np.cos(3*np.pi*x)/x

def der_f(x):
    '''derivative of f(x)'''
    return -(3*np.pi*x*np.sin(3*np.pi*x)+np.cos(3*np.pi*x))/x**2
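For reference, these are the function and derivative implemented above; the derivative follows from the quotient rule:
$$f(x) = \frac{\cos(3\pi x)}{x}, \qquad f'(x) = \frac{-3\pi x\,\sin(3\pi x) - \cos(3\pi x)}{x^{2}} = -\,\frac{3\pi x\,\sin(3\pi x) + \cos(3\pi x)}{x^{2}}$$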
# the part of the function we will focus on
FUNC_RANGE = (0.1, 3)
x = np.linspace(*FUNC_RANGE, 200)
fig, ax = plt.subplots(figsize=(4,3))
plt.plot(x,f(x))
plt.xlim(x.min(), x.max());
plt.xlabel('x')
plt.ylabel('y');
These two functions are just here to help us visualize the gradient descent. You should inspect them later to see how they work.
def get_tangent_line(x, x_range=.5):
    '''returns information about the tangent line of f(x)
    at a given x

    Returns:
        x: np.array - x-values in the tangent line segment
        y: np.array - y-values in the tangent line segment
        m: float - slope of the tangent line'''
    y = f(x)
    m = der_f(x)
    # get tangent line points
    # slope-point form: y - y_1 = m(x - x_1)
    #               =>  y = m(x - x_1) + y_1
    x1, y1 = x, y
    x = np.linspace(x1 - x_range/2, x1 + x_range/2, 50)
    y = m*(x - x1) + y1
    return x, y, m
def plot_it(cur_x, title='', ax=plt):
    '''plots the point cur_x on the curve f(x) as well as
    the tangent line on f(x) where x=cur_x'''
    y = f(x)
    ax.plot(x, y)
    ax.scatter(cur_x, f(cur_x), c='r', s=80, alpha=1)
    x_tan, y_tan, der = get_tangent_line(cur_x)
    ax.plot(x_tan, y_tan, ls='--', c='r')
    # indicate if our location is outside the x range
    if cur_x > x.max():
        ax.axvline(x.max(), c='r', lw=3)
        ax.arrow(x.max()/1.6, y.max()/2, x.max()/5, 0, color='r', head_width=.25)
    if cur_x < x.min():
        ax.axvline(x.min(), c='r', lw=3)
        ax.arrow(x.max()/2.5, y.max()/2, -x.max()/5, 0, color='r', head_width=.25)
    ax.set_xlim(x.min(), x.max())
    ax.set_ylim(-3.5, 3.5)
    ax.set_title(title)
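If you want to see what these helpers produce before starting the exercise, you can call plot_it directly on a single point. A quick check, assuming the cells above have already been run; the value 1.5 is just an example:
# sanity check: show f(x) with its red tangent-line segment at x = 1.5
fig, ax = plt.subplots(figsize=(4, 3))
plot_it(1.5, title='tangent at x = 1.5', ax=ax)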
Set a learning rate between 1 and 0.001. Then fill in the blanks to:
- Find the derivative, delta, of f(x) where x = cur_x
- Update the current value of x, cur_x
- Create the boolean expression has_converged that ends the algorithm if True

You can experiment with how different values for learning_rate and max_iter affect your results.
### edTest(test_convergence) ###
converged = False
max_iter = __

# Play with figsize=(25,20) to make more visible plots
fig, axs = plt.subplots(max_iter//5, 5, figsize=(25,20), sharey=True)

cur_x = 0.75        # initial value of x
learning_rate = __  # controls how large our update steps are
epsilon = 0.0025    # minimum update magnitude

for i, ax in enumerate(axs.ravel()):
    plot_it(cur_x, title=f"{i} step{'' if i == 1 else 's'}", ax=ax)
    prev_x = cur_x  # remember what x was
    delta = __      # find derivative (Hint: use der_f())
    cur_x = __      # update current x-value (Hint: use learning_rate & delta)

    # stop algorithm if we've converged
    # boolean expression (Hint: last update size & epsilon)
    has_converged = __
    if has_converged:
        converged = True
        # hide unused subplots
        for ax in axs.ravel()[i+1:]:
            ax.axis('off')
        break

plt.tight_layout()

if not converged:
    print('Did not converge!')
else:
    print('Converged to a local minimum!')
Did you get $x$ to converge to a local minimum? Did it converge at all? If not, how would you describe the problem? What might we do to address this issue?
Mindchow 🍲
Why does the algorithm not find the first local minimum, which is actually the global minimum?
Try changing the initial value of cur_x to see if you can get the algorithm to converge to this global minimum.
your answer here