import numpy as np
import matplotlib.pyplot as plt


# --- FUNCTION DEFINITION CORRECTED HERE ---
def g(t):
    """
    Calculates g(t) = t^4 - 3t^3 + 2t^2 in a vectorized way.

    It handles both single float inputs and NumPy array inputs.
    If an element's absolute value is too large, it returns infinity
    so that diverging descent paths remain plottable.
    """
    # Use np.where for a vectorized if-else statement.
    # This correctly handles both scalar and array inputs.
    # Note: np.where evaluates both branches, so np.errstate silences
    # the overflow/invalid warnings that huge elements would trigger.
    with np.errstate(over='ignore', invalid='ignore'):
        return np.where(np.abs(t) > 1e10,        # condition
                        np.inf,                  # value if True
                        t**4 - 3*t**3 + 2*t**2)  # value if False

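# Quick sanity check of the vectorized definition (illustrative,
# hand-computed values: g(1) = 1 - 3 + 2 = 0, and g(0) = 0):
assert np.isclose(float(g(1.0)), 0.0)
assert np.allclose(g(np.array([0.0, 1.0])), [0.0, 0.0])
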
# Define the gradient (derivative) of g(t): g'(t) = 4t^3 - 9t^2 + 4t
def grad_g(t):
    # This function is only called with scalars in our loop, but it is
    # vectorization-safe as well: standard arithmetic operations are
    # already vectorized in NumPy.
    return 4*t**3 - 9*t**2 + 4*t

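# Optional finite-difference check of the derivative (a sketch; the step
# size h = 1e-6 and test point t0 = 1.5 are arbitrary choices):
h, t0 = 1e-6, 1.5
assert abs(grad_g(t0) - float(g(t0 + h) - g(t0 - h)) / (2 * h)) < 1e-4
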
# Implement the gradient descent algorithm: x_{k+1} = x_k - alpha * g'(x_k)
def gradient_descent(x0, alpha, iterations=100):
    history = [x0]
    x = x0
    for _ in range(iterations):
        # Stop before the iterate (and hence the gradient) overflows.
        if np.isinf(x) or np.isnan(x) or np.abs(x) > 1e9:
            print(f"Divergence detected for alpha = {alpha}. Stopping early.")
            break

        grad = grad_g(x)
        x = x - alpha * grad
        history.append(x)

    if not np.isinf(x) and not np.isnan(x) and np.abs(x) <= 1e9:
        # float(...) is needed because g() returns a 0-d NumPy array,
        # which does not support float format specifiers directly.
        print(f"Alpha: {alpha:.2f}, Final x: {x:.2f}, g(x): {float(g(x)):.2f}")
    return history

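# Example usage (a sketch, separate from the experiment below): a small
# step size from x0 = 2.5 should settle near the right minimum at
# t = (9 + sqrt(17)) / 8 ≈ 1.64. Uncomment to try a single run:
# path = gradient_descent(x0=2.5, alpha=0.05)
# print(path[-1])
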
# --- Main part of the experiment ---

# Initial points to test
initial_xs = [-1.0, 0.5, 0.65, 2.5]

# Step sizes (alpha values) to test
alphas_to_test = [0.05, 0.15, 0.25, 0.4, 0.5, 0.6]

# Run the experiment for each starting point and step size, and plot the results
for initial_x in initial_xs:
    print(f"Running gradient descent from initial x = {initial_x}")
    # Create a plot
    plt.figure(figsize=(14, 8))

    # Plot the function g(t) for context
    t_plot = np.linspace(-1.5, 3.5, 400)
    plt.plot(t_plot, g(t_plot), 'k-', label='g(t) = t^4 - 3t^3 + 2t^2', linewidth=2)

    # Overlay the descent path for each step size
    for alpha in alphas_to_test:
        history = gradient_descent(initial_x, alpha)
        history_np = np.array(history)
        plt.plot(history_np, g(history_np), 'o-', label=f'alpha = {alpha}',
                 markersize=4, alpha=0.8)

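    # The stationary points below solve g'(t) = 0:
    # 4t^3 - 9t^2 + 4t = t(4t^2 - 9t + 4) = 0
    #   => t = 0  or  t = (9 ± sqrt(17)) / 8   (≈ 0.61 and ≈ 1.64)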
    # Add stationary points
    minima1 = 0.0
    minima2 = (9 + np.sqrt(17)) / 8
    maxima = (9 - np.sqrt(17)) / 8
    plt.plot(minima1, g(minima1), 'g*', markersize=15,
             label=f'Local Minimum at t={minima1:.2f}')
    plt.plot(minima2, g(minima2), 'g*', markersize=15,
             label=f'Local Minimum at t≈{minima2:.2f}')
    plt.plot(maxima, g(maxima), 'rX', markersize=10,
             label=f'Local Maximum at t≈{maxima:.2f}')

    # Final plot formatting
    plt.title(f'Gradient Descent Convergence for Different Step Sizes (alpha), '
              f'initial x = {initial_x}')
    plt.xlabel('t')
    plt.ylabel('g(t)')
    plt.legend()
    plt.grid(True)
    plt.ylim(-1, 5)
    plt.xlim(-1.5, 3.5)
    plt.savefig(f'Homework8_Q5_Gradient_Descent_Convergence_x={initial_x}.png')
    plt.close()  # free the figure before the next starting point