import numpy as np
import matplotlib.pyplot as plt


def g(t):
    """Evaluate g(t) = t^4 - 3t^3 + 2t^2, vectorized over scalars and arrays.

    Inputs whose magnitude exceeds 1e10 map to +inf so that diverging
    descent paths can still be plotted without overflow.

    Parameters
    ----------
    t : float or array-like
        Point(s) at which to evaluate g.

    Returns
    -------
    float or np.ndarray
        g(t), with the same scalar/array shape as the input.
    """
    t = np.asarray(t, dtype=float)
    too_big = np.abs(t) > 1e10
    # np.where evaluates BOTH branches, so zero out the huge values first;
    # otherwise t**4 would still overflow on them before being discarded.
    safe_t = np.where(too_big, 0.0, t)
    result = np.where(too_big,
                      np.inf,
                      safe_t**4 - 3*safe_t**3 + 2*safe_t**2)
    # Unwrap 0-d arrays so scalar input yields a scalar (keeps f-string
    # float formatting like f"{g(x):.2f}" working on modern numpy).
    return result[()] if result.ndim == 0 else result


def grad_g(t):
    """Derivative g'(t) = 4t^3 - 9t^2 + 4t.

    Plain arithmetic, so it is already vectorization-safe for arrays,
    though the descent loop below only calls it with scalars.
    """
    return 4*t**3 - 9*t**2 + 4*t


def gradient_descent(x0, alpha, iterations=100):
    """Run fixed-step gradient descent on g starting from x0.

    Parameters
    ----------
    x0 : float
        Initial point.
    alpha : float
        Step size (learning rate).
    iterations : int, optional
        Maximum number of update steps (default 100).

    Returns
    -------
    list[float]
        Every visited point, starting with x0. The trajectory is
        truncated early if the iterate diverges.
    """
    history = [x0]
    x = x0
    for _ in range(iterations):
        # Check for divergence BEFORE evaluating the gradient, so that
        # grad_g is never called on inf/nan values.
        if np.isinf(x) or np.isnan(x) or np.abs(x) > 1e9:
            print(f"Divergence detected for alpha = {alpha}. Stopping early.")
            break
        x = x - alpha * grad_g(x)
        history.append(x)
    # Only report a final value when the run ended at a finite point.
    if not np.isinf(x) and not np.isnan(x) and np.abs(x) <= 1e9:
        print(f"Alpha: {alpha:<.2f}, Current x: {x:<.2f}, g(x): {g(x):<.2f}")
    return history


def main():
    """Run the step-size experiment from several starting points and save plots."""
    # Starting points chosen around the stationary points of g.
    initial_xs = [-1.0, 0.5, 0.65, 2.5]
    # Step sizes to compare, from safely convergent to divergent.
    alphas_to_test = [0.05, 0.15, 0.25, 0.4, 0.5, 0.6]

    for initial_x in initial_xs:
        print(f"Running gradient descent from initial x = {initial_x}")
        plt.figure(figsize=(14, 8))

        # Plot the function g(t) for context.
        t_plot = np.linspace(-1.5, 3.5, 400)
        plt.plot(t_plot, g(t_plot), 'k-',
                 label='g(t) = t^4 - 3t^3 + 2t^2', linewidth=2)

        for alpha in alphas_to_test:
            history = gradient_descent(initial_x, alpha)
            history_np = np.array(history)
            plt.plot(history_np, g(history_np), 'o-',
                     label=f'alpha = {alpha}', markersize=4, alpha=0.8)

        # Stationary points of g: g'(t) = t(4t^2 - 9t + 4) = 0 gives t = 0
        # and t = (9 ± sqrt(17)) / 8.
        minima1 = 0.0
        minima2 = (9 + np.sqrt(17)) / 8
        maxima = (9 - np.sqrt(17)) / 8
        plt.plot(minima1, g(minima1), 'g*', markersize=15,
                 label=f'Local Minimum at t={minima1:.2f}')
        plt.plot(minima2, g(minima2), 'g*', markersize=15,
                 label=f'Local Minimum at t≈{minima2:.2f}')
        plt.plot(maxima, g(maxima), 'rX', markersize=10,
                 label=f'Local Maximum at t≈{maxima:.2f}')

        # Final plot formatting.
        plt.title('Gradient Descent Convergence for Different Step Sizes '
                  '(alpha, initial x={})'.format(initial_x))
        plt.xlabel('t')
        plt.ylabel('g(t)')
        plt.legend()
        plt.grid(True)
        plt.ylim(-1, 5)
        plt.xlim(-1.5, 3.5)
        plt.savefig(f'Homework8_Q5_Gradient_Descent_Convergence_x={initial_x}.png')
        # Close the figure so figures don't accumulate across the loop.
        plt.close()


if __name__ == "__main__":
    main()