import numpy as np
import matplotlib.pyplot as plt


# --- FUNCTION DEFINITION CORRECTED HERE ---
def g(t):
    """
    Calculates g(t) = t^4 - 3t^3 + 2t^2 in a vectorized way.

    It handles both single float inputs and NumPy array inputs.
    If an element's absolute value is too large, it returns infinity
    so that diverging descent paths remain plottable.
    """
    # Use np.where for a vectorized if-else statement.
    # This correctly handles both scalar and array inputs.
    # Note: np.where evaluates both branches, so np.errstate silences
    # the overflow/invalid warnings that huge elements would trigger.
    with np.errstate(over='ignore', invalid='ignore'):
        return np.where(np.abs(t) > 1e10,        # condition
                        np.inf,                  # value if True
                        t**4 - 3*t**3 + 2*t**2)  # value if False

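# Quick sanity check of the vectorized definition (illustrative,
# hand-computed values: g(1) = 1 - 3 + 2 = 0, and g(0) = 0):
assert np.isclose(float(g(1.0)), 0.0)
assert np.allclose(g(np.array([0.0, 1.0])), [0.0, 0.0])
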
# Define the gradient (derivative) of g(t): g'(t) = 4t^3 - 9t^2 + 4t
def grad_g(t):
    # This function is only called with scalars in our loop, but it is
    # vectorization-safe as well: standard arithmetic operations are
    # already vectorized in NumPy.
    return 4*t**3 - 9*t**2 + 4*t

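# Optional finite-difference check of the derivative (a sketch; the step
# size h = 1e-6 and test point t0 = 1.5 are arbitrary choices):
h, t0 = 1e-6, 1.5
assert abs(grad_g(t0) - float(g(t0 + h) - g(t0 - h)) / (2 * h)) < 1e-4
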
# Implement the gradient descent algorithm: x_{k+1} = x_k - alpha * g'(x_k)
def gradient_descent(x0, alpha, iterations=100):
    history = [x0]
    x = x0
    for _ in range(iterations):
        # Stop before the iterate (and hence the gradient) overflows.
        if np.isinf(x) or np.isnan(x) or np.abs(x) > 1e9:
            print(f"Divergence detected for alpha = {alpha}. Stopping early.")
            break

        grad = grad_g(x)
        x = x - alpha * grad
        history.append(x)

    if not np.isinf(x) and not np.isnan(x) and np.abs(x) <= 1e9:
        # float(...) is needed because g() returns a 0-d NumPy array,
        # which does not support float format specifiers directly.
        print(f"Alpha: {alpha:.2f}, Final x: {x:.2f}, g(x): {float(g(x)):.2f}")
    return history

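# Example usage (a sketch, separate from the experiment below): a small
# step size from x0 = 2.5 should settle near the right minimum at
# t = (9 + sqrt(17)) / 8 ≈ 1.64. Uncomment to try a single run:
# path = gradient_descent(x0=2.5, alpha=0.05)
# print(path[-1])
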
# --- Main part of the experiment ---

# Initial points to test
initial_xs = [-1.0, 0.5, 0.65, 2.5]

# Step sizes (alpha values) to test
alphas_to_test = [0.05, 0.15, 0.25, 0.4, 0.5, 0.6]

# Run the experiment for each starting point and step size, and plot the results
for initial_x in initial_xs:
    print(f"Running gradient descent from initial x = {initial_x}")
    # Create a plot
    plt.figure(figsize=(14, 8))

    # Plot the function g(t) for context
    t_plot = np.linspace(-1.5, 3.5, 400)
    plt.plot(t_plot, g(t_plot), 'k-', label='g(t) = t^4 - 3t^3 + 2t^2', linewidth=2)

    # Overlay the descent path for each step size
    for alpha in alphas_to_test:
        history = gradient_descent(initial_x, alpha)
        history_np = np.array(history)
        plt.plot(history_np, g(history_np), 'o-', label=f'alpha = {alpha}',
                 markersize=4, alpha=0.8)

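    # The stationary points below solve g'(t) = 0:
    # 4t^3 - 9t^2 + 4t = t(4t^2 - 9t + 4) = 0
    #   => t = 0  or  t = (9 ± sqrt(17)) / 8   (≈ 0.61 and ≈ 1.64)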
    # Add stationary points
    minima1 = 0.0
    minima2 = (9 + np.sqrt(17)) / 8
    maxima = (9 - np.sqrt(17)) / 8
    plt.plot(minima1, g(minima1), 'g*', markersize=15,
             label=f'Local Minimum at t={minima1:.2f}')
    plt.plot(minima2, g(minima2), 'g*', markersize=15,
             label=f'Local Minimum at t≈{minima2:.2f}')
    plt.plot(maxima, g(maxima), 'rX', markersize=10,
             label=f'Local Maximum at t≈{maxima:.2f}')

    # Final plot formatting
    plt.title(f'Gradient Descent Convergence for Different Step Sizes (alpha), '
              f'initial x = {initial_x}')
    plt.xlabel('t')
    plt.ylabel('g(t)')
    plt.legend()
    plt.grid(True)
    plt.ylim(-1, 5)
    plt.xlim(-1.5, 3.5)
    plt.savefig(f'Homework8_Q5_Gradient_Descent_Convergence_x={initial_x}.png')
    plt.close()  # free the figure before the next starting point