import numpy as np
import matplotlib.pyplot as plt

# --- 1. Define Function, Gradient, and Hessian ---

# Objective function: Rosenbrock function
def objective_function(x):
    x1, x2 = x[0], x[1]
    return 100 * (x2 - x1**2)**2 + (1 - x1)**2

# Gradient vector calculation
def gradient(x):
    x1, x2 = x[0], x[1]
    df_dx1 = -400 * x1 * (x2 - x1**2) - 2 * (1 - x1)
    df_dx2 = 200 * (x2 - x1**2)
    return np.array([df_dx1, df_dx2])

# Hessian matrix calculation
def hessian(x):
    x1, x2 = x[0], x[1]
    d2f_dx1dx1 = -400 * (x2 - 3 * x1**2) + 2
    d2f_dx1dx2 = -400 * x1
    d2f_dx2dx1 = -400 * x1
    d2f_dx2dx2 = 200
    return np.array([[d2f_dx1dx1, d2f_dx1dx2],
                     [d2f_dx2dx1, d2f_dx2dx2]])

# --- 2. Backtracking Line Search ---
def backtracking_line_search(x, p, grad, alpha_bar=1.0, rho=0.5, c1=1e-3):
    """
    Finds a step size alpha that satisfies the 1st Wolfe condition (Armijo condition).
    Condition: f(x + alpha*p) <= f(x) + c1 * alpha * grad^T * p
    """
    alpha = alpha_bar
    f_val = objective_function(x)
    grad_dot_p = np.dot(grad, p)

    # Loop until condition is met
    while objective_function(x + alpha * p) > f_val + c1 * alpha * grad_dot_p:
        alpha *= rho
        # Safety break to prevent infinite loops if alpha becomes too small
        if alpha < 1e-16:
            break
    return alpha

# --- 3. Optimization Algorithms ---
def run_optimization(method, x0, tol_ratio=1e-12, max_iter=20000):
    """
    Runs the specified optimization method.
    Returns: list of alphas used, total iterations
    """
    x = x0.copy()
    grad_0_norm = np.linalg.norm(gradient(x0))
    alphas = []

    print(f"[{method}] Starting from {x0}...")

    for i in range(max_iter):
        grad = gradient(x)
        grad_norm = np.linalg.norm(grad)

        # Convergence Check: ||grad f(xk)|| < 10^-12 * ||grad f(x0)||
        if grad_norm < tol_ratio * grad_0_norm:
            print(f" -> Converged at iteration {i}. Final Loss: {objective_function(x):.6e}")
            return alphas, i

        # Determine Search Direction
        if method == "Steepest_Descent":
            direction = -grad
        elif method == "Newton":
            hess = hessian(x)
            try:
                # Solve H * d = -g for d
                direction = np.linalg.solve(hess, -grad)
            except np.linalg.LinAlgError:
                # Fallback if Hessian is singular
                direction = -grad

        # Perform Backtracking Line Search
        alpha = backtracking_line_search(x, direction, grad, alpha_bar=1.0, rho=0.5, c1=1e-3)

        # Update Position
        x = x + alpha * direction
        alphas.append(alpha)

    print(f" -> Did not converge within {max_iter} iterations.")
    print(f" -> Final Gradient Norm: {grad_norm:.6e}")
    return alphas, max_iter
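# --- Optional: finite-difference sanity check (a sketch, not part of the original workflow) ---
# A minimal check, assuming one wants to confirm that the analytic gradient and Hessian
# above match central finite differences of the Rosenbrock function. The helper name
# `check_derivatives` and the test point below are illustrative only.
def check_derivatives(x, eps=1e-6):
    n = len(x)
    grad_fd = np.zeros(n)
    hess_fd = np.zeros((n, n))
    for j in range(n):
        e = np.zeros(n)
        e[j] = eps
        # Central difference of f approximates the j-th gradient component
        grad_fd[j] = (objective_function(x + e) - objective_function(x - e)) / (2 * eps)
        # Central difference of the analytic gradient approximates the j-th Hessian column
        hess_fd[:, j] = (gradient(x + e) - gradient(x - e)) / (2 * eps)
    print("max |gradient error|:", np.max(np.abs(grad_fd - gradient(x))))
    print("max |Hessian error| :", np.max(np.abs(hess_fd - hessian(x))))

# Example usage (uncomment to run):
# check_derivatives(np.array([-1.2, 1.0]))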
# --- 4. Main Execution and Plotting ---

# Configuration
initial_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0])]
methods = ["Steepest_Descent", "Newton"]

# Dictionary to hold simulation results
results = {}

# Run simulations
for x0 in initial_points:
    for method in methods:
        # Construct a unique key for storage
        key = f"{method}_x0_{x0}"
        alphas, num_iters = run_optimization(method, x0)
        results[key] = {'alphas': alphas, 'iters': num_iters, 'x0': x0}

# Plot configuration
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
plt.subplots_adjust(hspace=0.4, wspace=0.3)

# Mapping results to subplots
plot_mapping = {
    ("Steepest_Descent", str(initial_points[0])): (0, 0),
    ("Newton", str(initial_points[0])): (0, 1),
    ("Steepest_Descent", str(initial_points[1])): (1, 0),
    ("Newton", str(initial_points[1])): (1, 1),
}

for key, data in results.items():
    method_name = key.split("_x0_")[0]
    x0_str = str(data['x0'])
    ax_idx = plot_mapping[(method_name, x0_str)]
    ax = axes[ax_idx]

    y_data = data['alphas']
    x_data = range(len(y_data))

    # --- PLOTTING LOGIC MODIFICATION ---
    # For Steepest Descent, show only first {limit} steps
    if method_name == "Steepest_Descent":
        limit = 30
        if len(y_data) > limit:
            y_data = y_data[:limit]
            x_data = range(limit)
            title_suffix = f" (First {limit} Steps Shown)"
        else:
            title_suffix = ""
    else:
        title_suffix = ""

    ax.plot(x_data, y_data, marker='o', linestyle='-', markersize=4, linewidth=1.5)

    # Titles and Labels
    ax.set_title(f"{method_name}\nStart: {data['x0']}\nTotal Iters: {data['iters']}{title_suffix}")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Step Size (Alpha)")
    ax.grid(True, which='both', linestyle='--', alpha=0.7)

    # Add annotation for total iterations
    ax.text(0.95, 0.95, f"Total Iters: {data['iters']}",
            transform=ax.transAxes, verticalalignment='top',
            horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

# Save and Show
# plt.suptitle("Step Size vs Iteration (Rosenbrock Function)", fontsize=16)
plt.savefig("optimization_results.png")
# plt.show()