diff --git a/homework11/Homework11_Q5.py b/homework11/Homework11_Q5.py
new file mode 100644
index 0000000..b38baa1
--- /dev/null
+++ b/homework11/Homework11_Q5.py
@@ -0,0 +1,159 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+# --- 1. Define Function, Gradient, and Hessian ---
+# Objective function: Rosenbrock function
+def objective_function(x):
+    x1, x2 = x[0], x[1]
+    return 100 * (x2 - x1**2)**2 + (1 - x1)**2
+
+# Gradient vector calculation
+def gradient(x):
+    x1, x2 = x[0], x[1]
+    df_dx1 = -400 * x1 * (x2 - x1**2) - 2 * (1 - x1)
+    df_dx2 = 200 * (x2 - x1**2)
+    return np.array([df_dx1, df_dx2])
+
+# Hessian matrix calculation
+def hessian(x):
+    x1, x2 = x[0], x[1]
+    d2f_dx1dx1 = -400 * (x2 - 3 * x1**2) + 2
+    d2f_dx1dx2 = -400 * x1
+    d2f_dx2dx1 = -400 * x1
+    d2f_dx2dx2 = 200
+    return np.array([[d2f_dx1dx1, d2f_dx1dx2], [d2f_dx2dx1, d2f_dx2dx2]])
+
+# --- 2. Backtracking Line Search ---
+def backtracking_line_search(x, p, grad, alpha_bar=1.0, rho=0.5, c1=1e-3):
+    """
+    Finds a step size alpha that satisfies the 1st Wolfe condition (Armijo condition).
+    Condition: f(x + alpha*p) <= f(x) + c1 * alpha * grad^T * p
+    """
+    alpha = alpha_bar
+    f_val = objective_function(x)
+    grad_dot_p = np.dot(grad, p)
+
+    # Loop until condition is met
+    while objective_function(x + alpha * p) > f_val + c1 * alpha * grad_dot_p:
+        alpha *= rho
+        # Safety break to prevent infinite loops if alpha becomes too small
+        if alpha < 1e-16:
+            break
+    return alpha
+
+# --- 3. Optimization Algorithms ---
+def run_optimization(method, x0, tol_ratio=1e-12, max_iter=20000):
+    """
+    Runs the specified optimization method.
+    Returns: list of alphas used, total iterations
+    """
+    x = x0.copy()
+    grad_0_norm = np.linalg.norm(gradient(x0))
+
+    alphas = []
+
+    print(f"[{method}] Starting from {x0}...")
+
+    for i in range(max_iter):
+        grad = gradient(x)
+        grad_norm = np.linalg.norm(grad)
+
+        # Convergence Check: ||grad f(xk)|| < 10^-12 * ||grad f(x0)||
+        if grad_norm < tol_ratio * grad_0_norm:
+            print(f" -> Converged at iteration {i}. Final Loss: {objective_function(x):.6e}")
+            return alphas, i
+
+        # Determine Search Direction
+        if method == "Steepest_Descent":
+            direction = -grad
+        elif method == "Newton":
+            hess = hessian(x)
+            try:
+                # Solve H * d = -g for d
+                direction = np.linalg.solve(hess, -grad)
+            except np.linalg.LinAlgError:
+                # Fallback if Hessian is singular
+                direction = -grad
+
+        # Perform Backtracking Line Search
+        alpha = backtracking_line_search(x, direction, grad, alpha_bar=1.0, rho=0.5, c1=1e-3)
+
+        # Update Position
+        x = x + alpha * direction
+
+        alphas.append(alpha)
+
+    print(f" -> Did not converge within {max_iter} iterations.")
+    print(f" -> Final Gradient Norm: {grad_norm:.6e}")
+    return alphas, max_iter
+
+# --- 4. Main Execution and Plotting ---
+
+# Configuration
+initial_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0])]
+methods = ["Steepest_Descent", "Newton"]
+
+# Dictionary to hold simulation results
+results = {}
+
+# Run simulations
+for x0 in initial_points:
+    for method in methods:
+        # Construct a unique key for storage
+        key = f"{method}_x0_{x0}"
+        alphas, num_iters = run_optimization(method, x0)
+        results[key] = {'alphas': alphas, 'iters': num_iters, 'x0': x0}
+
+# Plot configuration
+fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+plt.subplots_adjust(hspace=0.4, wspace=0.3)
+
+# Mapping results to subplots
+plot_mapping = {
+    ("Steepest_Descent", str(initial_points[0])): (0, 0),
+    ("Newton", str(initial_points[0])): (0, 1),
+    ("Steepest_Descent", str(initial_points[1])): (1, 0),
+    ("Newton", str(initial_points[1])): (1, 1),
+}
+
+for key, data in results.items():
+    method_name = key.split("_x0_")[0]
+    x0_str = str(data['x0'])
+
+    ax_idx = plot_mapping[(method_name, x0_str)]
+    ax = axes[ax_idx]
+
+    y_data = data['alphas']
+    x_data = range(len(y_data))
+
+    # --- PLOTTING LOGIC MODIFICATION ---
+    # For Steepest Descent, show only first {limit} steps
+    if method_name == "Steepest_Descent":
+        limit = 30
+        if len(y_data) > limit:
+            y_data = y_data[:limit]
+            x_data = range(limit)
+            title_suffix = f" (First {limit} Steps Shown)"
+        else:
+            title_suffix = ""
+    else:
+        title_suffix = ""
+
+    ax.plot(x_data, y_data, marker='o', linestyle='-', markersize=4, linewidth=1.5)
+
+    # Titles and Labels
+    ax.set_title(f"{method_name}\nStart: {data['x0']}\nTotal Iters: {data['iters']}{title_suffix}")
+    ax.set_xlabel("Iteration")
+    ax.set_ylabel("Step Size (Alpha)")
+    ax.grid(True, which='both', linestyle='--', alpha=0.7)
+
+    # Add annotation for total iterations
+    ax.text(0.95, 0.95, f"Total Iters: {data['iters']}",
+            transform=ax.transAxes,
+            verticalalignment='top', horizontalalignment='right',
+            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
+
+# Save and Show
+#plt.suptitle("Step Size vs Iteration (Rosenbrock Function)", fontsize=16)
+plt.savefig("optimization_results.png")
+#plt.show()
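
A minimal sanity-check sketch, not part of the patch above: assuming objective_function, gradient, and hessian from Homework11_Q5.py are already defined in the same session, the analytic derivatives can be compared against central finite differences before trusting the optimization runs. The helpers fd_gradient and fd_hessian below are illustrative names, not part of the submitted script.

import numpy as np

def fd_gradient(f, x, h=1e-6):
    # Central-difference approximation of the gradient of f at x
    g = np.zeros_like(x, dtype=float)
    for i in range(x.size):
        e = np.zeros_like(x, dtype=float)
        e[i] = h
        g[i] = (f(x + e) - f(x - e)) / (2.0 * h)
    return g

def fd_hessian(x, h=1e-6):
    # Central-difference approximation of the Hessian, built from the analytic gradient
    H = np.zeros((x.size, x.size))
    for i in range(x.size):
        e = np.zeros_like(x, dtype=float)
        e[i] = h
        H[:, i] = (gradient(x + e) - gradient(x - e)) / (2.0 * h)
    return H

x_test = np.array([-1.2, 1.0])
# Both differences should be small if the analytic derivatives are correct
print(np.max(np.abs(fd_gradient(objective_function, x_test) - gradient(x_test))))
print(np.max(np.abs(fd_hessian(x_test) - hessian(x_test))))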