Compare commits
12 Commits: ba2802187d...main

| SHA1 |
|---|
| ee51578f99 |
| 621fb1feb2 |
| 33a3157bc8 |
| f9fdf4002c |
| dd4675235d |
| bd5715388b |
| 323f757b14 |
| 1ecd5a4bbe |
| 5acb5d6900 |
| ad53ec4eeb |
| 20965947f0 |
| 248720434f |
4
.gitignore
vendored
@@ -1 +1,5 @@
 .vscode
+.DS_Store
+__pycache__
+*.pyc
+*Zone.Identifier
249
homework10/Homework10_Q1.py
Normal file
@@ -0,0 +1,249 @@
import numpy as np
import matplotlib.pyplot as plt
import sys

# Configuration for plotting
plt.style.use('seaborn-v0_8')
plt.rcParams['figure.figsize'] = (18, 6)


def newton_method(f, df, x0, tolerance=1e-12, max_iter=20):
    """
    Implements Newton's Method with overflow protection.
    Returns: list of iterates x_k (the iteration history)
    """
    history = []
    x = x0

    for i in range(max_iter):
        try:
            # Check for divergence first
            if abs(x) > 1e100:
                break

            fx = f(x)
            dfx = df(x)

            if abs(dfx) < 1e-15:  # Avoid division by zero
                break

            history.append(x)

            x_new = x - fx / dfx

            # Check convergence
            if abs(x_new - x) < tolerance:
                x = x_new
                history.append(x)
                break

            x = x_new
        except OverflowError:
            # Catch errors if numbers become too large
            break

    return history


def chord_method(f, df_x0, x0, tolerance=1e-12, max_iter=50):
    """
    Implements Chord Method using fixed slope m = f'(x0) with overflow protection.
    Returns: list of iterates x_k (the iteration history)
    """
    history = []
    x = x0
    m = df_x0  # Fixed slope

    for i in range(max_iter):
        try:
            if abs(x) > 1e100:
                break

            fx = f(x)
            history.append(x)

            if abs(m) < 1e-15:
                break

            x_new = x - fx / m

            if abs(x_new - x) < tolerance:
                x = x_new
                history.append(x)
                break
            x = x_new
        except OverflowError:
            break

    return history


def secant_method(f, x0, x_minus_1, tolerance=1e-12, max_iter=20):
    """
    Implements Secant Method with overflow protection.
    Returns: list of iterates x_k (the iteration history)
    """
    history = []
    x_curr = x0
    x_prev = x_minus_1

    for i in range(max_iter):
        try:
            if abs(x_curr) > 1e100:
                break

            history.append(x_curr)

            fx_curr = f(x_curr)
            fx_prev = f(x_prev)

            if abs(fx_curr - fx_prev) < 1e-15:
                break

            # Secant update
            x_new = x_curr - fx_curr * (x_curr - x_prev) / (fx_curr - fx_prev)

            if abs(x_new - x_curr) < tolerance:
                x_curr = x_new
                history.append(x_curr)
                break

            x_prev = x_curr
            x_curr = x_new
        except OverflowError:
            break

    return history


def solve_and_save():
    # Definition of problems
    problems = [
        {
            'id': 'a',
            'title': r'$f(x) = \cos(x) - x, x_0 = 0.5$',
            'func_name': 'cos(x) - x',
            'f': lambda x: np.cos(x) - x,
            'df': lambda x: -np.sin(x) - 1,
            'x0': 0.5,
            'true_root': 0.73908513321516064165531208767387340401341175890075746496568063577328465488354759  # From wolframalpha
        },
        {
            'id': 'b',
            'title': r'$f(x) = \sin(x), x_0 = 3$',
            'func_name': 'sin(x)',
            'f': lambda x: np.sin(x),
            'df': lambda x: np.cos(x),
            'x0': 3.0,
            'true_root': np.pi
        },
        {
            'id': 'c',
            'title': r'$f(x) = x^2 + 1, x_0 = 0.5$',
            'func_name': 'x^2 + 1',
            'f': lambda x: x**2 + 1,
            'df': lambda x: 2*x,
            'x0': 0.5,
            'true_root': None  # No real root
        }
    ]

    # Open file for writing text results
    output_filename = 'numerical_results.txt'
    image_filename = 'convergence_results.png'

    with open(output_filename, 'w', encoding='utf-8') as txt_file:

        fig, axes = plt.subplots(1, 3)

        for idx, prob in enumerate(problems):
            header_str = f"\n{'='*60}\nProblem ({prob['id']}): {prob['func_name']}, x0={prob['x0']}\n{'='*60}\n"
            print(header_str)  # Print to console to show progress
            txt_file.write(header_str)

            x0 = prob['x0']
            f = prob['f']
            df = prob['df']

            # 1. Run Newton
            hist_newton = newton_method(f, df, x0)

            # 2. Run Chord (m = f'(x0))
            hist_chord = chord_method(f, df(x0), x0)

            # 3. Run Secant (x_-1 = 0.99 * x0)
            x_minus_1 = 0.99 * x0
            hist_secant = secant_method(f, x0, x_minus_1)

            methods_data = {
                "Newton": hist_newton,
                "Chord": hist_chord,
                "Secant": hist_secant
            }

            ax = axes[idx]

            for m_name, history in methods_data.items():
                iters = list(range(len(history)))
                errors = []

                # Calculate errors for plotting
                # Note: Handling overflow for plotting large errors in problem c
                for x in history:
                    try:
                        if prob['true_root'] is not None:
                            val = abs(x - prob['true_root'])
                        else:
                            val = abs(f(x))  # Residual for problem c
                        errors.append(val)
                    except OverflowError:
                        errors.append(float('inf'))

                # Write to TXT file
                txt_file.write(f"\nMethod: {m_name}\n")
                txt_file.write(f"{'Iter':<5} | {'Value (x_k)':<25} | {'Error/Resid':<25}\n")
                txt_file.write("-" * 60 + "\n")

                for i, val in enumerate(history):
                    # Only write full history if short, or head/tail if long
                    if len(history) < 20 or i < 5 or i > len(history) - 3:
                        if prob['true_root'] is not None:
                            err_val = abs(val - prob['true_root'])
                        else:
                            try:
                                err_val = abs(f(val))
                            except OverflowError:
                                err_val = float('inf')

                        txt_file.write(f"{i:<5} | {val:<25.10g} | {err_val:<25.6e}\n")
                    elif i == 5:
                        txt_file.write("...\n")

                # Add to plot
                if prob['true_root'] is not None:
                    ylabel_text = "Error |x_k - x*|"
                    log_scale = True
                    ax.plot(iters, errors, marker='o', label=m_name)
                else:
                    ylabel_text = "Residual |f(x_k)|"
                    log_scale = False
                    # For problem C, data explodes, so we limit plotting range to avoid ruining the graph
                    valid_errors = [e for e in errors if e < 1e10]
                    valid_iters = iters[:len(valid_errors)]
                    ax.plot(valid_iters, valid_errors, marker='x', label=m_name)

            ax.set_title(f"Problem ({prob['id']})\n{prob['title']}")
            ax.set_xlabel("Iteration")
            ax.set_ylabel(ylabel_text)
            if log_scale:
                ax.set_yscale('log')
            ax.legend()
            if prob['id'] == 'c':
                ax.set_ylim(0, 20)
                ax.set_xticks(range(0, len(iters), 1))
            ax.grid(True)

        print(f"\nText results saved to: {output_filename}")

    plt.tight_layout()
    plt.savefig(image_filename)
    print(f"Chart saved to: {image_filename}")


if __name__ == "__main__":
    solve_and_save()
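As a quick reference for how the three solvers above are driven, here is a minimal usage sketch. It assumes the functions are importable from homework10/Homework10_Q1.py; the handles mirror problem (a) in `solve_and_save`, and the quoted root is the value used there.

```python
import numpy as np
# assumes newton_method, chord_method, secant_method from homework10/Homework10_Q1.py are in scope

# Problem (a): f(x) = cos(x) - x, starting from x0 = 0.5
f = lambda x: np.cos(x) - x
df = lambda x: -np.sin(x) - 1

hist_newton = newton_method(f, df, 0.5)            # quadratic convergence expected
hist_chord = chord_method(f, df(0.5), 0.5)         # fixed slope m = f'(x0)
hist_secant = secant_method(f, 0.5, 0.99 * 0.5)    # x_{-1} = 0.99 * x0

# Each call returns the list of iterates; the last entry approximates the root near 0.7390851332.
print(len(hist_newton), len(hist_chord), len(hist_secant))
print(hist_newton[-1], hist_chord[-1], hist_secant[-1])
```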
1
homework10/comment.txt
Normal file
@@ -0,0 +1 @@
For problems (a) and (b), which have real roots, all methods converged; Newton's method was the fastest (quadratic), followed by the Secant method (superlinear), with the Chord method being the slowest (linear). However, for problem (c) (x^2 + 1), which has no real roots, all methods failed: the Chord method rapidly diverged to infinity, while Secant and Newton's method oscillated.
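For reference, the convergence orders named in the comment correspond to the standard asymptotic error models below (stated here as background, not derived in this repository), with e_k = |x_k - x*|:

```latex
% Newton: quadratic; Secant: superlinear with the golden-ratio order; Chord: linear.
\[
e_{k+1} \approx C\,e_k^{2} \ \text{(Newton)}, \qquad
e_{k+1} \approx C\,e_k^{(1+\sqrt{5})/2} \ \text{(Secant)}, \qquad
e_{k+1} \approx \rho\,e_k,\ 0<\rho<1 \ \text{(Chord)}.
\]
```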
BIN
homework10/convergence_results.png
Normal file
Size: 128 KiB
112
homework10/numerical_results.txt
Normal file
@@ -0,0 +1,112 @@
============================================================
Problem (a): cos(x) - x, x0=0.5
============================================================

Method: Newton
Iter | Value (x_k) | Error/Resid
------------------------------------------------------------
0 | 0.5 | 2.390851e-01
1 | 0.7552224171 | 1.613728e-02
2 | 0.7391416661 | 5.653293e-05
3 | 0.7390851339 | 7.056461e-10
4 | 0.7390851332 | 0.000000e+00
5 | 0.7390851332 | 0.000000e+00

Method: Chord
Iter | Value (x_k) | Error/Resid
------------------------------------------------------------
0 | 0.5 | 2.390851e-01
1 | 0.7552224171 | 1.613728e-02
2 | 0.7369022576 | 2.182876e-03
3 | 0.7393704622 | 2.853290e-04
4 | 0.7390476612 | 3.747205e-05
...
13 | 0.7390851332 | 4.333200e-13
14 | 0.7390851332 | 5.695444e-14

Method: Secant
Iter | Value (x_k) | Error/Resid
------------------------------------------------------------
0 | 0.5 | 2.390851e-01
1 | 0.7556018135 | 1.651668e-02
2 | 0.738106944 | 9.781892e-04
3 | 0.7390815948 | 3.538433e-06
4 | 0.739085134 | 7.646579e-10
5 | 0.7390851332 | 5.551115e-16
6 | 0.7390851332 | 0.000000e+00

============================================================
Problem (b): sin(x), x0=3.0
============================================================

Method: Newton
Iter | Value (x_k) | Error/Resid
------------------------------------------------------------
0 | 3 | 1.415927e-01
1 | 3.142546543 | 9.538895e-04
2 | 3.141592653 | 2.893161e-10
3 | 3.141592654 | 0.000000e+00
4 | 3.141592654 | 0.000000e+00

Method: Chord
Iter | Value (x_k) | Error/Resid
------------------------------------------------------------
0 | 3 | 1.415927e-01
1 | 3.142546543 | 9.538895e-04
2 | 3.141583011 | 9.642404e-06
3 | 3.141592751 | 9.747184e-08
4 | 3.141592653 | 9.853101e-10
5 | 3.141592654 | 9.960477e-12
6 | 3.141592654 | 1.003642e-13
7 | 3.141592654 | 1.332268e-15

Method: Secant
Iter | Value (x_k) | Error/Resid
------------------------------------------------------------
0 | 3 | 1.415927e-01
1 | 3.142873442 | 1.280788e-03
2 | 3.141588403 | 4.250744e-06
3 | 3.141592654 | 1.158629e-12
4 | 3.141592654 | 0.000000e+00
5 | 3.141592654 | 0.000000e+00

============================================================
Problem (c): x^2 + 1, x0=0.5
============================================================

Method: Newton
Iter | Value (x_k) | Error/Resid
------------------------------------------------------------
0 | 0.5 | 1.250000e+00
1 | -0.75 | 1.562500e+00
2 | 0.2916666667 | 1.085069e+00
3 | -1.568452381 | 3.460043e+00
4 | -0.4654406117 | 1.216635e+00
...
18 | -1.820817243 | 4.315375e+00
19 | -0.6358066524 | 1.404250e+00

Method: Chord
Iter | Value (x_k) | Error/Resid
------------------------------------------------------------
0 | 0.5 | 1.250000e+00
1 | -0.75 | 1.562500e+00
2 | -2.3125 | 6.347656e+00
3 | -8.66015625 | 7.599831e+01
4 | -84.65846252 | 7.168055e+03
...
8 | -7.660265913e+30 | 5.867967e+61
9 | -5.867967385e+61 | 3.443304e+123

Method: Secant
Iter | Value (x_k) | Error/Resid
------------------------------------------------------------
0 | 0.5 | 1.250000e+00
1 | -0.756281407 | 1.571962e+00
2 | 5.37745098 | 2.991698e+01
3 | -1.096446714 | 2.202195e+00
4 | -1.610857647 | 3.594862e+00
...
18 | 0.3478146096 | 1.120975e+00
19 | -2.615940634 | 7.843145e+00
159
homework11/Homework11_Q5.py
Normal file
@@ -0,0 +1,159 @@
import numpy as np
import matplotlib.pyplot as plt

# --- 1. Define Function, Gradient, and Hessian ---
# Objective function: Rosenbrock function
def objective_function(x):
    x1, x2 = x[0], x[1]
    return 100 * (x2 - x1**2)**2 + (1 - x1)**2

# Gradient vector calculation
def gradient(x):
    x1, x2 = x[0], x[1]
    df_dx1 = -400 * x1 * (x2 - x1**2) - 2 * (1 - x1)
    df_dx2 = 200 * (x2 - x1**2)
    return np.array([df_dx1, df_dx2])

# Hessian matrix calculation
def hessian(x):
    x1, x2 = x[0], x[1]
    d2f_dx1dx1 = -400 * (x2 - 3 * x1**2) + 2
    d2f_dx1dx2 = -400 * x1
    d2f_dx2dx1 = -400 * x1
    d2f_dx2dx2 = 200
    return np.array([[d2f_dx1dx1, d2f_dx1dx2], [d2f_dx2dx1, d2f_dx2dx2]])

# --- 2. Backtracking Line Search ---
def backtracking_line_search(x, p, grad, alpha_bar=1.0, rho=0.5, c1=1e-3):
    """
    Finds a step size alpha that satisfies the 1st Wolfe condition (Armijo condition).
    Condition: f(x + alpha*p) <= f(x) + c1 * alpha * grad^T * p
    """
    alpha = alpha_bar
    f_val = objective_function(x)
    grad_dot_p = np.dot(grad, p)

    # Loop until condition is met
    while objective_function(x + alpha * p) > f_val + c1 * alpha * grad_dot_p:
        alpha *= rho
        # Safety break to prevent infinite loops if alpha becomes too small
        if alpha < 1e-16:
            break
    return alpha

# --- 3. Optimization Algorithms ---
def run_optimization(method, x0, tol_ratio=1e-12, max_iter=20000):
    """
    Runs the specified optimization method.
    Returns: list of alphas used, total iterations
    """
    x = x0.copy()
    grad_0_norm = np.linalg.norm(gradient(x0))

    alphas = []

    print(f"[{method}] Starting from {x0}...")

    for i in range(max_iter):
        grad = gradient(x)
        grad_norm = np.linalg.norm(grad)

        # Convergence Check: ||grad f(xk)|| < 10^-12 * ||grad f(x0)||
        if grad_norm < tol_ratio * grad_0_norm:
            print(f" -> Converged at iteration {i}. Final Loss: {objective_function(x):.6e}")
            return alphas, i

        # Determine Search Direction
        if method == "Steepest_Descent":
            direction = -grad
        elif method == "Newton":
            hess = hessian(x)
            try:
                # Solve H * d = -g for d
                direction = np.linalg.solve(hess, -grad)
            except np.linalg.LinAlgError:
                # Fallback if Hessian is singular
                direction = -grad

        # Perform Backtracking Line Search
        alpha = backtracking_line_search(x, direction, grad, alpha_bar=1.0, rho=0.5, c1=1e-3)

        # Update Position
        x = x + alpha * direction

        alphas.append(alpha)

    print(f" -> Did not converge within {max_iter} iterations.")
    print(f" -> Final Gradient Norm: {grad_norm:.6e}")
    return alphas, max_iter

# --- 4. Main Execution and Plotting ---

# Configuration
initial_points = [np.array([1.2, 1.2]), np.array([-1.2, 1.0])]
methods = ["Steepest_Descent", "Newton"]

# Dictionary to hold simulation results
results = {}

# Run simulations
for x0 in initial_points:
    for method in methods:
        # Construct a unique key for storage
        key = f"{method}_x0_{x0}"
        alphas, num_iters = run_optimization(method, x0)
        results[key] = {'alphas': alphas, 'iters': num_iters, 'x0': x0}

# Plot configuration
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
plt.subplots_adjust(hspace=0.4, wspace=0.3)

# Mapping results to subplots
plot_mapping = {
    ("Steepest_Descent", str(initial_points[0])): (0, 0),
    ("Newton", str(initial_points[0])): (0, 1),
    ("Steepest_Descent", str(initial_points[1])): (1, 0),
    ("Newton", str(initial_points[1])): (1, 1),
}

for key, data in results.items():
    method_name = key.split("_x0_")[0]
    x0_str = str(data['x0'])

    ax_idx = plot_mapping[(method_name, x0_str)]
    ax = axes[ax_idx]

    y_data = data['alphas']
    x_data = range(len(y_data))

    # --- PLOTTING LOGIC MODIFICATION ---
    # For Steepest Descent, show only first {limit} steps
    if method_name == "Steepest_Descent":
        limit = 30
        if len(y_data) > limit:
            y_data = y_data[:limit]
            x_data = range(limit)
            title_suffix = f" (First {limit} Steps Shown)"
        else:
            title_suffix = ""
    else:
        title_suffix = ""

    ax.plot(x_data, y_data, marker='o', linestyle='-', markersize=4, linewidth=1.5)

    # Titles and Labels
    ax.set_title(f"{method_name}\nStart: {data['x0']}\nTotal Iters: {data['iters']}{title_suffix}")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Step Size (Alpha)")
    ax.grid(True, which='both', linestyle='--', alpha=0.7)

    # Add annotation for total iterations
    ax.text(0.95, 0.95, f"Total Iters: {data['iters']}",
            transform=ax.transAxes,
            verticalalignment='top', horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

# Save and Show
#plt.suptitle("Step Size vs Iteration (Rosenbrock Function)", fontsize=16)
plt.savefig("optimization_results.png")
#plt.show()
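A small self-check, not part of the homework file, for the analytic gradient used above: compare it against a central finite-difference approximation at one of the starting points. The helper name `fd_gradient` and the step `h` are illustrative choices.

```python
import numpy as np
# assumes objective_function and gradient from homework11/Homework11_Q5.py are in scope

def fd_gradient(func, x, h=1e-6):
    """Central finite-difference gradient of func at x."""
    g = np.zeros_like(x, dtype=float)
    for i in range(len(x)):
        e = np.zeros_like(x, dtype=float)
        e[i] = h
        g[i] = (func(x + e) - func(x - e)) / (2.0 * h)
    return g

x_test = np.array([-1.2, 1.0])
print(np.max(np.abs(gradient(x_test) - fd_gradient(objective_function, x_test))))
# The difference should be tiny (around 1e-5 or smaller) if the analytic gradient is correct.
```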
BIN
homework11/optimization_results.png
Normal file
Size: 142 KiB
81
homework8/Homework8_Q5.py
Normal file
@@ -0,0 +1,81 @@
import numpy as np
import matplotlib.pyplot as plt

# --- FUNCTION DEFINITION CORRECTED HERE ---
def g(t):
    """
    Calculates g(t) = t^4 - 3t^3 + 2t^2 in a vectorized way.
    It handles both single float inputs and NumPy array inputs.
    If an element's absolute value is too large, it returns infinity
    to prevent overflow and handle plotting of diverging paths.
    """
    # Use np.where for a vectorized if-else statement.
    # This correctly handles both scalar and array inputs.
    return np.where(np.abs(t) > 1e10,            # Condition
                    np.inf,                      # Value if condition is True
                    t**4 - 3*t**3 + 2*t**2)      # Value if condition is False

# Define the gradient (derivative) of g(t)
def grad_g(t):
    # This function is only called with scalars in our loop,
    # but it's good practice for it to be vectorization-safe as well.
    # Standard arithmetic operations are already vectorized in NumPy.
    return 4*t**3 - 9*t**2 + 4*t

# Implement the gradient descent algorithm
def gradient_descent(x0, alpha, iterations=100):
    history = [x0]
    x = x0
    for _ in range(iterations):
        grad = grad_g(x)

        if np.isinf(x) or np.isnan(x) or np.abs(x) > 1e9:
            print(f"Divergence detected for alpha = {alpha}. Stopping early.")
            break

        x = x - alpha * grad
        history.append(x)
    if not np.isinf(x) and not np.isnan(x) and np.abs(x) <= 1e9:
        print(f"Alpha: {alpha:<.2f}, Current x: {x:<.2f}, g(x): {g(x):<.2f}")
    return history

# --- Main part of the experiment ---

# Set an initial point
initial_xs = [-1.0, 0.5, 0.65, 2.5]

# Define a list of different alpha values to test
alphas_to_test = [0.05, 0.15, 0.25, 0.4, 0.5, 0.6]

# Run the experiment for each alpha value and plot the results
for initial_x in initial_xs:
    print(f"Running gradient descent from initial x = {initial_x}")
    # Create a plot
    plt.figure(figsize=(14, 8))

    # Plot the function g(t) for context
    t_plot = np.linspace(-1.5, 3.5, 400)
    plt.plot(t_plot, g(t_plot), 'k-', label='g(t) = t^4 - 3t^3 + 2t^2', linewidth=2)
    for alpha in alphas_to_test:
        history = gradient_descent(initial_x, alpha)
        history_np = np.array(history)
        plt.plot(history_np, g(history_np), 'o-', label=f'alpha = {alpha}', markersize=4, alpha=0.8)

    # Add stationary points
    minima1 = 0.0
    minima2 = (9 + np.sqrt(17)) / 8
    maxima = (9 - np.sqrt(17)) / 8
    plt.plot(minima1, g(minima1), 'g*', markersize=15, label=f'Local Minimum at t={minima1:.2f}')
    plt.plot(minima2, g(minima2), 'g*', markersize=15, label=f'Local Minimum at t≈{minima2:.2f}')
    plt.plot(maxima, g(maxima), 'rX', markersize=10, label=f'Local Maximum at t≈{maxima:.2f}')

    # Final plot formatting
    plt.title('Gradient Descent Convergence for Different Step Sizes (alpha, initial x={})'.format(initial_x))
    plt.xlabel('t')
    plt.ylabel('g(t)')
    plt.legend()
    plt.grid(True)
    plt.ylim(-1, 5)
    plt.xlim(-1.5, 3.5)
    plt.savefig(f'Homework8_Q5_Gradient_Descent_Convergence_x={initial_x}.png')
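One way to read the divergence pattern recorded in Homework8_Q5.txt below: near a local minimum t*, plain gradient descent is locally stable only when alpha < 2 / g''(t*). The sketch below (an editorial aside, not part of the repository) evaluates that bound at the two minima; it is a local heuristic only, since far from a minimum the large gradient can still cause divergence, as seen for the runs starting at x = 2.5.

```python
import numpy as np

def g_second(t):
    # g''(t) for g(t) = t^4 - 3t^3 + 2t^2
    return 12 * t**2 - 18 * t + 4

for t_star in [0.0, (9 + np.sqrt(17)) / 8]:   # the two local minima of g
    print(f"t* = {t_star:.3f}, local stability threshold 2/g''(t*) = {2 / g_second(t_star):.3f}")
# Roughly alpha < 0.5 near t* = 0 and alpha < ~0.3 near t* = 1.64, which is broadly
# consistent with which step sizes converge in the printed results.
```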
28
homework8/Homework8_Q5.txt
Normal file
@@ -0,0 +1,28 @@
Running gradient descent from initial x = -1.0
Alpha: 0.05, Current x: -0.00, g(x): 0.00
Alpha: 0.15, Current x: 1.64, g(x): -0.62
Divergence detected for alpha = 0.25. Stopping early.
Divergence detected for alpha = 0.4. Stopping early.
Divergence detected for alpha = 0.5. Stopping early.
Divergence detected for alpha = 0.6. Stopping early.
Running gradient descent from initial x = 0.5
Alpha: 0.05, Current x: 0.00, g(x): 0.00
Alpha: 0.15, Current x: 0.00, g(x): 0.00
Alpha: 0.25, Current x: 0.00, g(x): 0.00
Alpha: 0.40, Current x: -0.00, g(x): 0.00
Alpha: 0.50, Current x: 0.02, g(x): 0.00
Alpha: 0.60, Current x: 0.18, g(x): 0.05
Running gradient descent from initial x = 0.65
Alpha: 0.05, Current x: 1.64, g(x): -0.62
Alpha: 0.15, Current x: 1.64, g(x): -0.62
Alpha: 0.25, Current x: 1.64, g(x): -0.62
Alpha: 0.40, Current x: 1.51, g(x): -0.57
Alpha: 0.50, Current x: 0.01, g(x): 0.00
Divergence detected for alpha = 0.6. Stopping early.
Running gradient descent from initial x = 2.5
Alpha: 0.05, Current x: 1.64, g(x): -0.62
Alpha: 0.15, Current x: 0.00, g(x): 0.00
Divergence detected for alpha = 0.25. Stopping early.
Divergence detected for alpha = 0.4. Stopping early.
Divergence detected for alpha = 0.5. Stopping early.
Divergence detected for alpha = 0.6. Stopping early.
BIN
homework8/Homework8_Q5_Gradient_Descent_Convergence_x=-1.0.png
Normal file
Size: 82 KiB
BIN
homework8/Homework8_Q5_Gradient_Descent_Convergence_x=0.5.png
Normal file
Size: 66 KiB
BIN
homework8/Homework8_Q5_Gradient_Descent_Convergence_x=0.65.png
Normal file
Size: 91 KiB
BIN
homework8/Homework8_Q5_Gradient_Descent_Convergence_x=2.5.png
Normal file
Size: 82 KiB
@@ -16,15 +16,15 @@ As $\epsilon \rightarrow 0$, the normal equation method numerically unstable. A
 Results:
 | $\epsilon$ | Normal Equation Method `proj(A, b)` | SVD Method `proj_SVD(A, b)` | Difference (Normal Eq - SVD) |
 |:---:|:---|:---|:---|
-| **1.0** | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[ 1.78e-15 -2.22e-15 -8.88e-16 8.88e-16 0.00e+00]` |
+| **1.0** | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[ 2.22e-16 1.55e-15 -4.44e-16 2.22e-16 -8.88e-16]` |
-| **0.1** | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[ 7.28e-14 -4.44e-16 -2.66e-14 -1.62e-14 -5.28e-14]` |
+| **0.1** | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[ 8.08e-14 5.87e-14 -8.88e-16 4.82e-14 -1.38e-14]` |
-| **0.01** | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[-5.45e-12 -7.28e-12 -3.45e-13 -4.24e-12 -4.86e-12]` |
+| **0.01** | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[-4.77e-12 -1.84e-14 5.54e-13 -4.00e-12 1.50e-12]` |
-| **1e-4** | `[1.85714297 1.00000012 3.14285716 1.28571442 3.85714302]` | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[1.11e-07 1.19e-07 1.92e-08 1.36e-07 1.67e-07]` |
+| **1e-4** | `[1.85714282 1. 3.14285716 1.28571427 3.85714291]` | `[1.85714286 1. 3.14285714 1.28571429 3.85714286]` | `[-3.60e-08 9.11e-13 1.94e-08 -1.20e-08 4.80e-08]` |
-| **1e-8** | **Error:** `LinAlgError: Singular matrix` | `[1.85714286 1. 3.14285714 1.28571428 3.85714286]` | `Could not compute difference due to previous error.` |
+| **1e-8** | `[-1.87500007 0.99999993 -3.12499997 -2.62500007 3.49999996]` | `[1.85714286 1. 3.14285714 1.28571427 3.85714286]` | `[-3.73e+00 -7.45e-08 -6.27e+00 -3.91e+00 -3.57e-01]` |
-| **1e-16**| **Error:** `ValueError: Matrix A must be full rank` | `[1.81820151 1. 3.18179849 2.29149804 2.89030045]` | `Could not compute difference due to previous error.` |
+| **1e-16**| **Error:** `ValueError: Matrix A must be full rank` | `[3.4 1. 1.6 1.8 1.8]` | `Could not compute difference due to previous error.` |
-| **1e-32**| **Error:** `ValueError: Matrix A must be full rank` | `[2. 1. 3. 2.5 2.5]` | `Could not compute difference due to previous error.` |
+| **1e-32**| **Error:** `ValueError: Matrix A must be full rank` | `[3.4 1. 1.6 1.8 1.8]` | `Could not compute difference due to previous error.` |

-Numerical experiments show that as $\epsilon$ becomes small, the difference between the two methods increases. When $\epsilon$ becomes very small (e.g., 1e-8), the normal equation method fails while the SVD method continues to provide a stable solution.
+Numerical experiments show that as $\epsilon$ becomes small, the difference between the two methods increases. When $\epsilon$ becomes very small (e.g., 1e-8), the normal equation method can't give a valid result and fails when $\epsilon$ continues to decrease (e.g., 1e-16), while the SVD method continues to provide a stable solution.

 ***
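A one-line explanation of the failure mode recorded in this diff, using the standard conditioning argument rather than anything computed here: forming the normal equations squares the condition number, so the Gram matrix becomes numerically singular long before the SVD route breaks down.

```latex
% For full-column-rank A (2-norm condition number):
\[
\kappa_2(A^* A) = \kappa_2(A)^2 ,
\]
% so once \kappa_2(A) approaches 1/\sqrt{u} (u = unit roundoff, about 1e-16 in double precision),
% A^*A is numerically singular even though A itself is still usable via its SVD.
```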
@@ -4,8 +4,8 @@ Projection of b onto the column space of A(1): (Using proj function)
 Projection of b onto the column space of A(1): (Using proj_SVD function)
 [1.85714286 1. 3.14285714 1.28571429 3.85714286]
 Difference between the two methods:
-[ 1.77635684e-15 -2.22044605e-15 -8.88178420e-16 8.88178420e-16
- 0.00000000e+00]
+[ 2.22044605e-16 1.55431223e-15 -4.44089210e-16 2.22044605e-16
+ -8.88178420e-16]

 Question 1(b):

@@ -15,8 +15,8 @@ Projection of b onto the column space of A(1.0):
 Projection of b onto the column space of A(1.0) using SVD:
 [1.85714286 1. 3.14285714 1.28571429 3.85714286]
 Difference between the two methods:
-[ 1.77635684e-15 -2.22044605e-15 -8.88178420e-16 8.88178420e-16
- 0.00000000e+00]
+[ 2.22044605e-16 1.55431223e-15 -4.44089210e-16 2.22044605e-16
+ -8.88178420e-16]

 For ε = 0.1:
 Projection of b onto the column space of A(0.1):
@@ -24,8 +24,8 @@ Projection of b onto the column space of A(0.1):
 Projection of b onto the column space of A(0.1) using SVD:
 [1.85714286 1. 3.14285714 1.28571429 3.85714286]
 Difference between the two methods:
-[ 7.28306304e-14 -4.44089210e-16 -2.66453526e-14 -1.62092562e-14
- -5.28466160e-14]
+[ 8.08242362e-14 5.87307980e-14 -8.88178420e-16 4.81836793e-14
+ -1.37667655e-14]

 For ε = 0.01:
 Projection of b onto the column space of A(0.01):
@@ -33,35 +33,37 @@ Projection of b onto the column space of A(0.01):
 Projection of b onto the column space of A(0.01) using SVD:
 [1.85714286 1. 3.14285714 1.28571429 3.85714286]
 Difference between the two methods:
-[-5.45297141e-12 -7.28239691e-12 -3.44613227e-13 -4.24371649e-12
- -4.86499729e-12]
+[-4.76907402e-12 -1.84297022e-14 5.53779245e-13 -4.00324218e-12
+ 1.49613655e-12]

 For ε = 0.0001:
 Projection of b onto the column space of A(0.0001):
-[1.85714297 1.00000012 3.14285716 1.28571442 3.85714302]
+[1.85714282 1. 3.14285716 1.28571427 3.85714291]
 Projection of b onto the column space of A(0.0001) using SVD:
 [1.85714286 1. 3.14285714 1.28571429 3.85714286]
 Difference between the two methods:
-[1.11406275e-07 1.19209290e-07 1.91642426e-08 1.35516231e-07
- 1.67459071e-07]
+[-3.59740875e-08 9.10937992e-13 1.94238092e-08 -1.19938544e-08
+ 4.79721098e-08]

 For ε = 1e-08:
-LinAlgError for eps=1e-08: Singular matrix
+Projection of b onto the column space of A(1e-08):
+[-1.87500007 0.99999993 -3.12499997 -2.62500007 3.49999996]
 Projection of b onto the column space of A(1e-08) using SVD:
-[1.85714286 1. 3.14285714 1.28571428 3.85714286]
+[1.85714286 1. 3.14285714 1.28571427 3.85714286]
 Difference between the two methods:
-Could not compute difference due to previous error.
+[-3.73214294e+00 -7.45058057e-08 -6.26785711e+00 -3.91071435e+00
+ -3.57142909e-01]

 For ε = 1e-16:
 ValueError for eps=1e-16: Matrix A must be full rank.
 Projection of b onto the column space of A(1e-16) using SVD:
-[1.81820151 1. 3.18179849 2.29149804 2.89030045]
+[3.4 1. 1.6 1.8 1.8]
 Difference between the two methods:
 Could not compute difference due to previous error.

 For ε = 1e-32:
 ValueError for eps=1e-32: Matrix A must be full rank.
 Projection of b onto the column space of A(1e-32) using SVD:
-[2. 1. 3. 2.5 2.5]
+[3.4 1. 1.6 1.8 1.8]
 Difference between the two methods:
 Could not compute difference due to previous error.
(modified image: Before 71 KiB, After 71 KiB)
BIN
project1/Project1_A3d_least_squares_approximation.png
Normal file
Size: 91 KiB
(modified image: Before 38 KiB, After 38 KiB)
@@ -38,9 +38,13 @@ def proj_SVD(A:np.ndarray, b:np.ndarray) -> np.ndarray:
     = U S V^* (V S^2 V^*)^(-1) V S U^* b
     = U S V^* V S^(-2) V^* V S U^* b
     = U U^* b
+    If A = U S V^*, then the projection onto the column space of A is:
+        proj_A(b) = U_r U_r^* b
+    where U_r are the left singular vectors corresponding to nonzero singular values.
     """
+    U_r = U[:, :np.linalg.matrix_rank(A)]  # Take only the relevant columns
     # Compute the projection using the SVD components
-    projection = U @ U.conj().T @ b
+    projection = U_r @ U_r.conj().T @ b
     return projection

 def build_A(eps: float) -> np.ndarray:
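The change above, projecting with U_r instead of the full U, shown in isolation as a small self-contained sketch. The function name and test matrix are mine; the rank estimate follows the diff in using `np.linalg.matrix_rank`.

```python
import numpy as np

def project_onto_colspace(A: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Orthogonal projection of b onto range(A) via the SVD, keeping only the
    left singular vectors associated with the numerical rank of A."""
    U, s, Vh = np.linalg.svd(A, full_matrices=False)
    U_r = U[:, :np.linalg.matrix_rank(A)]   # same rank truncation as in the diff
    return U_r @ U_r.conj().T @ b

# Smoke test: b already lies in range(A), so the projection should return b.
A = np.array([[1.0, 0.0], [0.0, 1.0], [0.0, 0.0]])
b = np.array([2.0, -3.0, 0.0])
print(project_onto_colspace(A, b))   # -> [ 2. -3.  0.]
```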
@@ -73,18 +73,18 @@ def f1(t: float) -> float:
     """
     return 1.0 / (1.0 + t**2)

-def build_A(N: int, n: int, f: callable) -> tuple[np.ndarray, np.ndarray]:
+def build_A(N: int, n: int, f: callable, f_range: tuple) -> tuple[np.ndarray, np.ndarray]:
     """
     Build the matrix A for the least squares problem.

     Parameters:
     n + 1 (int): The number of columns in the matrix A.
+    f_range (tuple): The range of the function f.
     Returns:
     np.ndarray: The constructed matrix A.
     """
     """
-    {t_j}j=0->N, t_0=-5, t_N=5, t_j divides [-5, 5] equally
+    {t_j}j=0->N, t_0=f_range[0], t_N=f_range[1], t_j divides [f_range[0], f_range[1]] equally
     A = [[1, t_0, t_0^2, t_0^3, ..., t_0^n],
          [1, t_1, t_1^2, t_1^3, ..., t_1^n],
          ...
@@ -95,7 +95,7 @@ def build_A(N: int, n: int, f: callable) -> tuple[np.ndarray, np.ndarray]:
     Ax = b, x is the coeffs of the polynomial
     """
     A = np.zeros((N + 1, n + 1), dtype=float)
-    t = np.linspace(-5, 5, N + 1)
+    t = np.linspace(f_range[0], f_range[1], N + 1)
     for i in range(N + 1):
         A[i, :] = [t[i]**j for j in range(n + 1)]
     b = np.array([f(t_i) for t_i in t])
@@ -113,7 +113,7 @@ def question_3c():
         N = Ns[i]
         n = ns[i]
         A = np.zeros((N + 1, n + 1), dtype=float)
-        A, b = build_A(N, n, f1)
+        A, b = build_A(N, n, f1, (-5, 5))
         x = householder_lstsq(A, b)
         p_t = sum([x[j] * t**j for j in range(n + 1)])
         plt.subplot(1, 3, i + 1)
@@ -135,6 +135,32 @@ def f2(n:int, t: float) -> float:
     """
     return sum([t**i for i in range(n + 1)])

+def question_3d():
+    Ns = [25, 50, 100]
+    ns = Ns
+    t = np.linspace(-5, 5, 1000)
+    f_t = f1(t)
+    plt.figure(figsize=(15, 10))
+    for i in range(len(Ns)):
+        N = Ns[i]
+        n = ns[i]
+        A = np.zeros((N + 1, n + 1), dtype=float)
+        A, b = build_A(N, n, f1, (-5, 5))
+        x = householder_lstsq(A, b)
+        p_t = sum([x[j] * t**j for j in range(n + 1)])
+        plt.subplot(1, 3, i + 1)
+        plt.plot(t, f_t, label='f(t)', color='blue')
+        plt.plot(t, p_t, label='p(t)', color='red', linestyle='--')
+        plt.title(f'N={N}, n={n}')
+        plt.xlabel('t')
+        plt.ylabel('y')
+        plt.legend()
+        plt.grid(True, alpha=0.3)
+    plt.suptitle('Least Squares Polynomial Approximation using Householder QR')
+    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
+    plt.savefig('Project1_A3d_least_squares_approximation.png')
+    #plt.show()
+
 def question_3e():
     # n = 4, 6, 8, 10, ..., 20
     ns = list(range(4, 21, 2))
@@ -143,7 +169,7 @@ def question_3e():
     losses = []
     for n in ns:
         N = 2 * n
-        A, b = build_A(N, n, lambda t: f2(n, t))
+        A, b = build_A(N, n, lambda t: f2(n, t), (0, 1))
         x = householder_lstsq(A, b)
         true_coeffs = np.array([1.0 for _ in range(n + 1)])
         loss = np.linalg.norm(x - true_coeffs)
@@ -161,6 +187,7 @@ def question_3e():

 if __name__ == "__main__":
     question_3c()
+    question_3d()
     question_3e()
@@ -6,7 +6,7 @@ conda activate math6601_env
 pip install -r requirements.txt
 ```

-### Question 1
+### Run programs

 ```bash
 python Project1_Q1.py > Project1_A1.txt
162
project1/main.tex
Normal file
@@ -0,0 +1,162 @@
\documentclass{article}

% Language setting
\usepackage[english]{babel}

% Set page size and margins
\usepackage[letterpaper,top=2cm,bottom=2cm,left=3cm,right=3cm,marginparwidth=1.75cm]{geometry}

% Useful packages
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{amsfonts}
\usepackage[colorlinks=true, allcolors=blue]{hyperref}
\usepackage{array} % For better table column definitions

\title{MATH 6601 - Project 1 Report}
\author{Zhe Yuan (yuan.1435)}
\date{Sep 2025}

\begin{document}
\maketitle

\section{Projections}

\subsection*{(a)}
The projection of vector $b$ onto the column space of a full-rank matrix $A$ is given by $p=A(A^*A)^{-1}A^*b$. A function \texttt{proj(A, b)} was implemented based on this formula (see \texttt{Project1\_Q1.py}). For $\epsilon = 1$, the projection is:
\begin{verbatim}
p = [1.85714286, 1.0, 3.14285714, 1.28571429, 3.85714286]
\end{verbatim}

\subsection*{(b)}
As $\epsilon \rightarrow 0$, the normal equation method becomes numerically unstable. A more robust SVD-based method, \texttt{proj\_SVD(A, b)}, was implemented to handle this (see \texttt{Project1\_Q1.py}). The results are summarized in Table~\ref{tab:proj_results}.

\begin{table}[h!]
\centering
\small % Use a smaller font for the table
\begin{tabular}{|c|>{\ttfamily}p{3cm}|>{\ttfamily}p{3cm}|p{3.5cm}|}
\hline
\textbf{$\epsilon$} & \textbf{Normal Equation Method \texttt{proj(A,b)}} & \textbf{SVD Method \texttt{proj\_SVD(A,b)}} & \textbf{Difference (Normal Eq - SVD)} \\
\hline
\textbf{1.0} & [1.85714286 1. 3.14285714 1.28571429 3.85714286] & [1.85714286 1. 3.14285714 1.28571429 3.85714286] & [ 2.22e-16 1.55e-15 -4.44e-16 2.22e-16 -8.88e-16] \\
\hline
\textbf{0.1} & [1.85714286 1. 3.14285714 1.28571429 3.85714286] & [1.85714286 1. 3.14285714 1.28571429 3.85714286] & [ 8.08e-14 5.87e-14 -8.88e-16 4.82e-14 -1.38e-14] \\
\hline
\textbf{0.01} & [1.85714286 1. 3.14285714 1.28571429 3.85714286] & [1.85714286 1. 3.14285714 1.28571429 3.85714286] & [-4.77e-12 -1.84e-14 5.54e-13 -4.00e-12 1.50e-12] \\
\hline
\textbf{1e-4} & [1.85714282 1. 3.14285716 1.28571427 3.85714291] & [1.85714286 1. 3.14285714 1.28571429 3.85714286] & [-3.60e-08 9.11e-13 1.94e-08 -1.20e-08 4.80e-08] \\
\hline
\textbf{1e-8} & [-1.87500007 0.99999993 -3.12499997 -2.62500007 3.49999996] & [1.85714286 1. 3.14285714 1.28571427 3.85714286] & [-3.73e+00 -7.45e-08 -6.27e+00 -3.91e+00 -3.57e-01] \\
\hline
\textbf{1e-16} & \textbf{Error:} \texttt{ValueError: Matrix A must be full rank} & [3.4 1. 1.6 1.8 1.8] & Could not compute difference due to previous error. \\
\hline
\textbf{1e-32} & \textbf{Error:} \texttt{ValueError: Matrix A must be full rank} & [3.4 1. 1.6 1.8 1.8] & Could not compute difference due to previous error. \\
\hline
\end{tabular}
\caption{Comparison of results from the normal equation and SVD-based methods.}
\label{tab:proj_results}
\end{table}

\noindent Numerical experiments show that as $\epsilon$ becomes small, the difference between the two methods increases. When $\epsilon$ becomes very small (e.g., 1e-8), the normal equation method can't give a valid result and fails when $\epsilon$ continues to decrease (e.g., 1e-16), while the SVD method continues to provide a stable solution.

\section{QR Factorisation}

\subsection*{(a, b, c)}
Four QR factorization algorithms were implemented to find an orthonormal basis $Q$ for the column space of the $n \times n$ Hilbert matrix (for $n=2$ to $20$):
\begin{enumerate}
    \item Classical Gram-Schmidt (CGS, \texttt{cgs\_q})
    \item Modified Gram-Schmidt (MGS, \texttt{mgs\_q})
    \item Householder QR (\texttt{householder\_q})
    \item Classical Gram-Schmidt Twice (CGS-Twice, \texttt{cgs\_twice\_q})
\end{enumerate}
For implementation details, please see the \texttt{Project1\_Q2.py} file.

\subsection*{(d)}
The quality of the computed $Q$ from each method was assessed by plotting the orthogonality loss, $\log_{10}(\|I - Q^T Q\|_F)$, versus the matrix size $n$, as shown in Figure~\ref{fig:ortho_loss}.

\begin{figure}[h!]
\centering
\includegraphics[width=0.8\textwidth]{Project1_A2d_orthogonality_loss.png}
\caption{Orthogonality Loss vs. Matrix Size for QR Methods.}
\label{fig:ortho_loss}
\end{figure}

A summary of the loss and computation time for $n = 4, 9, 11$ is provided in Table~\ref{tab:qr_results}.

\begin{table}[h!]
\centering
\begin{tabular}{|c|l|r|r|}
\hline
\textbf{n} & \textbf{Method} & \textbf{Loss ($\log_{10}$)} & \textbf{Time (s)} \\
\hline
4 & Classical Gram-Schmidt & -10.311054 & 4.28e-05 \\
4 & Modified Gram-Schmidt & -12.374458 & 5.89e-05 \\
4 & Householder & -15.351739 & 1.42e-04 \\
4 & Classical Gram-Schmidt Twice & -15.570756 & 8.99e-05 \\
\hline
9 & Classical Gram-Schmidt & 0.389425 & 2.89e-04 \\
9 & Modified Gram-Schmidt & -5.257679 & 3.50e-04 \\
9 & Householder & -14.715202 & 6.19e-04 \\
9 & Classical Gram-Schmidt Twice & -8.398044 & 6.07e-04 \\
\hline
11 & Classical Gram-Schmidt & 0.609491 & 5.23e-04 \\
11 & Modified Gram-Schmidt & -2.005950 & 5.77e-04 \\
11 & Householder & -14.697055 & 8.68e-04 \\
11 & Classical Gram-Schmidt Twice & -1.745087 & 1.07e-03 \\
\hline
\end{tabular}
\caption{Loss and computation time for selected matrix sizes.}
\label{tab:qr_results}
\end{table}

\subsection*{(e)}
The speed and accuracy of the four methods were compared based on the plots in Figure~\ref{fig:ortho_loss} and Figure~\ref{fig:time_taken}.

\begin{figure}[h!]
\centering
\includegraphics[width=0.8\textwidth]{Project1_A2d_time_taken.png}
\caption{Computation Time vs. Matrix Size for QR Methods.}
\label{fig:time_taken}
\end{figure}

\begin{itemize}
    \item \textbf{Accuracy:} Householder is the most accurate and stable. MGS is significantly better than CGS. CGS-Twice improves on CGS, achieving accuracy comparable to MGS. CGS is highly unstable and loses orthogonality quickly.
    \item \textbf{Speed:} CGS and MGS are the fastest. Householder is slightly slower, and CGS-Twice is the slowest.
\end{itemize}

\section{Least Squares Problem}

\subsection*{(a)}
The problem is formulated as $Ax = b$, where $A$ is an $(N+1) \times (n+1)$ Vandermonde matrix, $x$ is the $(n+1) \times 1$ vector of unknown polynomial coefficients $[a_0, \dots, a_n]^T$, and $b$ is the $(N+1) \times 1$ vector of known function values $f(t_j)$.

\subsection*{(b)}
The least squares problem was solved using the implemented Householder QR factorization (\texttt{householder\_lstsq} in \texttt{Project1\_Q3.py}, the same function as in Question 2) to find the coefficient vector $x$.

\subsection*{(c)}
The function $f(t) = 1 / (1 + t^2)$ and its polynomial approximation $p(t)$ were plotted for $N=30$ and $n=5, 15, 30$, as shown in Figure~\ref{fig:ls_approx}.

\begin{figure}[h!]
\centering
\includegraphics[width=0.8\textwidth]{Project1_A3c_least_squares_approximation.png}
\caption{Least squares approximation of $f(t)=1/(1+t^2)$ for varying polynomial degrees $n$.}
\label{fig:ls_approx}
\end{figure}

When $n=15$, the approximation is excellent. When $n=30$, the polynomial interpolates the points but exhibits wild oscillations between them.

\subsection*{(d)}
No, $p(t)$ will not converge to $f(t)$ if $N = n$ tends to infinity. Polynomial interpolation on equally spaced nodes diverges for this function, as demonstrated by the severe oscillations in the $n=30$ case shown in Figure~\ref{fig:ls_approx}.

\subsection*{(e)}
The error between the computed and true coefficients was plotted against the polynomial degree $n$, as shown in Figure~\ref{fig:coeff_error}.

\begin{figure}[h!]
\centering
\includegraphics[width=0.8\textwidth]{Project1_A3e_coefficient_error.png}
\caption{Error in computed polynomial coefficients vs. polynomial degree $n$.}
\label{fig:coeff_error}
\end{figure}

The error in the computed coefficients grows significantly as $n$ increases.

\end{document}
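An aside on Section 3(d) of the report above (a background fact, not something computed in the project): with N = n the least squares fit degenerates to interpolation on equispaced nodes, which is exactly the classical non-convergent case for this function.

```latex
% With distinct nodes and N = n, the Vandermonde matrix is square and invertible,
% so the least squares solution interpolates the data:
\[
N = n \;\Longrightarrow\; p(t_j) = f(t_j), \quad j = 0, \dots, N,
\]
% and equispaced polynomial interpolation of f(t) = 1/(1+t^2) on [-5, 5] (the Runge example)
% does not converge as n grows, consistent with the oscillations reported for n = 30.
```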
BIN
project1/project-1.pdf
Normal file
154
project2/iterative_solvers.py
Normal file
@@ -0,0 +1,154 @@
import numpy as np
from typing import Callable, Any, List, Tuple

# Tolerance and maximum iterations
TOL = 1e-10
MAX_ITER = 20000

def stationary_method(
    b: np.ndarray,
    method: str,
    get_diag_inv: Callable[[int, Any], float],
    get_off_diag_sum: Callable[[np.ndarray, int, Any], float],
    mat_vec_prod: Callable[[np.ndarray, Any], np.ndarray],
    omega: float = 1.0,
    **kwargs
) -> Tuple[np.ndarray, int, List[float]]:
    """
    Solves Ax=b using stationary methods (Jacobi, Gauss-Seidel, SOR).
    """
    n = len(b)
    x = np.zeros(n)
    r0 = b.copy()
    r0_norm = np.linalg.norm(r0)
    if r0_norm == 0:
        return x, 0, [0.0]

    residual_history = []

    for k in range(MAX_ITER):
        # 1. Calculate current residual and its norm
        r = b - mat_vec_prod(x, **kwargs)
        res_norm = np.linalg.norm(r)

        # 2. Append to history
        residual_history.append(res_norm)

        # 3. Check for convergence
        if res_norm / r0_norm < TOL:
            return x, k, residual_history

        # 4. If not converged, perform the next iteration update
        if method == 'jacobi':
            x_old = x.copy()
            for i in range(n):
                off_diag_val = get_off_diag_sum(x_old, i, **kwargs)
                diag_inv_val = get_diag_inv(i, **kwargs)
                x[i] = diag_inv_val * (b[i] - off_diag_val)
        else:  # Gauss-Seidel and SOR
            x_k_iter = x.copy()  # Needed for SOR
            for i in range(n):
                off_diag_val = get_off_diag_sum(x, i, **kwargs)
                diag_inv_val = get_diag_inv(i, **kwargs)
                x_gs = diag_inv_val * (b[i] - off_diag_val)

                if method == 'gauss_seidel':
                    x[i] = x_gs
                elif method == 'sor':
                    x[i] = (1 - omega) * x_k_iter[i] + omega * x_gs

    # If loop finishes (MAX_ITER reached), calculate and append final residual
    r_final = b - mat_vec_prod(x, **kwargs)
    residual_history.append(np.linalg.norm(r_final))
    return x, MAX_ITER, residual_history


def steepest_descent(
    mat_vec_prod: Callable[[np.ndarray, Any], np.ndarray],
    b: np.ndarray,
    **kwargs
) -> Tuple[np.ndarray, int, List[float]]:
    """
    Solves Ax=b using Steepest Descent, with corrected residual logging.
    """
    x = np.zeros_like(b)
    r = b.copy()

    r0_norm = np.linalg.norm(r)
    if r0_norm == 0:
        return x, 0, [0.0]

    residual_history = []

    for k in range(MAX_ITER):
        # 1. Current residual norm is from previous step (or initial)
        res_norm = np.linalg.norm(r)

        # 2. Append to history
        residual_history.append(res_norm)

        # 3. Check for convergence BEFORE the update
        if res_norm / r0_norm < TOL:
            return x, k, residual_history

        # 4. If not converged, compute update
        Ar = mat_vec_prod(r, **kwargs)
        r_dot_Ar = np.dot(r, Ar)
        if r_dot_Ar == 0:  # Should not happen for SPD if r != 0
            break
        alpha = res_norm**2 / r_dot_Ar

        x += alpha * r
        r -= alpha * Ar

    # If loop finishes, append final residual
    residual_history.append(np.linalg.norm(r))
    return x, MAX_ITER, residual_history


def conjugate_gradient(
    mat_vec_prod: Callable[[np.ndarray, Any], np.ndarray],
    b: np.ndarray,
    **kwargs
) -> Tuple[np.ndarray, int, List[float]]:
    """
    Solves Ax=b using Conjugate Gradient, with corrected residual logging.
    """
    x = np.zeros_like(b)
    r = b.copy()
    p = r.copy()

    r0_norm = np.linalg.norm(r)
    if r0_norm == 0:
        return x, 0, [0.0]

    res_norm_old = r0_norm
    residual_history = []

    for k in range(MAX_ITER):
        # 1. Current residual norm is from previous step
        # 2. Append to history
        residual_history.append(res_norm_old)

        # 3. Check for convergence
        if res_norm_old / r0_norm < TOL:
            return x, k, residual_history

        # 4. If not converged, compute update
        Ap = mat_vec_prod(p, **kwargs)
        p_dot_Ap = np.dot(p, Ap)
        if p_dot_Ap == 0:
            break
        alpha = res_norm_old**2 / p_dot_Ap

        x += alpha * p
        r -= alpha * Ap

        res_norm_new = np.linalg.norm(r)
        beta = res_norm_new**2 / res_norm_old**2

        p = r + beta * p
        res_norm_old = res_norm_new

    # If loop finishes, append final residual
    residual_history.append(res_norm_old)
    return x, MAX_ITER, residual_history
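A sketch of how the matrix-free callbacks above can be wired up for the tridiagonal system of Problem 1 (see project2/main.tex below). The helper names and driver code are mine; `problem1.py` may organize this differently.

```python
import numpy as np
# assumes stationary_method and conjugate_gradient from project2/iterative_solvers.py are in scope

n = 20
h = 1.0 / (n + 1)
t = np.arange(1, n + 1) * h
b = h**2 * t                        # right-hand side: h^2 * t_i

def mat_vec_prod(x, **kwargs):
    """y = A x for the tridiagonal matrix with 2 on the diagonal and -1 off it."""
    y = 2.0 * x
    y[:-1] -= x[1:]
    y[1:] -= x[:-1]
    return y

def get_diag_inv(i, **kwargs):
    return 0.5                      # 1 / a_ii, with a_ii = 2

def get_off_diag_sum(x, i, **kwargs):
    """Sum over j != i of a_ij * x_j; only the two neighbours are nonzero (a_ij = -1)."""
    s = 0.0
    if i > 0:
        s -= x[i - 1]
    if i < len(x) - 1:
        s -= x[i + 1]
    return s

u_jac, it_jac, _ = stationary_method(b, 'jacobi', get_diag_inv, get_off_diag_sum, mat_vec_prod)
u_cg, it_cg, _ = conjugate_gradient(mat_vec_prod, b)
print(it_jac, it_cg)                # per the report: thousands of Jacobi sweeps vs. about n CG steps
```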
187
project2/main.tex
Normal file
@@ -0,0 +1,187 @@
\documentclass{article}

% Language setting
\usepackage[english]{babel}

% Set page size and margins
\usepackage[letterpaper,top=2cm,bottom=2cm,left=3cm,right=3cm,marginparwidth=1.75cm]{geometry}

% Useful packages
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{amsfonts}
\usepackage{float} % For better table and figure placement
\usepackage[colorlinks=true, allcolors=blue]{hyperref}
\usepackage{array} % For better table column definitions

\title{MATH 6601 - Project 2 Report}
\author{Zhe Yuan (yuan.1435)}
\date{Nov 2025}

\begin{document}
\maketitle

\section*{Common Iterative Methods for Both Problems}
This project implements five iterative methods for solving large, sparse, symmetric positive-definite (SPD) linear systems of the form $Ax=b$:
\begin{itemize}
\item Jacobi method
\item Gauss-Seidel method
\item Successive Over-Relaxation (SOR) method
\item Steepest Descent method
\item Conjugate Gradient method
\end{itemize}
All five methods are implemented in Python. The core logic for each solver is contained in `iterative\_solvers.py'; the specific problems are set up and solved in `problem1.py' and `problem2.py', respectively.

The initial guess was $x^{(0)}=0$, and the stopping criterion was $\|r^{(k)}\|_2 / \|r^{(0)}\|_2 < 10^{-10}$. In neither problem is $A$ formed explicitly; every solver accesses $A$ only through a matrix-vector product routine.

\section{Problem 1: Ordinary Differential Equation}
This problem solves the boundary value problem $-u'' = t$ on $(0,1)$ with $u(0)=u(1)=0$. To solve it numerically, the interval $(0,1)$ is divided into $n+1$ subintervals of width $h=1/(n+1)$, and the second derivative $u''(t_i)$ is approximated by a central difference:
\[ u''(t_i) = -t_i \approx \frac{u(t_{i-1}) - 2u(t_i) + u(t_{i+1})}{h^2} \quad \implies \quad -u_{i-1} + 2u_i - u_{i+1} = h^2 t_i \]
for the interior grid points $i=1, \dots, n$. Combined with the boundary conditions $u_0=0$ and $u_{n+1}=0$, this gives a linear system $Au=b$ with $u = [u_1, \dots, u_n]^T$, where $A \in \mathbb{R}^{n \times n}$ is tridiagonal and $b \in \mathbb{R}^n$ is given by:
\[
A = \begin{pmatrix}
2 & -1 & & & \\
-1 & 2 & -1 & & \\
 & \ddots & \ddots & \ddots & \\
 & & -1 & 2 & -1 \\
 & & & -1 & 2
\end{pmatrix}, \quad
b = h^2 \begin{pmatrix} t_1 \\ t_2 \\ \vdots \\ t_n \end{pmatrix}
\]
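For later reference when interpreting the iteration counts below (an added aside, not required by the assignment), the eigenvalues of this tridiagonal matrix are known in closed form:
\[ \lambda_k = 2 - 2\cos(k\pi h) = 4\sin^2\!\left(\frac{k\pi h}{2}\right), \qquad k = 1,\dots,n, \]
so the condition number grows like $\kappa(A) = \lambda_n/\lambda_1 \approx 4/(\pi^2 h^2)$ as $h \to 0$.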
\subsection*{(a) Exact Solution}
The boundary value problem is $-u'' = t$ on the interval $(0, 1)$ with boundary conditions $u(0) = u(1) = 0$.
Integrating $u'' = -t$ twice gives the general solution:
\[ u(t) = -\frac{t^3}{6} + C_1 t + C_2 \]
Applying the boundary conditions $u(0)=0$ and $u(1)=0$ gives $C_2=0$ and $C_1 = 1/6$. Thus, the exact solution is:
\[ u(t) = \frac{t - t^3}{6} \]
(Indeed, $-u'' = t$ and $u(0)=u(1)=0$ hold.) The plot of this exact solution is shown in Figure~\ref{fig:p1_exact_solution}.
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{problem1_exact_solution.png}
\caption{Plot of the exact solution $u(t) = (t-t^3)/6$.}
\label{fig:p1_exact_solution}
\end{figure}

\subsection*{(b, c) Iterative Solution}
As required in 1(b), the linear system was solved for $n=20$ and $n=40$. For SOR, the optimal relaxation parameter $\omega = \frac{2}{1+\sin(\pi h)}$ was used. The number of iterations required for each method to converge is summarized in Table~\ref{tab:p1_iterations}.

\begin{table}[H]
\centering
\begin{tabular}{|l|r|r|}
\hline
\textbf{Method} & \textbf{Iterations (n=20)} & \textbf{Iterations (n=40)} \\
\hline
Jacobi & 2031 & 7758 \\
Gauss-Seidel & 1018 & 3882 \\
SOR & 94 & 183 \\
Steepest Descent & 2044 & 7842 \\
Conjugate Gradient & 20 & 40 \\
\hline
\end{tabular}
\caption{Number of iterations for each method for Problem 1.}
\label{tab:p1_iterations}
\end{table}

\noindent \textbf{Observations:}
\begin{enumerate}
\item The Conjugate Gradient method is the most efficient, requiring by far the fewest iterations. SOR is second best, with a substantial speedup over Gauss-Seidel. Gauss-Seidel converges about twice as fast as Jacobi. Steepest Descent performs poorly, converging about as slowly as Jacobi.
\item As $n$ doubles, the number of iterations increases for all methods. The iteration count for Jacobi, Gauss-Seidel, and Steepest Descent grows by a factor of roughly 4, while for SOR and CG it grows by roughly a factor of 2; a standard estimate explaining this scaling is sketched after this list.
\end{enumerate}
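The observed scaling is consistent with standard estimates for this model problem (an added aside, using the eigenvalues quoted above). The iteration-matrix spectral radii satisfy
\[
\rho_{\mathrm{J}} = \cos(\pi h) \approx 1 - \tfrac{\pi^2 h^2}{2}, \qquad
\rho_{\mathrm{GS}} = \cos^2(\pi h) \approx 1 - \pi^2 h^2, \qquad
\rho_{\mathrm{SOR}} = \omega_{\mathrm{opt}} - 1 \approx 1 - 2\pi h,
\]
so Jacobi and Gauss-Seidel need $O(h^{-2}) = O(n^2)$ iterations while optimal SOR needs $O(h^{-1}) = O(n)$. Steepest Descent reduces the error by roughly a factor $(\kappa-1)/(\kappa+1)$ per step and CG by $(\sqrt{\kappa}-1)/(\sqrt{\kappa}+1)$, with $\kappa \approx 4/(\pi^2 h^2)$, which again gives $O(n^2)$ versus $O(n)$ iterations.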
\subsection*{(d) Convergence History}
The convergence history is shown in Figures~\ref{fig:p1_conv_n20} and \ref{fig:p1_conv_n40}. The stationary methods and Steepest Descent show linear convergence (straight lines on the semi-log plot), while CG converges much faster, with a superlinear tail.

\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{problem1_convergence_n20.png}
\caption{Convergence history for Problem 1 with n=20.}
\label{fig:p1_conv_n20}
\end{figure}

\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{problem1_convergence_n40.png}
\caption{Convergence history for Problem 1 with n=40.}
\label{fig:p1_conv_n40}
\end{figure}

\newpage
\section{Problem 2: Partial Differential Equation}
This problem solves the PDE $-\nabla^2 u + u = 1$ on the unit square $[0,1]^2$ with zero boundary conditions. (The problem statement writes $-\nabla u + u = 1$, but since $\nabla u$ is a vector and $u$ is a scalar, it is interpreted here as $-\nabla^2 u + u = 1$.) A finite difference approximation on an $n \times n$ grid of interior points with mesh width $h = 1/(n+1)$ leads to the discrete equations:
\[ (4+h^2)u_{i,j} - u_{i-1,j} - u_{i,j-1} - u_{i+1,j} - u_{i,j+1} = h^2, \quad i,j=1,\dots,n \]
To formulate this as a standard matrix system $Ax=b$, the 2D array of unknowns $u_{i,j}$ is reshaped into a single 1D vector $x$ of length $N=n^2$: $u_{i,j}$ maps to entry $k = (i-1)n + j$ of $x$. The matrix $A \in \mathbb{R}^{N \times N}$ is then a large, sparse, block-tridiagonal matrix:
\[
A = \begin{pmatrix}
T & -I & & & \\
-I & T & -I & & \\
 & \ddots & \ddots & \ddots & \\
 & & -I & T & -I \\
 & & & -I & T
\end{pmatrix}_{n^2 \times n^2}
\]
where $I$ is the $n \times n$ identity matrix, and $T$ is the $n \times n$ tridiagonal matrix
\[
T = \begin{pmatrix}
4+h^2 & -1 & & & \\
-1 & 4+h^2 & -1 & & \\
 & \ddots & \ddots & \ddots & \\
 & & -1 & 4+h^2 & -1 \\
 & & & -1 & 4+h^2
\end{pmatrix}_{n \times n}
\]
The right-hand side vector $b \in \mathbb{R}^{N}$ is the constant vector whose entries are all $h^2$. With this block structure, row $k=(i-1)n+j$ of $Ax$ reproduces the stencil above:
\[
(A_{i,i})_{j,j}\,x_{(i-1)n + j} = (4+h^2)\,u_{i,j}, \qquad (A_{i,i-1})_{j,j}\,x_{(i-2)n + j} = -u_{i-1,j}, \quad \dots
\]
where $A_{i,i}=T$ and $A_{i,i-1}=A_{i-1,i}=-I$.
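As in Problem 1, the spectrum of $A$ is known explicitly (an added aside): its eigenvalues are
\[ \lambda_{i,j} = h^2 + 4\sin^2\!\left(\frac{i\pi h}{2}\right) + 4\sin^2\!\left(\frac{j\pi h}{2}\right), \qquad i,j = 1,\dots,n, \]
so the zeroth-order term $+u$ only shifts the spectrum by $h^2$ and the condition number again behaves like $O(h^{-2})$, which is why the iteration counts below scale with $n$ essentially as in Problem 1.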
\subsection*{(a) Iterative Solution}
The system was solved for $n=16$ ($N=256$) and $n=32$ ($N=1024$) using the same five iterative methods. The number of iterations to reach convergence is reported in Table~\ref{tab:p2_iterations}.

\begin{table}[H]
\centering
\begin{tabular}{|l|r|r|}
\hline
\textbf{Method} & \textbf{Iterations (n=16)} & \textbf{Iterations (n=32)} \\
\hline
Jacobi & 1268 & 4792 \\
Gauss-Seidel & 635 & 2397 \\
SOR & 71 & 138 \\
Steepest Descent & 1247 & 4807 \\
Conjugate Gradient & 31 & 66 \\
\hline
\end{tabular}
\caption{Number of iterations for each method for Problem 2.}
\label{tab:p2_iterations}
\end{table}

\noindent \textbf{Observations:}
The relative performance of the methods is consistent with Problem 1: CG is the fastest, followed by SOR, Gauss-Seidel, Jacobi, and Steepest Descent. Doubling $n$ from 16 to 32 quadruples the number of unknowns. Again, the iteration counts for the slower methods increase by a factor of approximately 4, while for CG and SOR the increase is only about a factor of 2. This reinforces the conclusion that CG and SOR scale much better with problem size.

\subsection*{(b) Convergence History}
The convergence history plots are shown in Figures~\ref{fig:p2_conv_n16} and \ref{fig:p2_conv_n32}.

\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{problem2_convergence_n16.png}
\caption{Convergence history for Problem 2 with n=16.}
\label{fig:p2_conv_n16}
\end{figure}

\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{problem2_convergence_n32.png}
\caption{Convergence history for Problem 2 with n=32.}
\label{fig:p2_conv_n32}
\end{figure}

\subsection*{(c) Advantages of Iterative Methods}
For large, sparse linear systems arising from PDE discretizations, iterative methods offer significant advantages over direct methods (a rough memory estimate follows this list):
\begin{enumerate}
\item Iterative methods typically only need to store a few vectors of length $N$, giving $O(N)$ memory. In contrast, direct methods suffer from fill-in: the factors they compute (e.g., Cholesky, LU, or QR factors) are generally much denser than $A$ itself.
\item The cost per iteration is dominated by the sparse matrix-vector product, which is $O(N)$ here, so the total cost is $O(KN)$ for $K$ iterations. A good method such as CG keeps $K$ much smaller than $N$, whereas direct sparse solvers typically have higher asymptotic cost, so for very large $N$ iterative methods can be much faster.
\item Implementing a matrix-free iterative method is often much simpler than implementing an efficient direct solver for sparse matrices.
\end{enumerate}
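For a concrete sense of scale (an added estimate, not a measurement): for $n=32$ we have $N = n^2 = 1024$, so storing $A$ as a dense matrix would take $N^2 \cdot 8$ bytes $\approx 8.4$ MB, and even a banded Cholesky factor with bandwidth $n$ needs about $N n \cdot 8$ bytes $\approx 0.26$ MB, whereas the matrix-free solvers above keep only a handful of length-$N$ vectors, on the order of $5 N \cdot 8$ bytes $\approx 41$ KB.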

\end{document}
160
project2/problem1.py
Normal file
@@ -0,0 +1,160 @@
import numpy as np
import matplotlib.pyplot as plt
from iterative_solvers import *

# (a): Exact Solution
def exact_solution(t: np.ndarray) -> np.ndarray:
    """Computes the exact solution u(t) = t/6 * (1 - t^2)."""
    return t / 6.0 * (1 - t**2)

def plot_exact_solution():
    """Plots the exact solution of the BVP."""
    t_fine = np.linspace(0, 1, 500)
    u_exact_fine = exact_solution(t_fine)

    plt.figure(figsize=(10, 6))
    plt.plot(t_fine, u_exact_fine, 'k-', label=r'Exact Solution $u(t) = t/6 \cdot (1 - t^2)$')
    plt.title('Exact Solution of $-u\'\'=t$ with $u(0)=u(1)=0$')
    plt.xlabel('$t$')
    plt.ylabel('$u(t)$')
    plt.grid(True)
    plt.legend()
    plt.savefig('problem1_exact_solution.png')
    # plt.show()
    print("Exact solution plot saved to 'problem1_exact_solution.png'")


# (b): Helper functions for iterative methods
def mat_vec_prod_p1(x: np.ndarray, n: int) -> np.ndarray:
    """Computes the matrix-vector product Ax for Problem 1."""
    y = np.zeros(n)
    # A is tridiagonal with stencil [-1, 2, -1]:
    # y_i = -x_{i-1} + 2*x_i - x_{i+1}
    for i in range(n):
        term_prev = -x[i-1] if i > 0 else 0
        term_next = -x[i+1] if i < n - 1 else 0
        y[i] = term_prev + 2 * x[i] + term_next
    return y
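# NOTE (added sketch, not part of the original submission): the loop in
# mat_vec_prod_p1 can be vectorized with array slicing; this computes the
# same tridiagonal product y_i = -x_{i-1} + 2*x_i - x_{i+1}.
def mat_vec_prod_p1_vectorized(x: np.ndarray, n: int) -> np.ndarray:
    y = 2.0 * x.astype(float)
    y[:-1] -= x[1:]   # superdiagonal contribution: -x_{i+1}
    y[1:] -= x[:-1]   # subdiagonal contribution: -x_{i-1}
    return y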
def get_diag_inv_p1(i: int, n: int) -> float:
    """Returns 1/A_ii for Problem 1. Diagonal elements are all 2, so the inverse is 1/2."""
    return 1.0 / 2.0

def get_off_diag_sum_p1(x: np.ndarray, i: int, n: int) -> float:
    """Computes sum_{j!=i} A_ij * x_j. For Problem 1, A_ij = -1 for j = i-1, i+1, else 0."""
    sum_val = 0.0
    if i > 0:
        sum_val += -x[i-1]
    if i < n - 1:
        sum_val += -x[i+1]
    return sum_val

def solve_and_report(n: int):
    """
    Solves the linear system for a given n and reports results.
    """
    print(f"\n--- Solving for n = {n} ---")
    h = 1.0 / (n + 1)

    # Set up the right-hand side vector b
    t = np.linspace(h, 1.0 - h, n)
    b = (h**2) * t

    # Calculate the SOR parameter omega
    omega = 2.0 / (1.0 + np.sin(np.pi * h))

    # Store results
    iteration_counts = []
    all_residuals = {}
    all_solutions = {}

    # Define methods and run solvers
    methods_to_run = {
        'Jacobi': ('jacobi', {}),
        'Gauss-Seidel': ('gauss_seidel', {}),
        'SOR': ('sor', {'omega': omega}),
        'Steepest Descent': (steepest_descent, {}),
        'Conjugate Gradient': (conjugate_gradient, {})
    }

    print("\nStarting iterative solvers...")
    for name, (method_func, params) in methods_to_run.items():
        print(f"Running {name}...")
        common_args = {'b': b, 'n': n}
        if isinstance(method_func, str):
            solver_args = {'method': method_func, 'get_diag_inv': get_diag_inv_p1,
                           'get_off_diag_sum': get_off_diag_sum_p1, 'mat_vec_prod': mat_vec_prod_p1,
                           **common_args, **params}
            solution, iters, res_hist = stationary_method(**solver_args)
        else:
            solver_args = {'mat_vec_prod': mat_vec_prod_p1, **common_args}
            solution, iters, res_hist = method_func(**solver_args)
        iteration_counts.append((name, iters))
        all_residuals[name] = res_hist
        all_solutions[name] = solution if iters < MAX_ITER else None

    # (c): Report iteration counts
    print("\n--- Iteration Counts ---")
    for name, iters in iteration_counts:
        status = "converged" if iters < MAX_ITER else "did NOT converge"
        print(f"{name:<20}: {iters} iterations ({status}, last residual = {all_residuals[name][-1]:.2e})")

    # (d): Plot residual history
    plt.figure(figsize=(12, 8))
    for name, res_hist in all_residuals.items():
        res_log = np.log10(np.array(res_hist) + 1e-20)
        plt.plot(res_log, label=name, linestyle='-')

    plt.title(f'Convergence History for n = {n}')
    plt.xlabel('Iteration Step (m)')
    plt.ylabel('log10(||r^(m)||_2)')
    plt.legend()
    plt.grid(True)

    # Set the y-axis limit to keep the plot readable
    current_ylim = plt.ylim()
    plt.ylim(bottom=max(current_ylim[0], -13), top=current_ylim[1])
    plt.savefig(f'problem1_convergence_n{n}.png')
    # plt.show()
    print(f"\nConvergence plot saved to 'problem1_convergence_n{n}.png'")

    if all_solutions:
        plt.figure(figsize=(12, 8))

        # Plot the exact solution on a fine grid
        t_fine = np.linspace(0, 1, 500)
        u_exact_fine = exact_solution(t_fine)
        plt.plot(t_fine, u_exact_fine, 'k-', label='Exact Solution', linewidth=1, zorder=10)  # black, drawn on top

        # Define styles for the different numerical methods
        styles = {
            'Jacobi': {'color': 'red', 'linestyle': '--'},
            'Gauss-Seidel': {'color': 'blue', 'linestyle': '-.'},
            'SOR': {'color': 'green', 'linestyle': ':'},
            'Steepest Descent': {'color': 'purple', 'linestyle': '--'},
            'Conjugate Gradient': {'color': 'orange', 'linestyle': '-.'},
        }

        # Plot each numerical solution
        for name, solution in all_solutions.items():
            if solution is None:
                continue  # skip methods that did not converge

            # Add boundary points for a complete plot
            t_numerical_with_bounds = np.concatenate(([0], t, [1]))
            u_numerical_with_bounds = np.concatenate(([0], solution, [0]))

            style = styles.get(name, {'color': 'gray', 'linestyle': '-'})
            plt.plot(t_numerical_with_bounds, u_numerical_with_bounds,
                     label=f'Numerical ({name})', **style, linewidth=2)

        plt.title(f'Comparison of Exact and Numerical Solutions for n = {n}')
        plt.xlabel('$t$')
        plt.ylabel('$u(t)$')
        plt.legend()
        plt.grid(True)
        plt.savefig(f'problem1_all_solutions_comparison_n{n}.png')
        # plt.show()
        print(f"All solutions comparison plot saved to 'problem1_all_solutions_comparison_n{n}.png'")


if __name__ == "__main__":
    # Part (a)
    plot_exact_solution()

    # Parts (b), (c), (d)
    solve_and_report(n=20)
    solve_and_report(n=40)
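# NOTE (added sketch, not part of the original submission): a quick accuracy
# check of the finite-difference solution against the exact solution, reusing
# the helpers defined above (conjugate_gradient comes from iterative_solvers).
def check_discretization_error(n: int) -> float:
    """Returns max_i |u_numerical(t_i) - u_exact(t_i)| for the CG solution."""
    h = 1.0 / (n + 1)
    t = np.linspace(h, 1.0 - h, n)
    b = (h**2) * t
    u_num, _, _ = conjugate_gradient(mat_vec_prod=mat_vec_prod_p1, b=b, n=n)
    return float(np.max(np.abs(u_num - exact_solution(t))))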
BIN
project2/problem1_all_solutions_comparison_n20.png
Normal file
After Width: | Height: | Size: 68 KiB |
BIN
project2/problem1_all_solutions_comparison_n40.png
Normal file
After Width: | Height: | Size: 68 KiB |
BIN
project2/problem1_convergence_n20.png
Normal file
After Width: | Height: | Size: 72 KiB |
BIN
project2/problem1_convergence_n40.png
Normal file
After Width: | Height: | Size: 72 KiB |
BIN
project2/problem1_exact_solution.png
Normal file
After Width: | Height: | Size: 39 KiB |
39
project2/problem1_output.txt
Normal file
@@ -0,0 +1,39 @@
Exact solution plot saved to 'problem1_exact_solution.png'

--- Solving for n = 20 ---

Starting iterative solvers...
Running Jacobi...
Running Gauss-Seidel...
Running SOR...
Running Steepest Descent...
Running Conjugate Gradient...

--- Iteration Counts ---
Jacobi              : 2031 iterations (converged, last residual = 5.78e-13)
Gauss-Seidel        : 1018 iterations (converged, last residual = 5.72e-13)
SOR                 : 94 iterations (converged, last residual = 4.52e-13)
Steepest Descent    : 2044 iterations (converged, last residual = 5.74e-13)
Conjugate Gradient  : 20 iterations (converged, last residual = 1.39e-18)

Convergence plot saved to 'problem1_convergence_n20.png'
All solutions comparison plot saved to 'problem1_all_solutions_comparison_n20.png'

--- Solving for n = 40 ---

Starting iterative solvers...
Running Jacobi...
Running Gauss-Seidel...
Running SOR...
Running Steepest Descent...
Running Conjugate Gradient...

--- Iteration Counts ---
Jacobi              : 7758 iterations (converged, last residual = 2.15e-13)
Gauss-Seidel        : 3882 iterations (converged, last residual = 2.16e-13)
SOR                 : 183 iterations (converged, last residual = 2.04e-13)
Steepest Descent    : 7842 iterations (converged, last residual = 2.16e-13)
Conjugate Gradient  : 40 iterations (converged, last residual = 5.40e-19)

Convergence plot saved to 'problem1_convergence_n40.png'
All solutions comparison plot saved to 'problem1_all_solutions_comparison_n40.png'
147
project2/problem2.py
Normal file
@@ -0,0 +1,147 @@
import numpy as np
import matplotlib.pyplot as plt
from iterative_solvers import *

def mat_vec_prod_p2(x_flat: np.ndarray, n: int, h_sq: float) -> np.ndarray:
    """
    Computes the matrix-vector product y = Ax for the 2D PDE problem.
    """
    x_2d = x_flat.reshape((n, n))
    x_padded = np.zeros((n + 2, n + 2))
    x_padded[1:-1, 1:-1] = x_2d

    y_2d = np.zeros((n, n))

    # Apply the finite difference stencil:
    # (4+h^2)u_ij - u_{i-1,j} - u_{i+1,j} - u_{i,j-1} - u_{i,j+1}
    for i in range(n):
        for j in range(n):
            y_2d[i, j] = (4.0 + h_sq) * x_padded[i + 1, j + 1] \
                         - x_padded[i, j + 1] \
                         - x_padded[i + 2, j + 1] \
                         - x_padded[i + 1, j] \
                         - x_padded[i + 1, j + 2]

    return y_2d.flatten()
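# NOTE (added sketch, not part of the original submission): the double loop in
# mat_vec_prod_p2 can be vectorized by slicing the padded array; this applies
# the same five-point stencil.
def mat_vec_prod_p2_vectorized(x_flat: np.ndarray, n: int, h_sq: float) -> np.ndarray:
    x_padded = np.zeros((n + 2, n + 2))
    x_padded[1:-1, 1:-1] = x_flat.reshape((n, n))
    y_2d = ((4.0 + h_sq) * x_padded[1:-1, 1:-1]
            - x_padded[:-2, 1:-1]    # -u_{i-1,j}
            - x_padded[2:, 1:-1]     # -u_{i+1,j}
            - x_padded[1:-1, :-2]    # -u_{i,j-1}
            - x_padded[1:-1, 2:])    # -u_{i,j+1}
    return y_2d.flatten()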
def get_diag_inv_p2(i: int, n: int, h_sq: float) -> float:
    """Returns 1/A_ii for Problem 2. The diagonal is constant: 4 + h^2."""
    return 1.0 / (4.0 + h_sq)

def get_off_diag_sum_p2(x_flat: np.ndarray, i_flat: int, n: int, **kwargs) -> float:
    """Computes the off-diagonal sum for a given row `i_flat` in Problem 2."""
    row = i_flat // n
    col = i_flat % n
    sum_val = 0.0

    if row > 0:
        sum_val += -x_flat[i_flat - n]
    if row < n - 1:
        sum_val += -x_flat[i_flat + n]
    if col > 0:
        sum_val += -x_flat[i_flat - 1]
    if col < n - 1:
        sum_val += -x_flat[i_flat + 1]

    return sum_val

def solve_and_report_p2(n: int):
    """Solves the 2D PDE linear system for a given n and reports results."""
    print(f"\n--- Solving for n = {n} (System size: {n*n} x {n*n}) ---")
    h = 1.0 / (n + 1)
    h_sq = h**2
    N = n * n

    b = np.full(N, h_sq)
    omega = 2.0 / (1.0 + np.sin(np.pi * h))

    iteration_counts = []
    all_residuals = {}
    all_solutions = {}

    methods_to_run = {
        'Jacobi': ('jacobi', {}),
        'Gauss-Seidel': ('gauss_seidel', {}),
        'SOR': ('sor', {'omega': omega}),
        'Steepest Descent': (steepest_descent, {}),
        'Conjugate Gradient': (conjugate_gradient, {})
    }

    print("\nStarting iterative solvers...")
    for name, (method_func, params) in methods_to_run.items():
        print(f"Running {name}...")

        solver_kwargs = {'n': n, 'h_sq': h_sq}

        if isinstance(method_func, str):
            solver_args = {'b': b, 'method': method_func, 'get_diag_inv': get_diag_inv_p2,
                           'get_off_diag_sum': get_off_diag_sum_p2, 'mat_vec_prod': mat_vec_prod_p2,
                           **solver_kwargs, **params}
            solution, iters, res_hist = stationary_method(**solver_args)
        else:
            solver_args = {'b': b, 'mat_vec_prod': mat_vec_prod_p2, **solver_kwargs}
            solution, iters, res_hist = method_func(**solver_args)

        iteration_counts.append((name, iters))
        all_residuals[name] = res_hist
        all_solutions[name] = solution if iters < MAX_ITER else None

    # Report iteration counts
    print("\n--- Iteration Counts ---")
    for name, iters in iteration_counts:
        status = "converged" if iters < MAX_ITER else "did NOT converge"
        print(f"{name:<20}: {iters} iterations ({status})")

    # Plot residual history
    plt.figure(figsize=(12, 8))
    for name, res_hist in all_residuals.items():
        res_log = np.log10(np.array(res_hist) + 1e-20)
        plt.plot(res_log, label=name, linestyle='-')

    plt.title(f'Convergence History for PDE Problem, n = {n}')
    plt.xlabel('Iteration Step (m)')
    plt.ylabel('log10(||r^(m)||_2)')
    plt.legend()
    plt.grid(True)

    current_ylim = plt.ylim()
    plt.ylim(bottom=max(current_ylim[0], -13), top=current_ylim[1])

    plt.savefig(f'problem2_convergence_n{n}.png')
    print(f"\nConvergence plot saved to 'problem2_convergence_n{n}.png'")

    if all_solutions:
        print("Generating 3D surface plots of the solutions...")

        # Create the X, Y grid for the surface plot, including boundaries
        plot_grid = np.linspace(0, 1, n + 2)
        X, Y = np.meshgrid(plot_grid, plot_grid)

        # Create a figure for the subplots
        fig = plt.figure(figsize=(18, 10))
        fig.suptitle(f'3D Surface Plots of Numerical Solutions for n = {n}', fontsize=16)

        # Loop through the stored solutions and create a subplot for each
        for i, (name, solution_flat) in enumerate(all_solutions.items()):
            if solution_flat is None:
                continue  # skip methods that did not converge

            # Reshape the 1D solution vector into a 2D grid
            solution_2d = solution_flat.reshape((n, n))

            # Pad the solution with zero boundaries for plotting
            u_padded = np.zeros((n + 2, n + 2))
            u_padded[1:-1, 1:-1] = solution_2d

            # Add a new 3D subplot
            ax = fig.add_subplot(2, 3, i + 1, projection='3d')

            # Plot the surface
            ax.plot_surface(X, Y, u_padded, cmap='viridis', edgecolor='none')

            # Set titles and labels for each subplot
            ax.set_title(name)
            ax.set_xlabel('x')
            ax.set_ylabel('y')
            ax.set_zlabel('u(x,y)')
            ax.view_init(elev=30, azim=-60)  # set a convenient viewing angle

        plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # leave room for the suptitle
        plt.savefig(f'problem2_3d_solutions_n{n}.png')
        print(f"3D surface comparison plot saved to 'problem2_3d_solutions_n{n}.png'")


if __name__ == "__main__":
    # Solve Problem 2 for n=16 and n=32
    solve_and_report_p2(n=16)
    solve_and_report_p2(n=32)
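# NOTE (added sketch, not part of the original submission): for a small n, the
# matrix-free product can be cross-checked against an explicitly assembled
# dense A (same row-major flattening k = i*n + j as reshape/flatten above).
def dense_A_p2(n: int, h_sq: float) -> np.ndarray:
    N = n * n
    A = np.zeros((N, N))
    for i in range(n):
        for j in range(n):
            k = i * n + j
            A[k, k] = 4.0 + h_sq
            if i > 0:
                A[k, k - n] = -1.0
            if i < n - 1:
                A[k, k + n] = -1.0
            if j > 0:
                A[k, k - 1] = -1.0
            if j < n - 1:
                A[k, k + 1] = -1.0
    return A
# e.g. np.allclose(dense_A_p2(4, 0.04) @ x, mat_vec_prod_p2(x, 4, 0.04)) should hold.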
BIN
project2/problem2_3d_solutions_n16.png
Normal file
After Width: | Height: | Size: 416 KiB |
BIN
project2/problem2_3d_solutions_n32.png
Normal file
After Width: | Height: | Size: 525 KiB |
BIN
project2/problem2_convergence_n16.png
Normal file
After Width: | Height: | Size: 80 KiB |
BIN
project2/problem2_convergence_n32.png
Normal file
After Width: | Height: | Size: 70 KiB |
40
project2/problem2_output.txt
Normal file
@@ -0,0 +1,40 @@

--- Solving for n = 16 (System size: 256 x 256) ---

Starting iterative solvers...
Running Jacobi...
Running Gauss-Seidel...
Running SOR...
Running Steepest Descent...
Running Conjugate Gradient...

--- Iteration Counts ---
Jacobi              : 1268 iterations (converged)
Gauss-Seidel        : 635 iterations (converged)
SOR                 : 71 iterations (converged)
Steepest Descent    : 1247 iterations (converged)
Conjugate Gradient  : 31 iterations (converged)

Convergence plot saved to 'problem2_convergence_n16.png'
Generating 3D surface plots of the solutions...
3D surface comparison plot saved to 'problem2_3d_solutions_n16.png'

--- Solving for n = 32 (System size: 1024 x 1024) ---

Starting iterative solvers...
Running Jacobi...
Running Gauss-Seidel...
Running SOR...
Running Steepest Descent...
Running Conjugate Gradient...

--- Iteration Counts ---
Jacobi              : 4792 iterations (converged)
Gauss-Seidel        : 2397 iterations (converged)
SOR                 : 138 iterations (converged)
Steepest Descent    : 4807 iterations (converged)
Conjugate Gradient  : 66 iterations (converged)

Convergence plot saved to 'problem2_convergence_n32.png'
Generating 3D surface plots of the solutions...
3D surface comparison plot saved to 'problem2_3d_solutions_n32.png'