
Source Code for Module paramz.optimization.scg

  1  #=============================================================================== 
  2  # Copyright (c) 1996-2014, I. Nabney, N.Lawrence and James Hensman (1996 - 2014) 
  3  # Copyright (c) 2015, Max Zwiessele 
  4  # All rights reserved. 
  5  #  
  6  # Redistribution and use in source and binary forms, with or without 
  7  # modification, are permitted provided that the following conditions are met: 
  8  #  
  9  # * Redistributions of source code must retain the above copyright notice, this 
 10  #   list of conditions and the following disclaimer. 
 11  #  
 12  # * Redistributions in binary form must reproduce the above copyright notice, 
 13  #   this list of conditions and the following disclaimer in the documentation 
 14  #   and/or other materials provided with the distribution. 
 15  #  
 16  # * Neither the name of paramax nor the names of its 
 17  #   contributors may be used to endorse or promote products derived from 
 18  #   this software without specific prior written permission. 
 19  #  
 20  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 21  # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 22  # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
 23  # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 
 24  # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 25  # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
 26  # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
 27  # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
 28  # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 29  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 30  #=============================================================================== 
 31   
 32  ''' 
 33  Scaled Conjugate Gradients, originally in Matlab as part of the 
 34  Netlab toolbox by I. Nabney, converted to Python by N. Lawrence 
 35  and given a pythonic interface by James Hensman. 
 36   
 37  Edited by Max Zwiessele for efficiency and verbose optimization. 
 38  ''' 
 39   
 40  from __future__ import print_function 
 41  import numpy as np 
 42  import sys 
 43   
 44  def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, xtol=None, ftol=None, gtol=None): 
 45      """ 
 46      Optimisation through Scaled Conjugate Gradients (SCG) 
 47   
 48      f: the objective function 
 49      gradf : the gradient function (should return a 1D np.ndarray) 
 50      x : the initial condition 
 51   
 52      Returns 
 53      x the optimal value for x 
 54      flog : a list of all the objective values 
 55      function_eval number of fn evaluations 
 56      status: string describing convergence status 
 57      """ 
 58      if xtol is None: 
 59          xtol = 1e-6 
 60      if ftol is None: 
 61          ftol = 1e-6 
 62      if gtol is None: 
 63          gtol = 1e-5 
 64   
 65      sigma0 = 1.0e-7 
 66      fold = f(x, *optargs) # Initial function value. 
 67      function_eval = 1 
 68      fnow = fold 
 69      gradnew = gradf(x, *optargs) # Initial gradient. 
 70      function_eval += 1 
 71      #if any(np.isnan(gradnew)): 
 72      #    raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value" 
 73      current_grad = np.dot(gradnew, gradnew) 
 74      gradold = gradnew.copy() 
 75      d = -gradnew # Initial search direction. 
 76      success = True # Force calculation of directional derivs. 
 77      nsuccess = 0 # nsuccess counts number of successes. 
 78      beta = 1.0 # Initial scale parameter. 
 79      betamin = 1.0e-15 # Lower bound on scale. 
 80      betamax = 1.0e15 # Upper bound on scale. 
 81      status = "Not converged" 
 82   
 83      flog = [fold] 
 84   
 85      iteration = 0 
 86   
 87      # Main optimization loop. 
 88      while iteration < maxiters: 
 89   
 90          # Calculate first and second directional derivatives. 
 91          if success: 
 92              mu = np.dot(d, gradnew) 
 93              if mu >= 0: # pragma: no cover 
 94                  d = -gradnew 
 95                  mu = np.dot(d, gradnew) 
 96              kappa = np.dot(d, d) 
 97              sigma = sigma0 / np.sqrt(kappa) 
 98              xplus = x + sigma * d 
 99              gplus = gradf(xplus, *optargs) 
100              function_eval += 1 
101              theta = np.dot(d, (gplus - gradnew)) / sigma 
102   
103          # Increase effective curvature and evaluate step size alpha. 
104          delta = theta + beta * kappa 
105          if delta <= 0: # pragma: no cover 
106              delta = beta * kappa 
107              beta = beta - theta / kappa 
108   
109          alpha = -mu / delta 
110   
111          # Calculate the comparison ratio. 
112          xnew = x + alpha * d 
113          fnew = f(xnew, *optargs) 
114          function_eval += 1 
115   
116          Delta = 2.*(fnew - fold) / (alpha * mu) 
117          if Delta >= 0.: 
118              success = True 
119              nsuccess += 1 
120              x = xnew 
121              fnow = fnew 
122          else: 
123              success = False 
124              fnow = fold 
125   
126          # Store relevant variables 
127          flog.append(fnow) # Current function value 
128   
129          iteration += 1 
130   
131          if success: 
132              # Test for termination 
133   
134              if (np.abs(fnew - fold) < ftol): 
135                  status = 'converged - relative reduction in objective' 
136                  break 
137                  # return x, flog, function_eval, status 
138              elif (np.max(np.abs(alpha * d)) < xtol): 
139                  status = 'converged - relative stepsize' 
140                  break 
141              else: 
142                  # Update variables for new position 
143                  gradold = gradnew 
144                  gradnew = gradf(x, *optargs) 
145                  function_eval += 1 
146                  current_grad = np.dot(gradnew, gradnew) 
147                  fold = fnew 
148                  # If the gradient is zero then we are done. 
149                  if current_grad <= gtol: 
150                      status = 'converged - relative reduction in gradient' 
151                      break 
152                      # return x, flog, function_eval, status 
153   
154          # Adjust beta according to comparison ratio. 
155          if Delta < 0.25: 
156              beta = min(4.0 * beta, betamax) 
157          if Delta > 0.75: 
158              beta = max(0.25 * beta, betamin) 
159   
160          # Update search direction using Polak-Ribiere formula, or re-start 
161          # in direction of negative gradient after nparams steps. 
162          if nsuccess == x.size: 
163              d = -gradnew 
164              beta = 1. # This is not in the original paper 
165              nsuccess = 0 
166          elif success: 
167              Gamma = np.dot(gradold - gradnew, gradnew) / (mu) 
168              d = Gamma * d - gradnew 
169      else: 
170          # If we get here, then we haven't terminated in the given number of 
171          # iterations. 
172          status = "maxiter exceeded" 
173   
174      return x, flog, function_eval, status 
175   
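
A minimal usage sketch (not part of the generated listing above). It assumes a toy quadratic objective; the names quad, quad_grad and the matrix A are illustrative only and do not come from paramz.

    import numpy as np
    from paramz.optimization.scg import SCG

    # Hypothetical toy problem: minimise f(x) = 0.5 * x^T A x - b^T x.
    A = np.array([[3.0, 0.5],
                  [0.5, 1.0]])
    b = np.array([1.0, -2.0])

    def quad(x):
        # Objective value at x.
        return 0.5 * x.dot(A).dot(x) - b.dot(x)

    def quad_grad(x):
        # Gradient of the objective, returned as a 1D np.ndarray as SCG expects.
        return A.dot(x) - b

    x0 = np.zeros(2)
    x_opt, flog, n_evals, status = SCG(quad, quad_grad, x0, maxiters=200)
    print(status)    # e.g. 'converged - relative reduction in objective'
    print(x_opt)     # should be close to np.linalg.solve(A, b)
    print(n_evals, len(flog))

If xtol, ftol or gtol are not given, they default to 1e-6, 1e-6 and 1e-5 respectively, as set at the top of the function.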