
Source Code for Module paramz.model

#===============================================================================
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2014, James Hensman, Max Zwiessele
# Copyright (c) 2015, Max Zwiessele
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of paramax nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#===============================================================================


import numpy as np
from numpy.linalg.linalg import LinAlgError

from . import optimization
from .parameterized import Parameterized
from .optimization.verbose_optimization import VerboseOptimization
import multiprocessing as mp
#from functools import reduce

def opt_wrapper(args):
    m = args[0]
    kwargs = args[1]
    return m.optimize(**kwargs)


class Model(Parameterized):
    _fail_count = 0  # Count of failed optimization steps (see objective)
    _allowed_failures = 10  # number of allowed failures

    def __init__(self, name):
        super(Model, self).__init__(name)  # Parameterized.__init__(self)
        self.optimization_runs = []
        self.sampling_runs = []
        self.preferred_optimizer = 'lbfgsb'
        #from paramz import Tie
        #self.tie = Tie()
        #self.link_parameter(self.tie, -1)
        self.obj_grads = None
        #self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)

    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
        """
        Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.

        kwargs are passed to the optimizer. They can be:

        :param max_iters: maximum number of function evaluations
        :type max_iters: int
        :param messages: whether to display messages during optimisation
        :type messages: bool
        :param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
        :type optimizer: string

        Valid optimizers are:
          - 'scg': scaled conjugate gradient method, recommended for stability.
            See also GPy.inference.optimization.scg
          - 'fmin_tnc': truncated Newton method (see scipy.optimize.fmin_tnc)
          - 'simplex': the Nelder-Mead simplex method (see scipy.optimize.fmin)
          - 'lbfgsb': the l-bfgs-b method (see scipy.optimize.fmin_l_bfgs_b)
          - 'lbfgs': the bfgs method (see scipy.optimize.fmin_bfgs)
          - 'sgd': stochastic gradient descent (see scipy.optimize.sgd). For experts only!
        """
        if self.is_fixed or self.size == 0:
            print('nothing to optimize')
            return

        if not self.update_model():
            print("updates were off, setting updates on again")
            self.update_model(True)

        if start is None:
            start = self.optimizer_array

        if optimizer is None:
            optimizer = self.preferred_optimizer

        if isinstance(optimizer, optimization.Optimizer):
            opt = optimizer
            opt.model = self
        else:
            optimizer = optimization.get_optimizer(optimizer)
            opt = optimizer(max_iters=max_iters, **kwargs)

        with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook, clear_after_finish=clear_after_finish) as vo:
            opt.run(start, f_fp=self._objective_grads, f=self._objective, fp=self._grads)

        self.optimization_runs.append(opt)

        self.optimizer_array = opt.x_opt
        return opt

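    # Illustrative usage sketch (not part of the original source). Assuming `m`
    # is an instance of a concrete Model subclass with free parameters linked,
    # a typical optimization call might look like:
    #
    #     m.optimize(optimizer='lbfgsb', messages=True, max_iters=500)
    #     last_run = m.optimization_runs[-1]
    #     print(last_run.f_opt)      # objective value at the returned optimum
    #     print(m.optimizer_array)   # transformed parameters after optimization
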
    def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
        """
        Perform random restarts of the model, and set the model to the best
        seen solution.

        If the robust flag is set, exceptions raised during optimizations will
        be handled silently. If _all_ runs fail, the model is reset to the
        existing parameter values.

        \*\*kwargs are passed to the optimizer.

        :param num_restarts: number of restarts to use (default 10)
        :type num_restarts: int
        :param robust: whether to handle exceptions silently or not (default False)
        :type robust: bool
        :param parallel: whether to run each restart as a separate process. It relies on the multiprocessing module.
        :type parallel: bool
        :param num_processes: number of workers in the multiprocessing pool
        :type num_processes: int
        :param max_f_eval: maximum number of function evaluations
        :type max_f_eval: int
        :param max_iters: maximum number of iterations
        :type max_iters: int
        :param messages: whether to display messages during optimisation
        :type messages: bool

        .. note::

            If num_processes is None, the number of workers in the
            multiprocessing pool is automatically set to the number of processors
            on the current machine.

        """
        initial_parameters = self.optimizer_array.copy()

        if parallel: #pragma: no cover
            try:
                pool = mp.Pool(processes=num_processes)
                obs = [self.copy() for i in range(num_restarts)]
                [obs[i].randomize() for i in range(num_restarts-1)]
                jobs = pool.map(opt_wrapper, [(o, kwargs) for o in obs])
                pool.close()
                pool.join()
            except KeyboardInterrupt:
                print("Ctrl+c received, terminating and joining pool.")
                pool.terminate()
                pool.join()

        for i in range(num_restarts):
            try:
                if not parallel:
                    if i > 0: self.randomize()
                    self.optimize(**kwargs)
                else: #pragma: no cover
                    self.optimization_runs.append(jobs[i])

                if verbose:
                    print(("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt)))
            except Exception as e:
                if robust:
                    print(("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts)))
                else:
                    raise e

        if len(self.optimization_runs):
            i = np.argmin([o.f_opt for o in self.optimization_runs[-num_restarts:]])
            self.optimizer_array = self.optimization_runs[-num_restarts + i].x_opt
        else:
            self.optimizer_array = initial_parameters
        return self.optimization_runs

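    # Illustrative usage sketch (not part of the original source). With a
    # concrete Model subclass `m`, several randomized restarts can be run and
    # the best run inspected afterwards, e.g.:
    #
    #     runs = m.optimize_restarts(num_restarts=5, robust=True, messages=False)
    #     best = min(runs, key=lambda o: o.f_opt)
    #     print(best.f_opt)   # the model is already set to the best x_opt found
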
    def objective_function(self):
        """
        The objective function for the given algorithm.

        This function is the true objective, which is to be minimized.
        Note that all parameters are already set and in place, so you just need
        to return the objective function here.

        For probabilistic models this is the negative log_likelihood
        (including the MAP prior), so we return it here. If your model is not
        probabilistic, just return your objective to minimize here!
        """
        raise NotImplementedError("Implement the result of the objective function here")

204 """ 205 The gradients for the objective function for the given algorithm. 206 The gradients are w.r.t. the *negative* objective function, as 207 this framework works with *negative* log-likelihoods as a default. 208 209 You can find the gradient for the parameters in self.gradient at all times. 210 This is the place, where gradients get stored for parameters. 211 212 This function is the true objective, which wants to be minimized. 213 Note that all parameters are already set and in place, so you just need 214 to return the gradient here. 215 216 For probabilistic models this is the gradient of the negative log_likelihood 217 (including the MAP prior), so we return it here. If your model is not 218 probabilistic, just return your *negative* gradient here! 219 """ 220 return self.gradient
221
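    # Illustrative subclass sketch (not part of the original source): a minimal
    # concrete model implementing the hooks above. `Param`, `link_parameter` and
    # `parameters_changed` are assumed to be available from the paramz parameter
    # hierarchy, as in the package's examples.
    #
    #     from paramz import Param
    #
    #     class Quadratic(Model):
    #         def __init__(self, name='quadratic'):
    #             super(Quadratic, self).__init__(name)
    #             self.x = Param('x', np.array([3.0]))
    #             self.link_parameter(self.x)
    #
    #         def objective_function(self):
    #             # objective to minimize: (x - 1)^2
    #             return float(((self.x - 1.0) ** 2).sum())
    #
    #         def parameters_changed(self):
    #             # store the gradient of the objective w.r.t. each parameter;
    #             # objective_function_gradients then returns self.gradient
    #             self.x.gradient[:] = 2.0 * (self.x - 1.0)
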
    def _grads(self, x):
        """
        Gets the gradients from the likelihood and the priors.

        Failures are handled robustly. The algorithm will try several times to
        return the gradients, and will raise the original exception if
        the objective cannot be computed.

        :param x: the parameters of the model.
        :type x: np.array
        """
        try:
            # self._set_params_transformed(x)
            self.optimizer_array = x
            self.obj_grads = self._transform_gradients(self.objective_function_gradients())
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError): #pragma: no cover
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
        return self.obj_grads

    def _objective(self, x):
        """
        The objective function passed to the optimizer. It combines
        the likelihood and the priors.

        Failures are handled robustly. The algorithm will try several times to
        return the objective, and will raise the original exception if
        the objective cannot be computed.

        :param x: the parameters of the model.
        :type x: np.array
        """
        try:
            self.optimizer_array = x
            obj = self.objective_function()
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError): #pragma: no cover
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            return np.inf
        return obj

    def _objective_grads(self, x):
        try:
            self.optimizer_array = x
            obj_f, self.obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError): #pragma: no cover
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            obj_f = np.inf
            self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e10, 1e10)
        return obj_f, self.obj_grads

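    # Illustrative sketch (not part of the original source): _objective_grads
    # returns the (f, grad) pair that gradient-based optimizers expect, so a
    # model `m` could also be driven by scipy directly, e.g.:
    #
    #     from scipy.optimize import fmin_l_bfgs_b
    #     x_opt, f_opt, info = fmin_l_bfgs_b(m._objective_grads, m.optimizer_array.copy())
    #     m.optimizer_array = x_opt   # write the solution back into the model
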
    def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, df_tolerance=1e-12):
        """
        Check the gradient of the model by comparing to a numerical
        estimate. If the verbose flag is passed, individual
        components are tested (and printed).

        :param verbose: If True, print a "full" checking of each parameter
        :type verbose: bool
        :param step: The size of the step around which to linearise the objective
        :type step: float (default 1e-6)
        :param tolerance: the tolerance allowed (see note)
        :type tolerance: float (default 1e-3)

        .. note::

            The gradient is considered correct if the ratio of the analytical
            and numerical gradients is within <tolerance> of unity.

            The *dF_ratio* indicates the limit of numerical accuracy of numerical gradients.
            If it is too small, e.g., smaller than 1e-12, the numerical gradients are usually
            not accurate enough for the tests (shown with blue).
        """
        if not self._model_initialized_:
            import warnings
            warnings.warn("This model has not been initialized, try model.initialize_model()", RuntimeWarning)
            return False

        x = self.optimizer_array.copy()

        if not verbose:
            # make sure only to test the selected parameters
            if target_param is None:
                transformed_index = np.arange(len(x))
            else:
                transformed_index = self._raveled_index_for_transformed(target_param)

            if transformed_index.size == 0:
                print("No free parameters to check")
                return True

            # just check the global ratio
            dx = np.zeros(x.shape)
            dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)

            # evaluate around the point x
            f1 = self._objective(x + dx)
            f2 = self._objective(x - dx)
            gradient = self._grads(x)

            dx = dx[transformed_index]
            gradient = gradient[transformed_index]

            denominator = (2 * np.dot(dx, gradient))
            global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
            global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
            if global_ratio is np.nan: # pragma: no cover
                global_ratio = 0
            return np.abs(1. - global_ratio) < tolerance or global_diff
        else:
            # check the gradient of each parameter individually, and do some pretty printing
            try:
                names = self.parameter_names_flat()
            except NotImplementedError:
                names = ['Variable %i' % i for i in range(len(x))]
            # Prepare for pretty-printing
            header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical', 'dF_ratio']
            max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
            float_len = 10
            cols = [max_names]
            cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
            cols = np.array(cols) + 5
            header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
            header_string = list(map(lambda x: '|'.join(x), [header_string]))
            separator = '-' * len(header_string[0])
            print('\n'.join([header_string[0], separator]))

            if target_param is None:
                target_param = self
            transformed_index = self._raveled_index_for_transformed(target_param)

            if transformed_index.size == 0:
                print("No free parameters to check")
                return True

            gradient = self._grads(x).copy()
            np.where(gradient == 0, 1e-312, gradient)
            ret = True
            for xind in zip(transformed_index):
                xx = x.copy()
                xx[xind] += step
                f1 = float(self._objective(xx))
                xx[xind] -= 2.*step
                f2 = float(self._objective(xx))
                # Avoid divide by zero; if none of the values are above 1e-15,
                # both values are essentially the same
                if f1 > 1e-15 or f1 < -1e-15 or f2 > 1e-15 or f2 < -1e-15:
                    df_ratio = np.abs((f1 - f2) / min(f1, f2))
                else: # pragma: no cover
                    df_ratio = 1.0
                df_unstable = df_ratio < df_tolerance
                numerical_gradient = (f1 - f2) / (2. * step)
                if np.all(gradient[xind] == 0): # pragma: no cover
                    ratio = (f1 - f2) == gradient[xind]
                else:
                    ratio = (f1 - f2) / (2. * step * gradient[xind])
                difference = np.abs(numerical_gradient - gradient[xind])

                if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
                    formatted_name = "\033[92m {0} \033[0m".format(names[xind])
                    ret &= True
                else: # pragma: no cover
                    formatted_name = "\033[91m {0} \033[0m".format(names[xind])
                    ret &= False
                if df_unstable: # pragma: no cover
                    formatted_name = "\033[94m {0} \033[0m".format(names[xind])

                r = '%.6f' % float(ratio)
                d = '%.6f' % float(difference)
                g = '%.6f' % gradient[xind]
                ng = '%.6f' % float(numerical_gradient)
                df = '%1.e' % float(df_ratio)
                grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}|{5:^{c5}}".format(formatted_name, r, d, g, ng, df, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4], c5=cols[5])
                print(grad_string)

            self.optimizer_array = x
            return ret

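    # Illustrative usage sketch (not part of the original source): after
    # implementing a model `m`, its analytical gradients can be verified against
    # numerical estimates, e.g.:
    #
    #     assert m._checkgrad()        # quick global ratio check
    #     m._checkgrad(verbose=True)   # per-parameter table of ratios and differences
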
    def _repr_html_(self):
        """Representation of the model in html for notebook display."""
        model_details = [['<b>Model</b>', self.name + '<br>'],
                         ['<b>Objective</b>', '{}<br>'.format(float(self.objective_function()))],
                         ["<b>Number of Parameters</b>", '{}<br>'.format(self.size)],
                         ["<b>Number of Optimization Parameters</b>", '{}<br>'.format(self._size_transformed())],
                         ["<b>Updates</b>", '{}<br>'.format(self._update_on)],
                         ]
        from operator import itemgetter
        to_print = ["""<style type="text/css">
.pd{
    font-family: "Courier New", Courier, monospace !important;
    width: 100%;
    padding: 3px;
}
</style>\n"""] + ["<p class=pd>"] + ["{}: {}".format(name, detail) for name, detail in model_details] + ["</p>"]
        to_print.append(super(Model, self)._repr_html_())
        return "\n".join(to_print)

    def __str__(self, VT100=True):
        model_details = [['Name', self.name],
                         ['Objective', '{}'.format(float(self.objective_function()))],
                         ["Number of Parameters", '{}'.format(self.size)],
                         ["Number of Optimization Parameters", '{}'.format(self._size_transformed())],
                         ["Updates", '{}'.format(self._update_on)],
                         ]
        max_len = max(map(len, model_details))
        to_print = [""] + ["{0:{l}} : {1}".format(name, detail, l=max_len) for name, detail in model_details] + ["Parameters:"]
        to_print.append(super(Model, self).__str__(VT100=VT100))
        return "\n".join(to_print)