import numpy as np
from numpy.linalg import LinAlgError

from . import optimization
from .parameterized import Parameterized
from .optimization.verbose_optimization import VerboseOptimization
import multiprocessing as mp


def opt_wrapper(args):
    # Unpack a (model, kwargs) pair and run a single optimization. This must
    # live at module level so multiprocessing can pickle it for the worker
    # pool used by optimize_restarts.
    m = args[0]
    kwargs = args[1]
    return m.optimize(**kwargs)


class Model(Parameterized):
    _fail_count = 0  # number of consecutive failed objective evaluations
    _allowed_failures = 10  # number of failures tolerated before re-raising

    def __init__(self, name):
        super(Model, self).__init__(name)
        self.optimization_runs = []
        self.sampling_runs = []
        self.preferred_optimizer = 'lbfgsb'
        self.obj_grads = None

    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
        """
        Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.

        kwargs are passed to the optimizer. They can be:

        :param max_iters: maximum number of function evaluations
        :type max_iters: int
        :param messages: whether to display messages during optimisation
        :type messages: bool
        :param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
        :type optimizer: string

        Valid optimizers are:

        - 'scg': scaled conjugate gradient method, recommended for stability.
          See also GPy.inference.optimization.scg
        - 'fmin_tnc': truncated Newton method (see scipy.optimize.fmin_tnc)
        - 'simplex': the Nelder-Mead simplex method (see scipy.optimize.fmin)
        - 'lbfgsb': the l-bfgs-b method (see scipy.optimize.fmin_l_bfgs_b)
        - 'lbfgs': the bfgs method (see scipy.optimize.fmin_bfgs)
        - 'sgd': stochastic gradient descent. For experts only!
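
        For example, assuming ``m`` is an instance of a Model subclass
        (a usage sketch, not part of this module)::

            m.optimize(optimizer='lbfgsb', messages=True, max_iters=2000)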
88 """
89 if self.is_fixed or self.size == 0:
90 print('nothing to optimize')
91 return
92
93 if not self.update_model():
94 print("updates were off, setting updates on again")
95 self.update_model(True)
96
97 if start is None:
98 start = self.optimizer_array
99
100 if optimizer is None:
101 optimizer = self.preferred_optimizer
102
103 if isinstance(optimizer, optimization.Optimizer):
104 opt = optimizer
105 opt.model = self
106 else:
107 optimizer = optimization.get_optimizer(optimizer)
108 opt = optimizer(max_iters=max_iters, **kwargs)
109
110 with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook, clear_after_finish=clear_after_finish) as vo:
111 opt.run(start, f_fp=self._objective_grads, f=self._objective, fp=self._grads)
112
113 self.optimization_runs.append(opt)
114
115 self.optimizer_array = opt.x_opt
116 return opt

    def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
        """
        Perform random restarts of the model, and set the model to the best
        seen solution.

        If the robust flag is set, exceptions raised during optimizations will
        be handled silently. If _all_ runs fail, the model is reset to the
        existing parameter values.

        \*\*kwargs are passed to the optimizer.

        :param num_restarts: number of restarts to use (default 10)
        :type num_restarts: int
        :param robust: whether to handle exceptions silently or not (default False)
        :type robust: bool
        :param parallel: whether to run each restart as a separate process. It relies on the multiprocessing module.
        :type parallel: bool
        :param num_processes: number of workers in the multiprocessing pool
        :type num_processes: int
        :param max_f_eval: maximum number of function evaluations
        :type max_f_eval: int
        :param max_iters: maximum number of iterations
        :type max_iters: int
        :param messages: whether to display messages during optimisation
        :type messages: bool

        .. note::

            If num_processes is None, the number of workers in the
            multiprocessing pool is automatically set to the number of processors
            on the current machine.
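
        For example, assuming ``m`` is an instance of a Model subclass
        (a usage sketch, not part of this module)::

            m.optimize_restarts(num_restarts=5, robust=True, parallel=False)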
150 """
151 initial_parameters = self.optimizer_array.copy()
152
153 if parallel:
154 try:
155 pool = mp.Pool(processes=num_processes)
156 obs = [self.copy() for i in range(num_restarts)]
157 [obs[i].randomize() for i in range(num_restarts-1)]
158 jobs = pool.map(opt_wrapper, [(o,kwargs) for o in obs])
159 pool.close()
160 pool.join()
161 except KeyboardInterrupt:
162 print("Ctrl+c received, terminating and joining pool.")
163 pool.terminate()
164 pool.join()

        for i in range(num_restarts):
            try:
                if not parallel:
                    if i > 0:
                        self.randomize()
                    self.optimize(**kwargs)
                else:
                    self.optimization_runs.append(jobs[i])

                if verbose:
                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))
            except Exception as e:
                if robust:
                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))
                else:
                    raise e

        if len(self.optimization_runs):
            i = np.argmin([o.f_opt for o in self.optimization_runs[-num_restarts:]])
            self.optimizer_array = self.optimization_runs[-num_restarts + i].x_opt
        else:
            self.optimizer_array = initial_parameters
        return self.optimization_runs

    def objective_function(self):
        """
        The objective function for the given algorithm.

        This function is the true objective, which is to be minimized.
        Note that all parameters are already set and in place, so you just need
        to return the objective function here.

        For probabilistic models this is the negative log-likelihood
        (including the MAP prior), so we return it here. If your model is not
        probabilistic, just return your objective to minimize here!
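
        A minimal sketch of a subclass (``MyModel`` and its toy
        ``log_likelihood`` are illustrative assumptions, not part of this
        class)::

            class MyModel(Model):
                def log_likelihood(self):
                    # toy quadratic likelihood; a real model would use its
                    # data and parameters here
                    return -0.5 * float(np.sum(np.square(self.optimizer_array)))

                def objective_function(self):
                    # the optimizer minimizes, so return the negative
                    # log-likelihood
                    return -self.log_likelihood()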
200 """
201 raise NotImplementedError("Implement the result of the objective function here")

    def objective_function_gradients(self):
        """
        The gradients for the objective function for the given algorithm.
        The gradients are w.r.t. the *negative* objective function, as
        this framework works with *negative* log-likelihoods as a default.

        You can find the gradients for the parameters in self.gradient at
        all times. This is the place where gradients get stored for the
        parameters.

        This function returns the true gradient of the objective, which is
        to be minimized. Note that all parameters are already set and in
        place, so you just need to return the gradient here.

        For probabilistic models this is the gradient of the negative
        log-likelihood (including the MAP prior), so we return it here. If
        your model is not probabilistic, just return your *negative*
        gradient here!
        """
        return self.gradient

    def _objective(self, x):
        """
        The objective function passed to the optimizer. It combines
        the likelihood and the priors.

        Failures are handled robustly. The algorithm will try several times to
        return the objective, and will raise the original exception if
        the objective cannot be computed.

        :param x: the parameters of the model.
        :type x: np.array
        """
        try:
            self.optimizer_array = x
            obj = self.objective_function()
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            return np.inf
        return obj

    def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, df_tolerance=1e-12):
        """
        Check the gradient of the model by comparing it to a numerical
        estimate. If the verbose flag is passed, individual
        components are tested (and printed).

        :param verbose: If True, print a "full" check of each parameter
        :type verbose: bool
        :param step: The size of the step around which to linearise the objective
        :type step: float (default 1e-6)
        :param tolerance: the tolerance allowed (see note)
        :type tolerance: float (default 1e-3)

        .. note::

            The gradient is considered correct if the ratio of the analytical
            and numerical gradients is within <tolerance> of unity.

            The *dF_ratio* indicates the limit of numerical accuracy of the
            numerical gradients. If it is too small, e.g. smaller than 1e-12,
            the numerical gradients are usually not accurate enough for the
            tests (shown in blue).
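
        For example, assuming ``m`` is an initialized model, the check is
        usually invoked through the public ``checkgrad`` wrapper (a usage
        sketch)::

            m.checkgrad(verbose=True)  # prints a per-parameter table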
301 """
        if not self._model_initialized_:
            import warnings
            warnings.warn("This model has not been initialized, try model.initialize_parameter()", RuntimeWarning)
            return False

        x = self.optimizer_array.copy()

        if not verbose:
            # perturb the whole parameter vector in one random direction
            if target_param is None:
                transformed_index = np.arange(len(x))
            else:
                transformed_index = self._raveled_index_for_transformed(target_param)

            if transformed_index.size == 0:
                print("No free parameters to check")
                return True

            dx = np.zeros(x.shape)
            dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)

            # central difference around x
            f1 = self._objective(x + dx)
            f2 = self._objective(x - dx)
            gradient = self._grads(x)

            dx = dx[transformed_index]
            gradient = gradient[transformed_index]

            denominator = (2 * np.dot(dx, gradient))
            global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
            global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
            if np.isnan(global_ratio):
                global_ratio = 0
            return np.abs(1. - global_ratio) < tolerance or global_diff
        else:
            # check each parameter individually and print a table
            try:
                names = self.parameter_names_flat()
            except NotImplementedError:
                names = ['Variable %i' % i for i in range(len(x))]

            header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical', 'dF_ratio']
            max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
            float_len = 10
            cols = [max_names]
            cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
            cols = np.array(cols) + 5
            header_cells = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
            header_string = '|'.join(header_cells)
            separator = '-' * len(header_string)
            print('\n'.join([header_string, separator]))

            if target_param is None:
                target_param = self
            transformed_index = self._raveled_index_for_transformed(target_param)

            if transformed_index.size == 0:
                print("No free parameters to check")
                return True

            gradient = self._grads(x).copy()
            ret = True
            for xind in transformed_index:
                # central difference for this single parameter
                xx = x.copy()
                xx[xind] += step
                f1 = float(self._objective(xx))
                xx[xind] -= 2. * step
                f2 = float(self._objective(xx))

                # estimate the numerical accuracy of the difference
                if np.abs(f1) > 1e-15 or np.abs(f2) > 1e-15:
                    df_ratio = np.abs((f1 - f2) / min(f1, f2))
                else:
                    df_ratio = 1.0
                df_unstable = df_ratio < df_tolerance
                numerical_gradient = (f1 - f2) / (2. * step)
                if np.all(gradient[xind] == 0):
                    # zero analytical gradient: pass only if the numerical
                    # difference is also zero
                    ratio = (f1 - f2) == gradient[xind]
                else:
                    ratio = (f1 - f2) / (2. * step * gradient[xind])
                difference = np.abs(numerical_gradient - gradient[xind])

                if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
                    formatted_name = "\033[92m {0} \033[0m".format(names[xind])
                    ret &= True
                else:
                    formatted_name = "\033[91m {0} \033[0m".format(names[xind])
                    ret &= False
                if df_unstable:
                    formatted_name = "\033[94m {0} \033[0m".format(names[xind])

                r = '%.6f' % float(ratio)
                d = '%.6f' % float(difference)
                g = '%.6f' % gradient[xind]
                ng = '%.6f' % float(numerical_gradient)
                df = '%1.e' % float(df_ratio)
                grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}|{5:^{c5}}".format(formatted_name, r, d, g, ng, df, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4], c5=cols[5])
                print(grad_string)

        self.optimizer_array = x
        return ret

    def _repr_html_(self):
        """Representation of the model in html for notebook display."""
        model_details = [['<b>Model</b>', self.name + '<br>'],
                         ['<b>Objective</b>', '{}<br>'.format(float(self.objective_function()))],
                         ["<b>Number of Parameters</b>", '{}<br>'.format(self.size)],
                         ["<b>Number of Optimization Parameters</b>", '{}<br>'.format(self._size_transformed())],
                         ["<b>Updates</b>", '{}<br>'.format(self._update_on)],
                         ]
        to_print = ["""<style type="text/css">
.pd{
    font-family: "Courier New", Courier, monospace !important;
    width: 100%;
    padding: 3px;
}
</style>\n"""] + ["<p class=pd>"] + ["{}: {}".format(name, detail) for name, detail in model_details] + ["</p>"]
        to_print.append(super(Model, self)._repr_html_())
        return "\n".join(to_print)

    def __str__(self, VT100=True):
        model_details = [['Name', self.name],
                         ['Objective', '{}'.format(float(self.objective_function()))],
                         ["Number of Parameters", '{}'.format(self.size)],
                         ["Number of Optimization Parameters", '{}'.format(self._size_transformed())],
                         ["Updates", '{}'.format(self._update_on)],
                         ]
        # pad to the longest detail name so the columns line up
        max_len = max(len(name) for name, _ in model_details)
        to_print = [""] + ["{0:{l}} : {1}".format(name, detail, l=max_len) for name, detail in model_details] + ["Parameters:"]
        to_print.append(super(Model, self).__str__(VT100=VT100))
        return "\n".join(to_print)