Package paramz :: Module transformations

Source Code for Module paramz.transformations

#===============================================================================
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, Max Zwiessele
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of paramax nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#===============================================================================


import numpy as np
from .domains import _POSITIVE, _NEGATIVE, _BOUNDED
import weakref

import sys

_log_lim_val = np.log(np.finfo(np.float64).max)
_exp_lim_val = np.finfo(np.float64).max
_lim_val = 36.0
epsilon = np.finfo(np.float64).resolution

#===============================================================================
# Fixing constants
__fixed__ = "fixed"
FIXED = False
UNFIXED = True
#===============================================================================

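# Illustration only, not part of the original module: the limits above guard the
# exp/log calls in the transformations below against float64 overflow, and
# _lim_val marks where softplus(x) = log(1 + exp(x)) is numerically just x.
# A minimal sketch using the constants defined above; the helper name is
# hypothetical and it is not executed on import.
def _demo_float_limits():
    with np.errstate(over='ignore'):
        assert np.isinf(np.exp(_log_lim_val + 1.))            # exp overflows just past ~709.78
    assert np.isclose(np.log1p(np.exp(_lim_val)), _lim_val)   # softplus(36.) is numerically 36.
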
class Transformation(object):
    domain = None
    _instance = None
    def __new__(cls, *args, **kwargs):
        if not cls._instance or cls._instance.__class__ is not cls:
            cls._instance = super(Transformation, cls).__new__(cls, *args, **kwargs)
        return cls._instance
    def f(self, opt_param):
        raise NotImplementedError
    def finv(self, model_param):
        raise NotImplementedError
    def log_jacobian(self, model_param):
        """
        compute the log of the jacobian of f, evaluated at f(x) = model_param
        """
        raise NotImplementedError
    def log_jacobian_grad(self, model_param):
        """
        compute the derivative of the log of the jacobian of f, evaluated at f(x) = model_param
        """
        raise NotImplementedError
    def gradfactor(self, model_param, dL_dmodel_param):
        r"""
        df(opt_param)/dopt_param evaluated at self.f(opt_param) = model_param,
        times the gradient dL_dmodel_param, i.e.:

        .. math::

            \frac{\partial L}{\partial f}\left.\frac{\partial f(x)}{\partial x}\right|_{x=f^{-1}(\mathrm{model\_param})}
        """
        raise NotImplementedError
    def gradfactor_non_natural(self, model_param, dL_dmodel_param):
        return self.gradfactor(model_param, dL_dmodel_param)
    def initialize(self, f):
        """ produce a sensible initial value for f(x)"""
        raise NotImplementedError
    def plot(self, xlabel=r'transformed $\theta$', ylabel=r'$\theta$', axes=None, *args, **kw):
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        import matplotlib.pyplot as plt
        x = np.linspace(-8, 8)
        if axes is None:
            axes = plt.gca()
        axes.plot(x, self.f(x), *args, **kw)
        axes.set_xlabel(xlabel)
        axes.set_ylabel(ylabel)
    def __str__(self):
        raise NotImplementedError
    def __repr__(self):
        return self.__class__.__name__

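# Illustration only, not part of the original module: a hypothetical subclass
# showing the f / finv / gradfactor contract and the per-class singleton
# behaviour of Transformation. The names below are made up for this sketch and
# it is not executed on import.
def _demo_transformation_contract():
    class DoubleDemo(Transformation):          # hypothetical, for illustration only
        def f(self, x):
            return 2. * x                      # optimizer space -> model space
        def finv(self, f):
            return f / 2.                      # model space -> optimizer space
        def gradfactor(self, f, df):
            return 2. * df                     # chain rule: dL/dx = dL/df * df/dx
        def __str__(self):
            return 'x2'
    assert DoubleDemo() is DoubleDemo()        # __new__ caches one instance per class
    t = DoubleDemo()
    x = np.array([-1., 0., 3.])
    assert np.allclose(t.finv(t.f(x)), x)      # finv undoes f
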
class Logexp(Transformation):
    domain = _POSITIVE
    def f(self, x):
        return np.where(x>_lim_val, x, np.log1p(np.exp(np.clip(x, -_log_lim_val, _lim_val)))) #+ epsilon
        #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
    def finv(self, f):
        return np.where(f>_lim_val, f, np.log(np.expm1(f)))
    def gradfactor(self, f, df):
        return df*np.where(f>_lim_val, 1., - np.expm1(-f))
    def initialize(self, f):
        if np.any(f < 0.):
            print("Warning: changing parameters to satisfy constraints")
        return np.abs(f)
    def log_jacobian(self, model_param):
        return np.where(model_param>_lim_val, model_param, np.log(np.expm1(model_param))) - model_param
    def log_jacobian_grad(self, model_param):
        return 1./(np.expm1(model_param))
    def __str__(self):
        return '+ve'

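# Illustration only, not part of the original module: Logexp is the softplus map
# log(1 + exp(x)); finv inverts it and gradfactor agrees with a finite-difference
# derivative of f. The helper name is hypothetical and it is not executed on import.
def _demo_logexp():
    t = Logexp()
    x = np.array([-2.0, 0.5, 3.0])
    p = t.f(x)                                      # positive model parameters
    assert np.allclose(t.finv(p), x)                # finv undoes f
    eps = 1e-6
    fd = (t.f(x + eps) - t.f(x - eps)) / (2 * eps)  # numerical df/dx
    assert np.allclose(t.gradfactor(p, np.ones(3)), fd, atol=1e-5)
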
class Exponent(Transformation):
    domain = _POSITIVE
    def f(self, x):
        return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
    def finv(self, x):
        return np.log(x)
    def gradfactor(self, f, df):
        return np.einsum('i,i->i', df, f)
    def initialize(self, f):
        if np.any(f < 0.):
            print("Warning: changing parameters to satisfy constraints")
        return np.abs(f)
    def log_jacobian(self, model_param):
        return np.log(model_param)
    def log_jacobian_grad(self, model_param):
        return 1./model_param
    def __str__(self):
        return '+ve'


class NormalTheta(Transformation):
    "Do not use, not officially supported!"
    _instances = []
    def __new__(cls, mu_indices=None, var_indices=None):
        "Do not use, not officially supported!"
        if cls._instances:
            cls._instances[:] = [instance for instance in cls._instances if instance()]
            for instance in cls._instances:
                if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
                    return instance()
        o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()

    def __init__(self, mu_indices, var_indices):
        self.mu_indices = mu_indices
        self.var_indices = var_indices

    def f(self, theta):
        # In here abs is only a trick to make sure the numerics are ok.
        # The variance will never go below zero, but at initialization we need to make sure
        # that the values are ok
        # Before:
        theta[self.var_indices] = np.abs(-.5/theta[self.var_indices])
        #theta[self.var_indices] = np.exp(-.5/theta[self.var_indices])
        theta[self.mu_indices] *= theta[self.var_indices]
        return theta # which is now {mu, var}

    def finv(self, muvar):
        # before:
        varp = muvar[self.var_indices]
        muvar[self.mu_indices] /= varp
        muvar[self.var_indices] = -.5/varp
        #muvar[self.var_indices] = -.5/np.log(varp)

        return muvar # which is now {theta1, theta2}

    def gradfactor(self, muvar, dmuvar):
        mu = muvar[self.mu_indices]
        var = muvar[self.var_indices]
        #=======================================================================
        # theta gradients
        # This works and the gradient checks!
        dmuvar[self.mu_indices] *= var
        dmuvar[self.var_indices] *= 2*(var)**2
        dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
        #=======================================================================

        return dmuvar # which is now the gradient multiplicator for {theta1, theta2}

    def initialize(self, f):
        if np.any(f[self.var_indices] < 0.):
            print("Warning: changing parameters to satisfy constraints")
            f[self.var_indices] = np.abs(f[self.var_indices])
        return f

    def __str__(self):
        return "theta"

    def __getstate__(self):
        return [self.mu_indices, self.var_indices]

    def __setstate__(self, state):
        self.mu_indices = state[0]
        self.var_indices = state[1]

class NormalNaturalAntti(NormalTheta):
    "Do not use, not officially supported!"
    _instances = []
    def __new__(cls, mu_indices=None, var_indices=None):
        "Do not use, not officially supported!"
        if cls._instances:
            cls._instances[:] = [instance for instance in cls._instances if instance()]
            for instance in cls._instances:
                if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
                    return instance()
        o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()

    def __init__(self, mu_indices, var_indices):
        self.mu_indices = mu_indices
        self.var_indices = var_indices

    def gradfactor(self, muvar, dmuvar):
        mu = muvar[self.mu_indices]
        var = muvar[self.var_indices]

        #=======================================================================
        # theta gradients
        # This works and the gradient checks!
        dmuvar[self.mu_indices] *= var
        dmuvar[self.var_indices] *= 2*var**2 #np.einsum('i,i,i,i->i', dmuvar[self.var_indices], [2], var, var)
        #=======================================================================

        return dmuvar # which is now the gradient multiplicator

    def initialize(self, f):
        if np.any(f[self.var_indices] < 0.):
            print("Warning: changing parameters to satisfy constraints")
            f[self.var_indices] = np.abs(f[self.var_indices])
        return f

    def __str__(self):
        return "natantti"

class NormalEta(Transformation):
    "Do not use, not officially supported!"
    _instances = []
    def __new__(cls, mu_indices=None, var_indices=None):
        "Do not use, not officially supported!"
        if cls._instances:
            cls._instances[:] = [instance for instance in cls._instances if instance()]
            for instance in cls._instances:
                if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
                    return instance()
        o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()

    def __init__(self, mu_indices, var_indices):
        self.mu_indices = mu_indices
        self.var_indices = var_indices

    def f(self, theta):
        theta[self.var_indices] = np.abs(theta[self.var_indices] - theta[self.mu_indices]**2)
        return theta # which is now {mu, var}

    def finv(self, muvar):
        muvar[self.var_indices] += muvar[self.mu_indices]**2
        return muvar # which is now {eta1, eta2}

    def gradfactor(self, muvar, dmuvar):
        mu = muvar[self.mu_indices]
        #=======================================================================
        # Lets try natural gradients instead: Not working with bfgs... try stochastic!
        dmuvar[self.mu_indices] -= 2*mu*dmuvar[self.var_indices]
        #=======================================================================
        return dmuvar # which is now the gradient multiplicator

    def initialize(self, f):
        if np.any(f[self.var_indices] < 0.):
            print("Warning: changing parameters to satisfy constraints")
            f[self.var_indices] = np.abs(f[self.var_indices])
        return f

    def __str__(self):
        return "eta"

class NormalNaturalThroughTheta(NormalTheta):
    "Do not use, not officially supported!"
    _instances = []
    def __new__(cls, mu_indices=None, var_indices=None):
        "Do not use, not officially supported!"
        if cls._instances:
            cls._instances[:] = [instance for instance in cls._instances if instance()]
            for instance in cls._instances:
                if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
                    return instance()
        o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()

    def __init__(self, mu_indices, var_indices):
        self.mu_indices = mu_indices
        self.var_indices = var_indices

    def gradfactor(self, muvar, dmuvar):
        mu = muvar[self.mu_indices]
        var = muvar[self.var_indices]

        #=======================================================================
        # This is just eta direction:
        dmuvar[self.mu_indices] -= 2*mu*dmuvar[self.var_indices]
        #=======================================================================

        #=======================================================================
        # This is by going through theta fully and then going into eta direction:
        #dmu = dmuvar[self.mu_indices]
        #dmuvar[self.var_indices] += dmu*mu*(var + 4/var)
        #=======================================================================
        return dmuvar # which is now the gradient multiplicator

    def gradfactor_non_natural(self, muvar, dmuvar):
        mu = muvar[self.mu_indices]
        var = muvar[self.var_indices]
        #=======================================================================
        # theta gradients
        # This works and the gradient checks!
        dmuvar[self.mu_indices] *= var
        dmuvar[self.var_indices] *= 2*(var)**2
        dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
        #=======================================================================

        return dmuvar # which is now the gradient multiplicator for {theta1, theta2}

    def __str__(self):
        return "natgrad"


class NormalNaturalWhooot(NormalTheta):
    "Do not use, not officially supported!"
    _instances = []
    def __new__(cls, mu_indices=None, var_indices=None):
        "Do not use, not officially supported!"
        if cls._instances:
            cls._instances[:] = [instance for instance in cls._instances if instance()]
            for instance in cls._instances:
                if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
                    return instance()
        o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()

    def __init__(self, mu_indices, var_indices):
        self.mu_indices = mu_indices
        self.var_indices = var_indices

    def gradfactor(self, muvar, dmuvar):
        #mu = muvar[self.mu_indices]
        #var = muvar[self.var_indices]

        #=======================================================================
        # This is just eta direction:
        #dmuvar[self.mu_indices] -= 2*mu*dmuvar[self.var_indices]
        #=======================================================================

        #=======================================================================
        # This is by going through theta fully and then going into eta direction:
        #dmu = dmuvar[self.mu_indices]
        #dmuvar[self.var_indices] += dmu*mu*(var + 4/var)
        #=======================================================================
        return dmuvar # which is now the gradient multiplicator

    def __str__(self):
        return "natgrad"

class NormalNaturalThroughEta(NormalEta):
    "Do not use, not officially supported!"
    _instances = []
    def __new__(cls, mu_indices=None, var_indices=None):
        "Do not use, not officially supported!"
        if cls._instances:
            cls._instances[:] = [instance for instance in cls._instances if instance()]
            for instance in cls._instances:
                if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
                    return instance()
        o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()

    def __init__(self, mu_indices, var_indices):
        self.mu_indices = mu_indices
        self.var_indices = var_indices

    def gradfactor(self, muvar, dmuvar):
        mu = muvar[self.mu_indices]
        var = muvar[self.var_indices]
        #=======================================================================
        # theta gradients
        # This works and the gradient checks!
        dmuvar[self.mu_indices] *= var
        dmuvar[self.var_indices] *= 2*(var)**2
        dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
        #=======================================================================
        return dmuvar

    def __str__(self):
        return "natgrad"


class LogexpNeg(Transformation):
    domain = _POSITIVE
    def f(self, x):
        return np.where(x>_lim_val, -x, -np.log(1. + np.exp(np.clip(x, -np.inf, _lim_val))))
        #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
    def finv(self, f):
        return np.where(f>_lim_val, 0, np.log(np.exp(-f) - 1.))
    def gradfactor(self, f, df):
        return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f)))
    def initialize(self, f):
        if np.any(f < 0.):
            print("Warning: changing parameters to satisfy constraints")
        return np.abs(f)
    def __str__(self):
        return '+ve'


class NegativeLogexp(Transformation):
    domain = _NEGATIVE
    logexp = Logexp()
    def f(self, x):
        return -self.logexp.f(x) # np.log(1. + np.exp(x))
    def finv(self, f):
        return self.logexp.finv(-f) # np.log(np.exp(-f) - 1.)
    def gradfactor(self, f, df):
        return np.einsum('i,i->i', df, -self.logexp.gradfactor(-f, df))
    def initialize(self, f):
        return -self.logexp.initialize(f) # np.abs(f)
    def __str__(self):
        return '-ve'

class LogexpClipped(Logexp):
    max_bound = 1e100
    min_bound = 1e-10
    log_max_bound = np.log(max_bound)
    log_min_bound = np.log(min_bound)
    domain = _POSITIVE
    _instances = []
    def __new__(cls, lower=1e-6, *args, **kwargs):
        if cls._instances:
            cls._instances[:] = [instance for instance in cls._instances if instance()]
            for instance in cls._instances:
                if instance().lower == lower:
                    return instance()
        o = super(Transformation, cls).__new__(cls, lower, *args, **kwargs)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()
    def __init__(self, lower=1e-6):
        self.lower = lower
    def f(self, x):
        exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
        f = np.log(1. + exp)
        # if np.isnan(f).any():
        #     import ipdb;ipdb.set_trace()
        return np.clip(f, self.min_bound, self.max_bound)
    def finv(self, f):
        return np.log(np.exp(f) - 1.)  # inverse of log(1 + exp(x))
    def gradfactor(self, f, df):
        ef = np.exp(f) # np.clip(f, self.min_bound, self.max_bound))
        gf = (ef - 1.) / ef
        return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf)
    def initialize(self, f):
        if np.any(f < 0.):
            print("Warning: changing parameters to satisfy constraints")
        return np.abs(f)
    def __str__(self):
        return '+ve_c'

class NegativeExponent(Exponent):
    domain = _NEGATIVE
    def f(self, x):
        return -Exponent.f(self, x)
    def finv(self, f):
        return Exponent.finv(self, -f)
    def gradfactor(self, f, df):
        return np.einsum('i,i->i', df, f)
    def initialize(self, f):
        return -Exponent.initialize(self, f) # np.abs(f)
    def __str__(self):
        return '-ve'

class Square(Transformation):
    domain = _POSITIVE
    def f(self, x):
        return x ** 2
    def finv(self, x):
        return np.sqrt(x)
    def gradfactor(self, f, df):
        return np.einsum('i,i->i', df, 2 * np.sqrt(f))
    def initialize(self, f):
        return np.abs(f)
    def __str__(self):
        return '+sq'

class Logistic(Transformation):
    domain = _BOUNDED
    _instances = []
    def __new__(cls, lower=1e-6, upper=1e-6, *args, **kwargs):
        if cls._instances:
            cls._instances[:] = [instance for instance in cls._instances if instance()]
            for instance in cls._instances:
                if instance().lower == lower and instance().upper == upper:
                    return instance()
        newfunc = super(Transformation, cls).__new__
        if newfunc is object.__new__:
            o = newfunc(cls)
        else:
            o = newfunc(cls, lower, upper, *args, **kwargs)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()
    def __init__(self, lower, upper):
        assert lower < upper
        self.lower, self.upper = float(lower), float(upper)
        self.difference = self.upper - self.lower
    def f(self, x):
        if (x<-300.).any():
            x = x.copy()
            x[x<-300.] = -300.
        return self.lower + self.difference / (1. + np.exp(-x))
    def finv(self, f):
        return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
    def gradfactor(self, f, df):
        return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference)
    def initialize(self, f):
        if np.any(np.logical_or(f < self.lower, f > self.upper)):
            print("Warning: changing parameters to satisfy constraints")
        #return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
        #FIXME: Max, zeros_like right?
        return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f)
    def __str__(self):
        return '{},{}'.format(self.lower, self.upper)

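# Illustration only, not part of the original module: Logistic squashes the real
# line into (lower, upper), finv maps back, and __new__ caches one instance per
# distinct bound pair. The helper name is hypothetical and it is not executed on
# import.
def _demo_logistic():
    t = Logistic(0., 1.)
    assert Logistic(0., 1.) is t        # same bounds -> same cached instance
    assert Logistic(0., 2.) is not t    # different bounds -> new instance
    x = np.array([-5., 0., 5.])
    f = t.f(x)
    assert np.all((f > 0.) & (f < 1.))  # values squashed into (0, 1)
    assert np.allclose(t.finv(f), x)    # finv inverts f away from the clip bounds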