Source code for moe.tests.optimal_learning.python.python_version.optimization_test

# -*- coding: utf-8 -*-
"""Tests for the Python optimization module (null, gradient descent, and multistarting) using a simple polynomial objective."""
import numpy

import pytest

from moe.optimal_learning.python.geometry_utils import ClosedInterval
from moe.optimal_learning.python.interfaces.optimization_interface import OptimizableInterface
from moe.optimal_learning.python.python_version.domain import TensorProductDomain
from moe.optimal_learning.python.python_version.optimization import multistart_optimize, LBFGSBParameters, GradientDescentParameters, NullOptimizer, GradientDescentOptimizer, MultistartOptimizer, LBFGSBOptimizer, COBYLAOptimizer, COBYLAParameters
from moe.tests.optimal_learning.python.optimal_learning_test_case import OptimalLearningTestCase


class QuadraticFunction(OptimizableInterface):

    r"""Class to evaluate the function f(x_1,...,x_{dim}) = -\sum_i (x_i - s_i)^2, i = 1..dim.

    This is a simple quadratic form with maxima at (s_1, ..., s_{dim}).

    """

    def __init__(self, maxima_point, current_point):
        """Create an instance of QuadraticFunction with the specified maxima."""
        self._maxima_point = numpy.copy(maxima_point)
        self._current_point = numpy.copy(current_point)

    @property
    def dim(self):
        """Return the number of spatial dimensions."""
        return self._maxima_point.size

    @property
    def problem_size(self):
        """Return the number of independent parameters to optimize."""
        return self.dim

    @property
    def optimum_value(self):
        """Return max_x f(x), the global maximum value of this function."""
        return 0.0

    @property
    def optimum_point(self):
        """Return the argmax_x (f(x)), the point at which the global maximum occurs."""
        return numpy.copy(self._maxima_point)

    def get_current_point(self):
        """Get the current_point (array of float64 with shape (problem_size)) at which this object is evaluating the objective function, ``f(x)``."""
        return numpy.copy(self._current_point)

    def set_current_point(self, current_point):
        """Set current_point to the specified point; ordering must match.

        :param current_point: current_point at which to evaluate the objective function, ``f(x)``
        :type current_point: array of float64 with shape (problem_size)

        """
        self._current_point = numpy.copy(current_point)

    current_point = property(get_current_point, set_current_point)

    def compute_objective_function(self, **kwargs):
        r"""Compute ``f(current_point)``.

        :return: value of objective function evaluated at ``current_point``
        :rtype: float64

        """
        temp = self._current_point - self._maxima_point
        temp *= temp
        return -temp.sum()

    def compute_grad_objective_function(self, **kwargs):
        r"""Compute the gradient of ``f(current_point)`` wrt ``current_point``.

        :return: gradient of the objective, i-th entry is ``\pderiv{f(x)}{x_i}``
        :rtype: array of float64 with shape (problem_size)

        """
        return -2.0 * (self._current_point - self._maxima_point)

    def compute_hessian_objective_function(self, **kwargs):
        r"""Compute the hessian matrix of ``f(current_point)`` wrt ``current_point``.

        This matrix is symmetric as long as the mixed second derivatives of f(x) are continuous: Clairaut's Theorem.
        http://en.wikipedia.org/wiki/Symmetry_of_second_derivatives

        :return: hessian of the objective, (i,j)th entry is ``\mixpderiv{f(x)}{x_i}{x_j}``
        :rtype: array of float64 with shape (problem_size, problem_size)

        """
        return numpy.diag(numpy.full(self.dim, -2.0))
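# Illustrative sketch (not part of the original test suite): exercising QuadraticFunction
# directly and checking it against a hand computation of f(x) = -\sum_i (x_i - s_i)^2 and
# its gradient. The leading underscore keeps pytest from collecting it as a test.
def _example_quadratic_function_usage():
    maxima_point = numpy.full(3, 0.5)
    polynomial = QuadraticFunction(maxima_point, numpy.full(3, 0.2))
    # f(x) = -3 * (0.2 - 0.5)^2 = -0.27
    assert numpy.isclose(polynomial.compute_objective_function(), -0.27)
    # df/dx_i = -2 * (0.2 - 0.5) = 0.6 in every coordinate
    assert numpy.allclose(polynomial.compute_grad_objective_function(), numpy.full(3, 0.6))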
class TestNullOptimizer(OptimalLearningTestCase):

    """Test the NullOptimizer on a simple objective.

    NullOptimizer should do nothing. Multistarting it should be the same as a 'dumb' search over points.

    """

    @classmethod
    @pytest.fixture(autouse=True, scope='class')
    def base_setup(cls):
        """Set up a test case for optimizing a simple quadratic polynomial."""
        cls.dim = 3
        domain_bounds = [ClosedInterval(-1.0, 1.0)] * cls.dim
        cls.domain = TensorProductDomain(domain_bounds)

        maxima_point = numpy.full(cls.dim, 0.5)
        current_point = numpy.zeros(cls.dim)
        cls.polynomial = QuadraticFunction(maxima_point, current_point)
        cls.null_optimizer = NullOptimizer(cls.domain, cls.polynomial)

    def test_null_optimizer(self):
        """Test that the null optimizer does not change current_point."""
        current_point_old = self.null_optimizer.objective_function.current_point
        self.null_optimizer.optimize()
        current_point_new = self.null_optimizer.objective_function.current_point
        self.assert_vector_within_relative(current_point_old, current_point_new, 0.0)

    def test_multistarted_null_optimizer(self):
        """Test that multistarting the null optimizer just evaluates the function and identifies the max."""
        num_points = 15
        points = self.domain.generate_uniform_random_points_in_domain(num_points)
        truth = numpy.empty(num_points)
        for i, point in enumerate(points):
            self.null_optimizer.objective_function.current_point = point
            truth[i] = self.null_optimizer.objective_function.compute_objective_function()

        best_index = numpy.argmax(truth)
        truth_best_point = points[best_index, ...]
        test_best_point, test_values = multistart_optimize(self.null_optimizer, starting_points=points)

        self.assert_vector_within_relative(test_best_point, truth_best_point, 0.0)
        self.assert_vector_within_relative(test_values, truth, 0.0)
class TestOptimizer(OptimalLearningTestCase):

    r"""Test the implemented optimizers on a simple quadratic objective.

    We check GD in an unconstrained setting, a constrained setting, and we test multistarting it.
    For the other optimizers we check them in a constrained setting and a multistarted setting.

    We don't test the stochastic averaging option meaningfully. We check that the optimizer will
    average the number of steps specified by input. We also check that the simple unconstrained
    case can also be solved with averaging on\*.

    \* This is not much of a test. The problem is convex and isotropic so GD will take a more or
    less straight path to the maxima. Averaging can only reduce the accuracy of the solve.

    TODO(GH-179): Build a simple stochastic objective and test the stochastic component fully.

    """

    @classmethod
    @pytest.fixture(autouse=True, scope='class')
    def base_setup(cls):
        """Set up a test case for optimizing a simple quadratic polynomial."""
        cls.dim = 3
        domain_bounds = [ClosedInterval(-1.0, 1.0)] * cls.dim
        cls.domain = TensorProductDomain(domain_bounds)

        large_domain_bounds = [ClosedInterval(-1.0, 1.0)] * cls.dim
        cls.large_domain = TensorProductDomain(large_domain_bounds)

        maxima_point = numpy.full(cls.dim, 0.5)
        current_point = numpy.zeros(cls.dim)
        cls.polynomial = QuadraticFunction(maxima_point, current_point)

        max_num_steps = 250
        max_num_restarts = 10
        num_steps_averaged = 0
        gamma = 0.7  # smaller gamma would lead to faster convergence, but we don't want to make the problem too easy
        pre_mult = 1.0
        max_relative_change = 0.8
        tolerance = 1.0e-12
        cls.gd_parameters = GradientDescentParameters(
            max_num_steps,
            max_num_restarts,
            num_steps_averaged,
            gamma,
            pre_mult,
            max_relative_change,
            tolerance,
        )

        approx_grad = False
        max_func_evals = 150000
        max_metric_correc = 10
        factr = 1000.0
        pgtol = 1e-10
        epsilon = 1e-8
        cls.BFGS_parameters = LBFGSBParameters(
            approx_grad,
            max_func_evals,
            max_metric_correc,
            factr,
            pgtol,
            epsilon,
        )

        maxfun = 1000
        rhobeg = 1.0
        rhoend = numpy.finfo(numpy.float64).eps
        catol = 2.0e-13
        cls.COBYLA_parameters = COBYLAParameters(
            rhobeg,
            rhoend,
            maxfun,
            catol,
        )
    def test_get_averaging_range(self):
        """Test the method that determines the interval to average over in Polyak-Ruppert averaging."""
        num_steps_total = 250
        end = num_steps_total + 1
        num_steps_averaged_input_list = [-1, 0, 1, 20, 100, 249, 250, 251, 10000]
        truth_list = [(1, end), (250, end), (250, end), (231, end), (151, end), (2, end), (1, end), (1, end), (1, end)]

        for i, truth in enumerate(truth_list):
            start, end = GradientDescentOptimizer._get_averaging_range(num_steps_averaged_input_list[i], num_steps_total)
            assert start == truth[0]
            assert end == truth[1]
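    # Illustrative sketch (not part of the original tests): one way to reproduce the
    # (start, end) pairs in the truth table above. Averaging covers the *last*
    # ``num_steps_averaged`` gradient descent steps, with a negative input meaning
    # "average everything" and out-of-range inputs clamped into [1, num_steps_total].
    # The real logic lives in GradientDescentOptimizer._get_averaging_range.
    @staticmethod
    def _example_averaging_range(num_steps_averaged, num_steps_total):
        if num_steps_averaged < 0:
            num_steps_averaged = num_steps_total  # average over every step
        start = num_steps_total - num_steps_averaged + 1
        start = min(max(start, 1), num_steps_total)  # clamp into the valid step range
        return start, num_steps_total + 1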
    def test_gradient_descent_optimizer_constrained(self):
        """Check that gradient descent can find the global optimum (in a domain) when the true optimum is outside."""
        # Domain where the optimum, (0.5, 0.5, 0.5), lies outside the domain
        domain_bounds = [ClosedInterval(0.05, 0.32), ClosedInterval(0.05, 0.6), ClosedInterval(0.05, 0.32)]
        domain = TensorProductDomain(domain_bounds)
        gradient_descent_optimizer = GradientDescentOptimizer(domain, self.polynomial, self.gd_parameters)

        # Work out what the maxima point would be given the domain constraints (i.e., project to the nearest point on the domain)
        constrained_optimum_point = self.polynomial.optimum_point
        for i, bounds in enumerate(domain_bounds):
            if constrained_optimum_point[i] > bounds.max:
                constrained_optimum_point[i] = bounds.max
            elif constrained_optimum_point[i] < bounds.min:
                constrained_optimum_point[i] = bounds.min

        tolerance = 2.0e-13
        initial_guess = numpy.full(self.polynomial.dim, 0.2)
        gradient_descent_optimizer.objective_function.current_point = initial_guess
        initial_value = gradient_descent_optimizer.objective_function.compute_objective_function()
        gradient_descent_optimizer.optimize()
        output = gradient_descent_optimizer.objective_function.current_point

        # Verify coordinates
        self.assert_vector_within_relative(output, constrained_optimum_point, tolerance)

        # Verify that the optimized value is better than the initial guess
        final_value = self.polynomial.compute_objective_function()
        assert final_value >= initial_value

        # Verify derivative: the derivative is only 0 if the coordinate lies inside the domain boundaries
        gradient = self.polynomial.compute_grad_objective_function()
        for i, bounds in enumerate(domain_bounds):
            if bounds.is_inside(self.polynomial.optimum_point[i]):
                self.assert_scalar_within_relative(gradient[i], 0.0, tolerance)
    def test_multistarted_gradient_descent_optimizer_crippled_start(self):
        """Check that multistarted GD is finding the best result from GD."""
        # Only allow 1 GD iteration.
        max_num_steps = 1
        max_num_restarts = 1
        param_dict = self.gd_parameters._asdict()
        param_dict['max_num_steps'] = max_num_steps
        param_dict['max_num_restarts'] = max_num_restarts
        gd_parameters_crippled = GradientDescentParameters(**param_dict)

        gradient_descent_optimizer_crippled = GradientDescentOptimizer(self.domain, self.polynomial, gd_parameters_crippled)

        num_points = 15
        points = self.domain.generate_uniform_random_points_in_domain(num_points)

        multistart_optimizer = MultistartOptimizer(gradient_descent_optimizer_crippled, num_points)
        test_best_point, _ = multistart_optimizer.optimize(random_starts=points)

        # This point set won't include the optimum so multistart GD won't find it.
        for value in (test_best_point - self.polynomial.optimum_point):
            assert value != 0.0

        points_with_opt = numpy.append(points, self.polynomial.optimum_point.reshape((1, self.polynomial.dim)), axis=0)
        test_best_point, _ = multistart_optimizer.optimize(random_starts=points_with_opt)

        # This point set will include the optimum so multistart GD will find it.
        for value in (test_best_point - self.polynomial.optimum_point):
            assert value == 0.0
    def optimizer_test(self, optimizer, tolerance=2.0e-13):
        """Check that the optimizer can find the optimum of the quadratic test objective."""
        # Check that the claimed optimum is an optimum
        optimum_point = self.polynomial.optimum_point
        self.polynomial.current_point = optimum_point
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), 0.0)

        # Verify that the optimizer does not move from the optimum if we start it there.
        optimizer.optimize()
        output = optimizer.objective_function.current_point
        self.assert_vector_within_relative(output, optimum_point, 2.0 * numpy.finfo(numpy.float64).eps)

        # Start at a wrong point and check optimization
        initial_guess = numpy.full(self.polynomial.dim, 0.2)
        optimizer.objective_function.current_point = initial_guess
        optimizer.optimize()
        output = optimizer.objective_function.current_point

        # Verify coordinates
        self.assert_vector_within_relative(output, optimum_point, tolerance)

        # Verify function value
        value = self.polynomial.compute_objective_function()
        self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance)

        # Verify derivative
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)
    def multistarted_optimizer_test(self, optimizer):
        """Check that the multistarted optimizer can find the optimum in a 'very' large domain."""
        tolerance = 2.0e-10
        num_points = 10
        multistart_optimizer = MultistartOptimizer(optimizer, num_points)

        output, _ = multistart_optimizer.optimize()

        # Verify coordinates
        self.assert_vector_within_relative(output, self.polynomial.optimum_point, tolerance)

        # Verify function value
        value = self.polynomial.compute_objective_function()
        self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance)

        # Verify derivative
        gradient = self.polynomial.compute_grad_objective_function()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)
    def test_cobyla_optimizer(self):
        """Test if COBYLA can optimize a simple objective function."""
        constrained_optimizer = COBYLAOptimizer(self.domain, self.polynomial, self.COBYLA_parameters)
        self.optimizer_test(constrained_optimizer)

    def test_bfgs_optimizer(self):
        """Test if BFGS can optimize a simple objective function."""
        bfgs_optimizer = LBFGSBOptimizer(self.domain, self.polynomial, self.BFGS_parameters)
        self.optimizer_test(bfgs_optimizer)

    def test_gradient_descent_optimizer(self):
        """Test if Gradient Descent can optimize a simple objective function."""
        gradient_descent_optimizer = GradientDescentOptimizer(self.domain, self.polynomial, self.gd_parameters)
        self.optimizer_test(gradient_descent_optimizer)

    def test_cobyla_multistarted_optimizer(self):
        """Test if COBYLA can optimize a "hard" objective function with multistarts."""
        constrained_optimizer = COBYLAOptimizer(self.large_domain, self.polynomial, self.COBYLA_parameters)
        self.multistarted_optimizer_test(constrained_optimizer)

    def test_bfgs_multistarted_optimizer(self):
        """Test if BFGS can optimize a "hard" objective function with multistarts."""
        bfgs_optimizer = LBFGSBOptimizer(self.large_domain, self.polynomial, self.BFGS_parameters)
        self.multistarted_optimizer_test(bfgs_optimizer)

    def test_gradient_descent_multistarted_optimizer(self):
        """Test if Gradient Descent can optimize a "hard" objective function with multistarts."""
        gradient_descent_optimizer = GradientDescentOptimizer(self.large_domain, self.polynomial, self.gd_parameters)
        self.multistarted_optimizer_test(gradient_descent_optimizer)
    def test_gradient_descent_optimizer_with_averaging(self):
        """Test if Gradient Descent can optimize a simple objective function.

        This test doesn't exercise the purpose of averaging (i.e., this objective isn't stochastic),
        but it does check that it at least runs.

        """
        num_steps_averaged = self.gd_parameters.max_num_steps * 3 // 4  # integer division keeps the parameter an int
        param_dict = self.gd_parameters._asdict()
        param_dict['num_steps_averaged'] = num_steps_averaged
        gd_parameters_averaging = GradientDescentParameters(**param_dict)

        gradient_descent_optimizer = GradientDescentOptimizer(self.domain, self.polynomial, gd_parameters_averaging)
        self.optimizer_test(gradient_descent_optimizer, tolerance=2.0e-10)