# -*- coding: utf-8 -*-
"""Test the C++ implementation of Gaussian Process properties (mean, var, gradients thereof) against the Python version."""
import copy
import numpy
import pytest
import moe.build.GPP as C_GP
from moe.optimal_learning.python.cpp_wrappers.covariance import SquareExponential
from moe.optimal_learning.python.cpp_wrappers.gaussian_process import GaussianProcess
from moe.optimal_learning.python.data_containers import HistoricalData, SamplePoint
from moe.tests.optimal_learning.python.gaussian_process_test_case import GaussianProcessTestCase


class TestGaussianProcess(GaussianProcessTestCase):

    """Test C++ vs Python implementations of Gaussian Process properties (mean, variance, cholesky variance, and their gradients)."""

    precompute_gaussian_process_data = True

    @classmethod
    @pytest.fixture(autouse=True, scope='class')
    def base_setup(cls):
        """Run the standard setup but seed the RNG first (for repeatability).

        It is easy to stumble into test cases where mean, var terms are very small (e.g., < 1.e-20),
        which makes it difficult to set meaningful tolerances for the checks.

        """
        numpy.random.seed(8794)
        super(TestGaussianProcess, cls).base_setup()

    def test_sample_point_from_gp(self):
        """Test that sampling points from the GP works."""
        point_one = SamplePoint([0.0, 1.0], -1.0, 0.0)
        point_two = SamplePoint([2.0, 2.5], 1.0, 0.1)

        covariance = SquareExponential([1.0, 1.0, 1.0])
        historical_data = HistoricalData(len(point_one.point), [point_one, point_two])

        gaussian_process = GaussianProcess(covariance, historical_data)
        out_values = numpy.zeros(3)
        for i in xrange(3):
            out_values[i] = gaussian_process.sample_point_from_gp(point_two.point, 0.001)
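        # Rewind the C++ engine's RNG to the seed used for the draws above, so the next draws repeat them exactly.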
        gaussian_process._gaussian_process.reset_to_most_recent_seed()
        out_values_test = numpy.ones(3)
        for i in xrange(3):
            out_values_test[i] = gaussian_process.sample_point_from_gp(point_two.point, 0.001)

        # Exact match because we re-ran exactly the same computations
        self.assert_vector_within_relative(out_values_test, out_values, 0.0)

        # Sampling a historical point that had 0 noise should reproduce the value associated with that point
        value = gaussian_process.sample_point_from_gp(point_one.point, 0.0)
        self.assert_scalar_within_relative(value, point_one.value, numpy.finfo(numpy.float64).eps)

    def test_gp_construction_singular_covariance_matrix(self):
        """Test that the GaussianProcess ctor indicates a singular covariance matrix when points_sampled contains duplicates (0 noise)."""
        index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 1))
        domain, gaussian_process = self.gp_test_environments[index]

        point_one = SamplePoint([0.0] * domain.dim, 1.0, 0.0)
        # points two and three have duplicate coordinates and we have noise_variance = 0.0
        point_two = SamplePoint([1.0] * domain.dim, 1.0, 0.0)
        point_three = point_two

        historical_data = HistoricalData(len(point_one.point), [point_one, point_two, point_three])
        with pytest.raises(C_GP.SingularMatrixException):
            GaussianProcess(gaussian_process.get_covariance_copy(), historical_data)

    def test_gp_add_sampled_points_singular_covariance_matrix(self):
        """Test that GaussianProcess.add_sampled_points indicates a singular covariance matrix when points_sampled contains duplicates (0 noise)."""
        test_environment_input = copy.copy(self.gp_test_environment_input)
        test_environment_input.num_sampled = 1
        test_environment_input.gaussian_process_class = GaussianProcess
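        # Build a fresh, small GP (a single random sampled point), so the points added below
        # control whether the covariance matrix becomes singular.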
        _, gaussian_process = self._build_gaussian_process_test_data(test_environment_input)

        # points one and three have duplicate coordinates and we have noise_variance = 0.0
        point_one = SamplePoint([0.5] * gaussian_process.dim, 1.0, 0.0)
        point_two = SamplePoint([1.0] * gaussian_process.dim, -1.0, 0.0)
        point_three = point_one

        # points one and two are different, so this is safe
        gaussian_process.add_sampled_points([point_one, point_two])

        # point_three is identical to point_one; this will produce a singular covariance matrix
        with pytest.raises(C_GP.SingularMatrixException):
            gaussian_process.add_sampled_points([point_three])

    def test_python_and_cpp_return_same_mu_and_gradient(self):
        """Compare mu/grad mu results from Python & C++, checking several random points per test case."""
        num_tests_per_case = 4
        mu_tolerance = 3.0e-13
        grad_mu_tolerance = 3.0e-12

        for test_case in self.gp_test_environments:
            domain, python_gp = test_case
            python_cov, historical_data = python_gp.get_core_data_copy()
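            # Build a C++ GP from the same hyperparameters and historical data as the Python GP,
            # so both implementations are queried on identical inputs.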
            cpp_cov = SquareExponential(python_cov.hyperparameters)
            cpp_gp = GaussianProcess(cpp_cov, historical_data)

            for num_to_sample in self.num_to_sample_list:
                for _ in xrange(num_tests_per_case):
                    points_to_sample = domain.generate_uniform_random_points_in_domain(num_to_sample)

                    cpp_mu = cpp_gp.compute_mean_of_points(points_to_sample)
                    python_mu = python_gp.compute_mean_of_points(points_to_sample)
                    self.assert_vector_within_relative(python_mu, cpp_mu, mu_tolerance)

                    cpp_grad_mu = cpp_gp.compute_grad_mean_of_points(points_to_sample)
                    python_grad_mu = python_gp.compute_grad_mean_of_points(points_to_sample)
                    self.assert_vector_within_relative(python_grad_mu, cpp_grad_mu, grad_mu_tolerance)

    def test_python_and_cpp_return_same_variance_and_gradient(self):
        """Compare var/grad var results from Python & C++, checking several random points per test case."""
        num_tests_per_case = 2
        var_tolerance = 3.0e-13
        grad_var_tolerance = 3.0e-12

        for test_case in self.gp_test_environments:
            domain, python_gp = test_case
            python_cov, historical_data = python_gp.get_core_data_copy()

            cpp_cov = SquareExponential(python_cov.hyperparameters)
            cpp_gp = GaussianProcess(cpp_cov, historical_data)

            for num_to_sample in self.num_to_sample_list:
                for _ in xrange(num_tests_per_case):
                    points_to_sample = domain.generate_uniform_random_points_in_domain(num_to_sample)

                    cpp_var = cpp_gp.compute_variance_of_points(points_to_sample)
                    python_var = python_gp.compute_variance_of_points(points_to_sample)
                    self.assert_vector_within_relative(python_var, cpp_var, var_tolerance)

                    cpp_grad_var = cpp_gp.compute_grad_variance_of_points(points_to_sample)
                    python_grad_var = python_gp.compute_grad_variance_of_points(points_to_sample)
                    self.assert_vector_within_relative(python_grad_var, cpp_grad_var, grad_var_tolerance)

    def test_python_and_cpp_return_same_cholesky_variance_and_gradient(self):
        """Compare chol_var/grad chol_var results from Python & C++, checking several random points per test case."""
        num_tests_per_case = 2
        var_tolerance = 3.0e-12
        # TODO(GH-240): set RNG seed for this case and restore tolerance to 3.0e-12 or better
        grad_var_tolerance = 3.0e-10

        for test_case in self.gp_test_environments:
            domain, python_gp = test_case
            python_cov, historical_data = python_gp.get_core_data_copy()

            cpp_cov = SquareExponential(python_cov.hyperparameters)
            cpp_gp = GaussianProcess(cpp_cov, historical_data)

            for num_to_sample in self.num_to_sample_list:
                for _ in xrange(num_tests_per_case):
                    points_to_sample = domain.generate_uniform_random_points_in_domain(num_to_sample)

                    cpp_var = cpp_gp.compute_cholesky_variance_of_points(points_to_sample)
                    python_var = python_gp.compute_cholesky_variance_of_points(points_to_sample)
                    self.assert_vector_within_relative(python_var, cpp_var, var_tolerance)

                    cpp_grad_var = cpp_gp.compute_grad_cholesky_variance_of_points(points_to_sample)
                    python_grad_var = python_gp.compute_grad_cholesky_variance_of_points(points_to_sample)
                    self.assert_vector_within_relative(python_grad_var, cpp_grad_var, grad_var_tolerance)