Source code for moe.optimal_learning.python.geometry_utils

# -*- coding: utf-8 -*-
"""Geometry utilities. e.g., ClosedInterval, point-plane geometry, random point generation."""
import collections

import numpy


[docs]def generate_latin_hypercube_points(num_points, domain_bounds): """Compute a set of random points inside some domain that lie in a latin hypercube. In 2D, a latin hypercube is a latin square--a checkerboard--such that there is exactly one sample in each row and each column. This notion is generalized for higher dimensions where each dimensional 'slice' has precisely one sample. See wikipedia: http://en.wikipedia.org/wiki/Latin_hypercube_sampling for more details on the latin hypercube sampling process. :param num_points: number of random points to generate :type num_points: int > 0 :param domain_bounds: [min, max] boundaries of the hypercube in each dimension :type domain_bounds: list of dim ClosedInterval :return: uniformly distributed random points inside the specified hypercube :rtype: array of float64 with shape (num_points, dim) """ # TODO(GH-56): Allow users to pass in a random source. if num_points == 0: return numpy.array([]) points = numpy.zeros((num_points, len(domain_bounds)), dtype=numpy.float64) for i, interval in enumerate(domain_bounds): # Cut the range into num_points slices subcube_edge_length = interval.length / float(num_points) # Create random ordering for slices ordering = numpy.arange(num_points) numpy.random.shuffle(ordering) for j in xrange(num_points): point_base = interval.min + subcube_edge_length * ordering[j] points[j, i] = point_base + numpy.random.uniform(0.0, subcube_edge_length) return points
[docs]def generate_grid_points(points_per_dimension, domain_bounds): r"""Generate a uniform grid of points on a tensor product region; exponential runtime. This can be useful for producing a reasonable set of initial samples when bootstrapping optimal_learning. Grid sampling (as opposed to a random sampling, e.g., latin hypercube) is not random. It also guarantees sampling of the domain corners. .. Note:: This operation is like an outer-product, so 4 points per dimension in 10 dimensions produces 4^{10} points. This could be built as an iterator instead, but the typical use case involves function evaluations at every point, so generating the points is not the limiting factor. :param points_per_dimension: (n_1, n_2, ... n_{dim}) number of stencil points per spatial dimension. If len(points_per_dimension) == 1, then n_i = len(points_per_dimension) :type points_per_dimension: tuple or scalar :param domain_bounds: the boundaries of a dim-dimensional tensor-product domain :type domain_bounds: iterable of dim ClosedInterval :return: stencil point coordinates :rtype: array of float64 with shape (\Pi_i n_i, dim) """ points_per_dimension = numpy.asarray(points_per_dimension) # Empty input OR at least 1 dimension has 0 points if points_per_dimension.size == 0 or not points_per_dimension.all(): return numpy.array([]) if points_per_dimension.size == 1: # resize fills new entries with copies of the original points_per_dimension = numpy.resize(points_per_dimension, len(domain_bounds)) # List of 1D grids w/the specified number of points per dimension per_axis_grid = [numpy.linspace(bounds.min, bounds.max, points_per_dimension[i]) for i, bounds in enumerate(domain_bounds)] # meshgrid produces a list of ndarray that is used to evaluate functions on a grid. # The i-th output array has the coordinate of *every* grid point in the i-th dimension. mesh_grid = numpy.meshgrid(*per_axis_grid) # ravel flattens the input (same as numpy.flatten but it tries to avoid copying) # vstack stacks inputs vertically: so for our 1D arrays, the i-th input becomes # the i-th row in a matrix. And since each mesh_grid output has *every* coordinate # of the grid in that dimension, the *columns* of the stack contain every grid point. return numpy.vstack(map(numpy.ravel, mesh_grid)).T # See ClosedInterval (below) for docstring.
_BaseClosedInterval = collections.namedtuple('ClosedInterval', ['min', 'max'])
[docs]class ClosedInterval(_BaseClosedInterval): r"""Container to represent the mathematical notion of a closed interval, commonly written \ms [a,b]\me. The closed interval \ms [a,b]\me is the set of all numbers \ms x \in \mathbb{R}\me such that \ms a \leq x \leq b\me. Note that "closed" here indicates the interval *includes* both endpoints. An interval with \ms a > b\me is considered empty. :ivar min: (*float64*) the "left" bound of the domain, ``a`` :ivar max: (*float64*) the "right" bound of the domain, ``b`` """ __slots__ = () @staticmethod
[docs] def build_closed_intervals_from_list(bounds_list): """Construct a list of dim ClosedInterval from an iterable structure of dim iterables with len = 2. For example, [[1, 2], [3, 4]] becomes [ClosedInterval(min=1, max=2), ClosedInterval(min=3, max=4)]. :param bounds_list: bounds to convert :type bounds_list: iterable of iterables, where the second dimension has len = 2 :return: bounds_list converted to list of ClosedInterval :rtype: list of ClosedInterval """ return [ClosedInterval(min, max) for (min, max) in bounds_list]
@property
[docs] def length(self): """Compute the length of this ClosedInterval.""" return self.max - self.min
[docs] def is_inside(self, value): """Check if a value is inside this ClosedInterval.""" return self.min <= value <= self.max
[docs] def is_empty(self): """Check whether this ClosedInterval is the emptyset: max < min.""" return self.max < self.min