Source code for moe.bandit.epsilon.epsilon_greedy

# -*- coding: utf-8 -*-
"""Classes (Python) to compute the Bandit Epsilon-Greedy arm allocation and choosing the arm to pull next.

See :class:`moe.bandit.epsilon.epsilon_interface.EpsilonInterface` for further details on this bandit.

"""
from moe.bandit.constant import DEFAULT_EPSILON, EPSILON_SUBTYPE_GREEDY
from moe.bandit.epsilon.epsilon_interface import EpsilonInterface


[docs]class EpsilonGreedy(EpsilonInterface): r"""Implementation of EpsilonGreedy. A class to encapsulate the computation of bandit epsilon greedy. See superclass :class:`moe.bandit.epsilon.epsilon_interface.EpsilonInterface` for further details. """ def __init__( self, historical_info, epsilon=DEFAULT_EPSILON, ): """Construct an EpsilonGreedy object. See superclass :class:`moe.bandit.epsilon.epsilon_interface.EpsilonInterface` for details.""" super(EpsilonGreedy, self).__init__( historical_info=historical_info, subtype=EPSILON_SUBTYPE_GREEDY, epsilon=epsilon, )
[docs] def allocate_arms(self): r"""Compute the allocation to each arm given ``historical_info``, running bandit ``subtype`` endpoint with hyperparameter epsilon. Computes the allocation to each arm based on the given subtype, historical info, and hyperparameter epsilon. Works with k-armed bandits (k >= 1). The Algorithm: http://en.wikipedia.org/wiki/Multi-armed_bandit#Approximate_solutions This method will pull the optimal arm (best expected return) with probability :math:`1-\epsilon`, with probability :math:`\epsilon` a random arm will be pulled. In case of a tie, the method will split the probability :math:`1-\epsilon` among the optimal arms and with probability :math:`\epsilon`, a random arm will be pulled. For example, if we have three arms, two arms (arm1 and arm2) with an average payoff of 0.5 and a new arm (arm3, average payoff is 0). Let the epsilon :math:`\epsilon` be 0.12. The allocation will be as follows: arm1: 0.48, arm2: 0.48, arm3: 0.04. The calculation is as follows: arm1 and arm2 both get the same allocation: .. math:: \frac{1-\epsilon}{2} + \frac{\epsilon}{3} = \frac{1-0.12}{2} + \frac{0.12}{3} = 0.44 + 0.04 = 0.48 arm3 gets the allocation: .. math:: \frac{\epsilon}{3} = \frac{0.12}{3} = 0.04 :return: the dictionary of (arm, allocation) key-value pairs :rtype: a dictionary of (str, float64) pairs :raise: ValueError when ``sample_arms`` are empty. """ arms_sampled = self._historical_info.arms_sampled num_arms = self._historical_info.num_arms if not arms_sampled: raise ValueError('sample_arms are empty!') winning_arm_names = self.get_winning_arm_names(arms_sampled) num_winning_arms = len(winning_arm_names) epsilon_allocation = self._epsilon / num_arms arms_to_allocations = {} # With probability epsilon, choose a winning arm at random. Therefore, we split the allocation epsilon among all arms. for arm_name in arms_sampled.iterkeys(): arms_to_allocations[arm_name] = epsilon_allocation # With probability 1-epsilon, split allocation among winning arms. winning_arm_allocation = (1.0 - self._epsilon) / num_winning_arms for winning_arm_name in winning_arm_names: arms_to_allocations[winning_arm_name] += winning_arm_allocation return arms_to_allocations