1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* */
/* This file is part of the program and library */
/* SCIP --- Solving Constraint Integer Programs */
/* */
/* Copyright 2002-2022 Zuse Institute Berlin */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
/* You may obtain a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
/* See the License for the specific language governing permissions and */
/* limitations under the License. */
/* */
/* You should have received a copy of the Apache-2.0 license */
/* along with SCIP; see the file LICENSE. If not visit scipopt.org. */
/* */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**@file bandit_epsgreedy.h
* @ingroup INTERNALAPI
* @brief internal methods for epsilon greedy bandit selection
* @author Gregor Hendel
*/
/*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
#ifndef __SCIP_BANDIT_EPSGREEDY_H__
#define __SCIP_BANDIT_EPSGREEDY_H__
#include "blockmemshell/memory.h"
#include "scip/def.h"
#include "scip/type_bandit.h"
#include "scip/type_retcode.h"
#include "scip/type_scip.h"
#ifdef __cplusplus
extern "C" {
#endif
/** creates the epsilon greedy bandit algorithm includes it in SCIP */
SCIP_RETCODE SCIPincludeBanditvtableEpsgreedy(
SCIP* scip /**< SCIP data structure */
);
/** callback to free bandit specific data structures */
SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy);
/** selection callback for bandit algorithm */
SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy);
/** update callback for bandit algorithm */
SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy);
/** reset callback for bandit algorithm */
SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy);
/** internal method to create and reset epsilon greedy bandit algorithm */
SCIP_RETCODE SCIPbanditCreateEpsgreedy(
BMS_BLKMEM* blkmem, /**< block memory */
BMS_BUFMEM* bufmem, /**< buffer memory */
SCIP_BANDITVTABLE* vtable, /**< virtual function table with epsilon greedy callbacks */
SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
* only relevant if exponential decay is enabled
*/
int nactions, /**< the number of possible actions */
unsigned int initseed /**< initial random seed */
);
#ifdef __cplusplus
}
#endif
#endif