Skip to content

Commit

Permalink
Merge branch 'master' into windows_asan
Browse files Browse the repository at this point in the history
  • Loading branch information
bassmang authored Mar 7, 2024
2 parents 38e8f48 + 80e832f commit da1f106
Show file tree
Hide file tree
Showing 15 changed files with 232 additions and 39 deletions.
4 changes: 2 additions & 2 deletions ext_libs/ext_libs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ if(RAPIDJSON_SYS_DEP)
# Since EXACT is not specified, any version compatible with 1.1.0 is accepted (>= 1.1.0)
find_package(RapidJSON 1.1.0 CONFIG REQUIRED)
add_library(RapidJSON INTERFACE)
target_include_directories(RapidJSON INTERFACE ${RapidJSON_INCLUDE_DIRS})
target_include_directories(RapidJSON INTERFACE ${RapidJSON_INCLUDE_DIRS} ${RAPIDJSON_INCLUDE_DIRS})
else()
add_library(RapidJSON INTERFACE)
target_include_directories(RapidJSON SYSTEM INTERFACE "${CMAKE_CURRENT_LIST_DIR}/rapidjson/include")
Expand Down Expand Up @@ -127,4 +127,4 @@ if(VW_FEAT_CB_GRAPH_FEEDBACK)
target_include_directories(mlpack_ensmallen SYSTEM INTERFACE ${CMAKE_CURRENT_LIST_DIR}/armadillo-code/include)

target_include_directories(mlpack_ensmallen SYSTEM INTERFACE ${CMAKE_CURRENT_LIST_DIR}/ensmallen/include)
endif()
endif()
2 changes: 1 addition & 1 deletion python/docs/source/tutorials/cmd_first_steps.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,6 @@ The model predicted a value of **0**. This result means our house will not need
## More to explore

- See [Python tutorial](python_first_steps.ipynb) for a quick introduction to the basics of training and testing your model.
- To learn more about how to approach a contextual bandits problem using tVowpal Wabbit — including how to work with different contextual bandits approaches, how to format data, and understand the results — see the [Contextual Bandit Reinforcement Learning Tutorial](python_Contextual_bandits_and_Vowpal_Wabbit.ipynb).
- To learn more about how to approach a contextual bandits problem using Vowpal Wabbit — including how to work with different contextual bandits approaches, how to format data, and understand the results — see the [Contextual Bandit Reinforcement Learning Tutorial](python_Contextual_bandits_and_Vowpal_Wabbit.ipynb).
- For more on the contextual bandits approach to reinforcement learning, including a content personalization scenario, see the [Contextual Bandit Simulation Tutorial](python_Simulating_a_news_personalization_scenario_using_Contextual_Bandits.ipynb).
- See the [Linear Regression Tutorial](cmd_linear_regression.md) for a different look at the roof replacement problem and learn more about Vowpal Wabbit's format and understanding the results.
29 changes: 29 additions & 0 deletions test/core.vwtest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6073,5 +6073,34 @@
"depends_on": [
467
]
},
{
"id": 469,
"desc": "https://github.com/VowpalWabbit/vowpal_wabbit/issues/4669",
"vw_command": "--ccb_explore_adf --dsjson -d train-sets/issue4669.dsjson -f issue4669.model",
"diff_files": {
"stderr": "train-sets/ref/issue4669_train.stderr",
"stdout": "train-sets/ref/issue4669_train.stdout"
},
"input_files": [
"train-sets/issue4669.dsjson"
]
},
{
"id": 470,
"desc": "https://github.com/VowpalWabbit/vowpal_wabbit/issues/4669",
"vw_command": "--ccb_explore_adf --dsjson --all_slots_loss --epsilon 0 -t -i issue4669.model -t -d train-sets/issue4669.dsjson -p issue4669_test_pred.txt",
"diff_files": {
"stderr": "train-sets/ref/issue4669_test.stderr",
"stdout": "train-sets/ref/issue4669_test.stdout",
"issue4669_test_pred.txt": "train-sets/ref/issue4669_test_pred.txt"
},
"input_files": [
"train-sets/issue4669.dsjson",
"issue4669.model"
],
"depends_on": [
469
]
}
]
1 change: 1 addition & 0 deletions test/train-sets/issue4669.dsjson
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"c":{"_multi":[{"f":"1"},{"f":"2"}],"_slots":[{"_inc":[0,1]},{"_inc":[1]}]},"_outcomes":[{"_label_cost":1.0,"_a":[0,1],"_p":[0.5,0.5]},{"_label_cost":0.0,"_a":[1],"_p":[1]}]}
19 changes: 6 additions & 13 deletions test/train-sets/ref/active-simulation.t24.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,13 @@ Output pred = SCALAR
average since example example current current current
loss last counter weight label predict features
1.000000 1.000000 1 1.0 -1.0000 0.0000 128
0.791125 0.755288 2 6.8 -1.0000 -0.1309 44
1.274829 1.444750 8 26.3 1.0000 -0.2020 34
1.083985 0.895011 73 52.8 1.0000 0.0214 21
0.887295 0.693362 130 106.3 -1.0000 -0.3071 146
0.788245 0.690009 233 213.6 -1.0000 0.0421 47
0.664628 0.541195 398 427.4 -1.0000 -0.1863 68
0.634406 0.604328 835 856.9 -1.0000 -0.4327 40

finished run
number of examples = 1000
weighted example sum = 1014.004519
weighted label sum = -68.618036
average loss = 0.630964
best constant = -0.067670
best constant's loss = 0.995421
weighted example sum = 1.000000
weighted label sum = -1.000000
average loss = 1.000000
best constant = -1.000000
best constant's loss = 0.000000
total feature number = 78739
total queries = 474
total queries = 1
8 changes: 6 additions & 2 deletions test/train-sets/ref/help.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,12 @@ Weight Options:
[Reduction] Active Learning Options:
--active Enable active learning (type: bool, keep, necessary)
--simulation Active learning simulation mode (type: bool)
--mellowness arg Active learning mellowness parameter c_0. Default 8 (type: float,
default: 8, keep)
--direct Active learning via the tag and predictions interface. Tag should
start with "query?" to get query decision. Returned prediction
is either -1 for no or the importance weight for yes. (type:
bool)
--mellowness arg Active learning mellowness parameter c_0. Default 1. (type: float,
default: 1, keep)
[Reduction] Active Learning with Cover Options:
--active_cover Enable active learning with cover (type: bool, keep, necessary)
--mellowness arg Active learning mellowness parameter c_0 (type: float, default:
Expand Down
23 changes: 23 additions & 0 deletions test/train-sets/ref/issue4669_test.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
only testing
predictions = issue4669_test_pred.txt
using no cache
Reading datafile = train-sets/issue4669.dsjson
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 1
power_t = 0.5
cb_type = mtr
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, cb_sample, shared_feature_merger, ccb_explore_adf
Input label = CCB
Output pred = DECISION_PROBS
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 0:1,1:0 1,None 9

finished run
number of examples = 1
weighted example sum = 1.000000
weighted label sum = 0.000000
average loss = 0.000000
total feature number = 9
Empty file.
3 changes: 3 additions & 0 deletions test/train-sets/ref/issue4669_test_pred.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
1:1,0:0


22 changes: 22 additions & 0 deletions test/train-sets/ref/issue4669_train.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
final_regressor = issue4669.model
using no cache
Reading datafile = train-sets/issue4669.dsjson
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
cb_type = mtr
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, cb_sample, shared_feature_merger, ccb_explore_adf
Input label = CCB
Output pred = DECISION_PROBS
average since example example current current current
loss last counter weight label predict features
1.000000 1.000000 1 1.0 0:1,1:0 0,1 12

finished run
number of examples = 1
weighted example sum = 1.000000
weighted label sum = 0.000000
average loss = 1.000000
total feature number = 12
Empty file.
3 changes: 2 additions & 1 deletion vowpalwabbit/core/src/decision_scores.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ void print_update(VW::workspace& all, const VW::multi_ex& slots, const VW::decis
std::string delim;
for (const auto& slot : decision_scores)
{
pred_ss << delim << slot[0].action;
if (slot.empty()) { pred_ss << delim << "None"; }
else { pred_ss << delim << slot[0].action; }
delim = ",";
}
all.sd->print_update(*all.output_runtime.trace_message, all.passes_config.holdout_set_off,
Expand Down
83 changes: 66 additions & 17 deletions vowpalwabbit/core/src/reductions/active.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,31 +31,41 @@ using namespace VW::config;
using namespace VW::reductions;
namespace
{
float get_active_coin_bias(float k, float avg_loss, float g, float c0)
{
const float b = c0 * (std::log(k + 1.f) + 0.0001f) / (k + 0.0001f);
const float sb = std::sqrt(b);
float get_active_coin_bias(float example_count, float avg_loss, float alt_label_error_rate_diff, float mellowness)
{//implementation follows https://web.archive.org/web/20120525164352/http://books.nips.cc/papers/files/nips23/NIPS2010_0363.pdf
const float mellow_log_e_count_over_e_count = mellowness * (std::log(example_count + 1.f) + 0.0001f) / (example_count + 0.0001f);
const float sqrt_mellow_lecoec = std::sqrt(mellow_log_e_count_over_e_count);
// loss should be in [0,1]
avg_loss = VW::math::clamp(avg_loss, 0.f, 1.f);

const float sl = std::sqrt(avg_loss) + std::sqrt(avg_loss + g);
if (g <= sb * sl + b) { return 1; }
const float rs = (sl + std::sqrt(sl * sl + 4 * g)) / (2 * g);
return b * rs * rs;
const float sqrt_avg_loss_plus_sqrt_alt_loss = std::min(1.f, //std::sqrt(avg_loss) + // commented out because two square roots appears to conservative.
std::sqrt(avg_loss + alt_label_error_rate_diff));//emperical variance deflater.
//std::cout << "example_count = " << example_count << " avg_loss = " << avg_loss << " alt_label_error_rate_diff = " << alt_label_error_rate_diff << " mellowness = " << mellowness << " mlecoc = " << mellow_log_e_count_over_e_count
// << " sqrt_mellow_lecoec = " << sqrt_mellow_lecoec << " double sqrt = " << sqrt_avg_loss_plus_sqrt_alt_loss << std::endl;

if (alt_label_error_rate_diff <= sqrt_mellow_lecoec * sqrt_avg_loss_plus_sqrt_alt_loss//deflater in use.
+ mellow_log_e_count_over_e_count) { return 1; }
//old equation
// const float rs = (sqrt_avg_loss_plus_sqrt_alt_loss + std::sqrt(sqrt_avg_loss_plus_sqrt_alt_loss * sqrt_avg_loss_plus_sqrt_alt_loss + 4 * alt_label_error_rate_diff)) / (2 * alt_label_error_rate_diff);
// return mellow_log_e_count_over_e_count * rs * rs;
const float sqrt_s = (sqrt_mellow_lecoec + std::sqrt(mellow_log_e_count_over_e_count+4*alt_label_error_rate_diff*mellow_log_e_count_over_e_count)) / 2*alt_label_error_rate_diff;
// std::cout << "sqrt_s = " << sqrt_s << std::endl;
return sqrt_s*sqrt_s;
}

float query_decision(const active& a, float ec_revert_weight, float k)
float query_decision(const active& a, float updates_to_change_prediction, float example_count)
{
float bias;
if (k <= 1.f) { bias = 1.f; }
if (example_count <= 1.f) { bias = 1.f; }
else
{
const auto weighted_queries = static_cast<float>(a._shared_data->weighted_labeled_examples);
const float avg_loss = (static_cast<float>(a._shared_data->sum_loss) / k) +
std::sqrt((1.f + 0.5f * std::log(k)) / (weighted_queries + 0.0001f));
bias = get_active_coin_bias(k, avg_loss, ec_revert_weight / k, a.active_c0);
// const auto weighted_queries = static_cast<float>(a._shared_data->weighted_labeled_examples);
const float avg_loss = (static_cast<float>(a._shared_data->sum_loss) / example_count);
//+ std::sqrt((1.f + 0.5f * std::log(example_count)) / (weighted_queries + 0.0001f)); Commented this out, not following why we need it from the theory.
// std::cout << "avg_loss = " << avg_loss << " weighted_queries = " << weighted_queries << " sum_loss = " << a._shared_data->sum_loss << " example_count = " << example_count << std::endl;
bias = get_active_coin_bias(example_count, avg_loss, updates_to_change_prediction / example_count, a.active_c0);
}

// std::cout << "bias = " << bias << std::endl;
return (a._random_state->get_and_update_random() < bias) ? 1.f / bias : -1.f;
}

Expand Down Expand Up @@ -110,6 +120,34 @@ void predict_or_learn_active(active& a, learner& base, VW::example& ec)
}
}

/**
 * Active learning through the tag/prediction interface (--direct).
 *
 * The base learner is always invoked first. A labelled example only widens the range of labels
 * seen so far. An unlabelled example (label == FLT_MAX) whose tag starts with "query?" has its
 * scalar prediction replaced by the result of query_decision(); any other unlabelled example is
 * left untouched.
 *
 * @tparam is_learn true to call base.learn(), false to call base.predict()
 * @param a reduction state (seen label range, shared data, random state)
 * @param base the learner below this reduction
 * @param ec example to process; pred.scalar and confidence may be overwritten
 */
template <bool is_learn>
void predict_or_learn_active_direct(active& a, learner& base, VW::example& ec)
{
  if (is_learn) { base.learn(ec); }
  else { base.predict(ec); }

  if (ec.l.simple.label != FLT_MAX)
  {
    // Update seen labels based on the current example's label.
    a._min_seen_label = std::min(ec.l.simple.label, a._min_seen_label);
    a._max_seen_label = std::max(ec.l.simple.label, a._max_seen_label);
    return;
  }

  // Only tags that begin with "query?" ask for a query decision. The length is checked first so
  // that a shorter (or empty) tag is never read past its end.
  constexpr char query_prefix[] = "query?";
  constexpr size_t query_prefix_length = sizeof(query_prefix) - 1;
  if (ec.tag.size() < query_prefix_length ||
      !std::equal(query_prefix, query_prefix + query_prefix_length, ec.tag.begin()))
  {
    return;
  }

  const float threshold = (a._shared_data->max_label + a._shared_data->min_label) * 0.5f;
  // We want to understand the change in prediction if the label were to be the opposite of what
  // was predicted. The smallest and largest labels seen so far stand in for the two classes; the
  // label is restored to FLT_MAX once the sensitivity has been read.
  ec.l.simple.label = (ec.pred.scalar >= threshold) ? a._min_seen_label : a._max_seen_label;
  ec.confidence = std::abs(ec.pred.scalar - threshold) / base.sensitivity(ec);
  ec.l.simple.label = FLT_MAX;
  ec.pred.scalar =
      query_decision(a, ec.confidence, static_cast<float>(a._shared_data->weighted_unlabeled_examples));
}

void active_print_result(
VW::io::writer* f, float res, float weight, const VW::v_array<char>& tag, VW::io::logger& logger)
{
Expand Down Expand Up @@ -189,14 +227,16 @@ std::shared_ptr<VW::LEARNER::learner> VW::reductions::active_setup(VW::setup_bas

bool active_option = false;
bool simulation = false;
bool direct = false;
float active_c0;
option_group_definition new_options("[Reduction] Active Learning");
new_options.add(make_option("active", active_option).keep().necessary().help("Enable active learning"))
.add(make_option("simulation", simulation).help("Active learning simulation mode"))
.add(make_option("direct", direct).help("Active learning via the tag and predictions interface. Tag should start with \"query?\" to get query decision. Returned prediction is either -1 for no or the importance weight for yes."))
.add(make_option("mellowness", active_c0)
.keep()
.default_value(8.f)
.help("Active learning mellowness parameter c_0. Default 8"));
.default_value(1.f)
.help("Active learning mellowness parameter c_0. Default 1."));

if (!options.add_parse_and_check_necessary(new_options)) { return nullptr; }

Expand All @@ -223,6 +263,15 @@ std::shared_ptr<VW::LEARNER::learner> VW::reductions::active_setup(VW::setup_bas
print_update_func = VW::details::print_update_simple_label<active>;
reduction_name.append("-simulation");
}
else if (direct)
{
learn_func = predict_or_learn_active_direct<true>;
pred_func = predict_or_learn_active_direct<false>;
update_stats_func = update_stats_active;
output_example_prediction_func = VW::details::output_example_prediction_simple_label<active>;
print_update_func = VW::details::print_update_simple_label<active>;
learn_returns_prediction = base->learn_returns_prediction;
}
else
{
all.reduction_state.active = true;
Expand Down
18 changes: 15 additions & 3 deletions vowpalwabbit/core/src/reductions/conditional_contextual_bandit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "vw/core/reductions/conditional_contextual_bandit.h"

#include "vw/config/options.h"
#include "vw/core/cb.h"
#include "vw/core/ccb_label.h"
#include "vw/core/ccb_reduction_features.h"
#include "vw/core/constant.h"
Expand Down Expand Up @@ -213,8 +214,12 @@ void clear_pred_and_label(ccb_data& data)
data.actions[data.action_with_label]->l.cb.costs.clear();
}

// true if there exists at least 1 action in the cb multi-example
bool has_action(VW::multi_ex& cb_ex) { return !cb_ex.empty(); }
// Reports whether the cb multi-example holds at least one action. At most one entry can be the
// shared example, so two or more entries always include an action; a single entry counts only
// when it is not the shared header.
bool has_action(VW::multi_ex& cb_ex)
{
  if (cb_ex.empty()) { return false; }
  if (cb_ex.size() > 1) { return true; }
  return !VW::ec_is_example_header_cb(*cb_ex[0]);
}

// This function intentionally does not handle increasing the num_features of the example because
// the output_example function has special logic to ensure the number of features is correctly calculated.
Expand Down Expand Up @@ -309,7 +314,11 @@ void build_cb_example(VW::multi_ex& cb_ex, VW::example* slot, const VW::ccb_labe
// First time seeing this, initialize the vector with falses so we can start setting each included action.
if (data.include_list.empty()) { data.include_list.assign(data.actions.size(), false); }

for (uint32_t included_action_id : explicit_includes) { data.include_list[included_action_id] = true; }
for (uint32_t included_action_id : explicit_includes)
{
// The action may be included but not actually exist in the list of possible actions.
if (included_action_id < data.actions.size()) { data.include_list[included_action_id] = true; }
}
}

// set the available actions in the cb multi-example
Expand Down Expand Up @@ -545,6 +554,9 @@ void update_stats_ccb(const VW::workspace& /* all */, shared_data& sd, const ccb
if (outcome != nullptr)
{
num_labeled++;
// It is possible for the prediction to be empty if there were no actions available at the time of taking the
// slot decision. In this case it does not contribute to loss.
if (preds[i].empty()) { continue; }
if (i == 0 || data.all_slots_loss_report)
{
const float l = VW::get_cost_estimate(outcome->probabilities[VW::details::TOP_ACTION_INDEX], outcome->cost,
Expand Down
56 changes: 56 additions & 0 deletions vowpalwabbit/core/tests/ccb_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,59 @@ TEST(Ccb, InsertInteractionsImplTest)

EXPECT_THAT(result, testing::ContainerEq(expected_after));
}

// A slot that explicitly includes an action id which does not exist (10, with only one action
// defined) must not crash and must yield an empty prediction for that slot.
TEST(Ccb, ExplicitIncludedActionsNonExistentAction)
{
  auto vw = VW::initialize(vwtest::make_args("--ccb_explore_adf", "--quiet"));
  VW::multi_ex examples;
  examples.push_back(VW::read_example(*vw, "ccb shared |"));
  examples.push_back(VW::read_example(*vw, "ccb action |"));
  examples.push_back(VW::read_example(*vw, "ccb slot 0:10:10 10 |"));

  vw->learn(examples);

  const auto& decision_scores = examples[0]->pred.decision_scores;
  // Fatal assertion: the element access below is only valid when exactly one slot came back.
  ASSERT_EQ(decision_scores.size(), 1u);
  EXPECT_EQ(decision_scores[0].size(), 0u);
  vw->finish_example(examples);
}

// When an earlier slot has consumed the only action a later slot is allowed to use, the later
// slot must receive an empty prediction instead of causing a failure.
TEST(Ccb, NoAvailableActions)
{
  auto vw = VW::initialize(vwtest::make_args("--ccb_explore_adf", "--quiet", "--all_slots_loss"));
  {
    // Learn once with an unrestricted second slot.
    VW::multi_ex examples;
    examples.push_back(VW::read_example(*vw, "ccb shared |"));
    examples.push_back(VW::read_example(*vw, "ccb action | a"));
    examples.push_back(VW::read_example(*vw, "ccb action | b"));
    examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0,1 |"));
    examples.push_back(VW::read_example(*vw, "ccb slot |"));

    vw->learn(examples);

    const auto& decision_scores = examples[0]->pred.decision_scores;
    EXPECT_EQ(decision_scores.size(), 2u);
    vw->finish_example(examples);
  }

  {
    VW::multi_ex examples;
    examples.push_back(VW::read_example(*vw, "ccb shared |"));
    examples.push_back(VW::read_example(*vw, "ccb action | a"));
    examples.push_back(VW::read_example(*vw, "ccb action | b"));
    examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0,1 |"));
    // This time restrict slot 1 to only have action 0 available
    examples.push_back(VW::read_example(*vw, "ccb slot 0:-1:0.5 0 |"));

    vw->predict(examples);

    const auto& decision_scores = examples[0]->pred.decision_scores;
    // Fatal assertions: the indexed accesses below are only valid when these sizes hold.
    ASSERT_EQ(decision_scores.size(), 2u);
    ASSERT_EQ(decision_scores[0].size(), 2u);
    EXPECT_EQ(decision_scores[0][0].action, 0u);
    EXPECT_EQ(decision_scores[0][1].action, 1u);
    EXPECT_EQ(decision_scores[1].size(), 0u);

    vw->finish_example(examples);
  }
}

0 comments on commit da1f106

Please sign in to comment.