Skip to content

Commit

Permalink
Merge branch 'master' into vw_slim_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
jackgerrits authored Feb 15, 2024
2 parents 4623b9f + 7c2963e commit 22bee5c
Show file tree
Hide file tree
Showing 52 changed files with 3,022 additions and 373 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/asan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [windows-latest, ubuntu-latest, macos-latest]
#os: [windows-latest, ubuntu-latest, macos-latest]
os: [ubuntu-latest, macos-latest] # Temporarily remove windows asan
preset: [vcpkg-asan-debug, vcpkg-ubsan-debug]
exclude:
# UBSan not supported by MSVC on Windows
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_windows_cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
CMAKE_BUILD_DIR: ${{ github.workspace }}/vw/build
SOURCE_DIR: ${{ github.workspace }}/vw
VCPKG_ROOT: ${{ github.workspace }}/vw/ext_libs/vcpkg
VCPKG_REF: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
VCPKG_REF: 53bef8994c541b6561884a8395ea35715ece75db

steps:
- uses: actions/checkout@v3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ jobs:
runs-on: windows-2019
env:
VCPKG_ROOT: ${{ github.workspace }}\\vcpkg
VCPKG_REF: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1
VCPKG_REF: 53bef8994c541b6561884a8395ea35715ece75db
VCPKG_DEFAULT_BINARY_CACHE: ${{ github.workspace }}\vcpkg_binary_cache
strategy:
matrix:
Expand Down
16 changes: 16 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "(ctest) Launch",
"type": "cppdbg",
"cwd": "${workspaceFolder}",
"request": "launch",
"program": "${cmake.testProgram}",
"args": [ "${cmake.testArgs}" ]
}
]
}
58 changes: 29 additions & 29 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ if(VW_FEAT_LDA AND NOT BUILD_PYTHON)
list(APPEND VCPKG_MANIFEST_FEATURES "lda")
endif()

option(BUILD_TESTING "Build tests" ON)
if(BUILD_TESTING)
list(APPEND VCPKG_MANIFEST_FEATURES "tests")
endif()
#option(BUILD_TESTING "Build tests" ON)
#if(BUILD_TESTING)
# list(APPEND VCPKG_MANIFEST_FEATURES "tests")
#endif()

option(BUILD_BENCHMARKS "Build benchmarks" OFF)
if(BUILD_BENCHMARKS)
Expand Down Expand Up @@ -100,6 +100,31 @@ set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_VISIBILITY_INLINES_HIDDEN TRUE)
set(CMAKE_CXX_VISIBILITY_PRESET "hidden")

option(VW_USE_ASAN "Compile with AddressSanitizer" OFF)
option(VW_USE_UBSAN "Compile with UndefinedBehaviorSanitizer" OFF)

# AddressSanitizer build: when VW_USE_ASAN is ON, define the VW_USE_ASAN
# preprocessor symbol and add the sanitizer flags for the active toolchain.
# These are directory-scoped commands, so they apply to targets compiled from
# this directory and below rather than to a single target.
if(VW_USE_ASAN)
add_compile_definitions(VW_USE_ASAN)
if(MSVC)
# MSVC spelling of the ASan switch. At link time incremental linking is
# turned off and debug information is requested.
add_compile_options(/fsanitize=address)
add_link_options(/InferASanLibs /incremental:no /debug)
else()
# Other toolchains: the same flag set is passed to both the compile and the
# link step.
add_compile_options(-fsanitize=address -fno-omit-frame-pointer -g3)
add_link_options(-fsanitize=address -fno-omit-frame-pointer -g3)
endif()
endif()

# UndefinedBehaviorSanitizer build: when VW_USE_UBSAN is ON, define the
# VW_USE_UBSAN preprocessor symbol and add the sanitizer flags. These are
# directory-scoped commands, so they apply to targets compiled from this
# directory and below rather than to a single target.
if(VW_USE_UBSAN)
add_compile_definitions(VW_USE_UBSAN)
if(MSVC)
# No MSVC flag set is provided here; configuration stops with an error.
message(FATAL_ERROR "UBSan not supported on MSVC")
else()
# The same flag set is passed to both the compile and the link step.
# NOTE(review): -fno-sanitize-recover is expected to make the first UBSan
# report fatal instead of continuing - confirm against the compiler docs.
add_compile_options(-fsanitize=undefined -fno-sanitize-recover -fno-omit-frame-pointer -g3)
add_link_options(-fsanitize=undefined -fno-sanitize-recover -fno-omit-frame-pointer -g3)
endif()
endif()


include(VowpalWabbitUtils)

if(MSVC)
Expand Down Expand Up @@ -152,33 +177,8 @@ option(VW_SSE2NEON_SYS_DEP "Override using the submodule for SSE2Neon dependency
option(VW_BUILD_VW_C_WRAPPER "Enable building the c_wrapper project" ON)
option(vw_BUILD_NET_CORE "Build .NET Core targets" OFF)
option(vw_BUILD_NET_FRAMEWORK "Build .NET Framework targets" OFF)
option(VW_USE_ASAN "Compile with AddressSanitizer" OFF)
option(VW_USE_UBSAN "Compile with UndefinedBehaviorSanitizer" OFF)
option(VW_BUILD_WASM "Add WASM target" OFF)

if(VW_USE_ASAN)
add_compile_definitions(VW_USE_ASAN)
if(MSVC)
add_compile_options(/fsanitize=address /GS- /wd5072)
add_link_options(/InferASanLibs /incremental:no /debug)
# Workaround for MSVC ASan issue here: https://developercommunity.visualstudio.com/t/VS2022---Address-sanitizer-on-x86-Debug-/10116361
add_compile_definitions(_DISABLE_STRING_ANNOTATION)
else()
add_compile_options(-fsanitize=address -fno-omit-frame-pointer -g3)
add_link_options(-fsanitize=address -fno-omit-frame-pointer -g3)
endif()
endif()

if(VW_USE_UBSAN)
add_compile_definitions(VW_USE_UBSAN)
if(MSVC)
message(FATAL_ERROR "UBSan not supported on MSVC")
else()
add_compile_options(-fsanitize=undefined -fno-sanitize-recover -fno-omit-frame-pointer -g3)
add_link_options(-fsanitize=undefined -fno-sanitize-recover -fno-omit-frame-pointer -g3)
endif()
endif()

if(VW_INSTALL AND NOT VW_ZLIB_SYS_DEP)
message(WARNING "Installing with a vendored version of zlib is not recommended. Use VW_ZLIB_SYS_DEP to use a system dependency or specify VW_INSTALL=OFF to silence this warning.")
endif()
Expand Down
2 changes: 1 addition & 1 deletion CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
},
"VW_GTEST_SYS_DEP": {
"type": "BOOL",
"value": "ON"
"value": "OFF"
},
"VW_EIGEN_SYS_DEP": {
"type": "BOOL",
Expand Down
2 changes: 1 addition & 1 deletion cmake/VowpalWabbitUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/refs/tags/release-1.11.0.zip
URL https://github.com/google/googletest/archive/refs/tags/v1.13.0.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
Expand Down
2 changes: 1 addition & 1 deletion ext_libs/vcpkg
Submodule vcpkg updated 5927 files
14 changes: 8 additions & 6 deletions python/docs/source/tutorials/DFtoVW_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -802,15 +802,17 @@
"\n",
"# Adding columns for easier visualization\n",
"weights_df[\"feature_name\"] = weights_df.apply(\n",
" lambda row: row.vw_feature_name.split(\"=\")[0]\n",
" if row.is_cat\n",
" else row.vw_feature_name,\n",
" lambda row: (\n",
" row.vw_feature_name.split(\"=\")[0] if row.is_cat else row.vw_feature_name\n",
" ),\n",
" axis=1,\n",
")\n",
"weights_df[\"feature_value\"] = weights_df.apply(\n",
" lambda row: row.vw_feature_name.split(\"=\")[1].zfill(2)\n",
" if row.is_cat\n",
" else row.vw_feature_name,\n",
" lambda row: (\n",
" row.vw_feature_name.split(\"=\")[1].zfill(2)\n",
" if row.is_cat\n",
" else row.vw_feature_name\n",
" ),\n",
" axis=1,\n",
")\n",
"weights_df.sort_values([\"feature_name\", \"feature_value\"], inplace=True)"
Expand Down
3 changes: 1 addition & 2 deletions python/tests/confidence_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,5 @@ def lblogwealth(self, *, t, sumXt, v, eta, s, alpha):

return max(
0,
(sumXt - sqrt(gamma1**2 * ll * v + gamma2**2 * ll**2) - gamma2 * ll)
/ t,
(sumXt - sqrt(gamma1**2 * ll * v + gamma2**2 * ll**2) - gamma2 * ll) / t,
)
32 changes: 17 additions & 15 deletions python/tests/crminustwo.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,21 +440,23 @@ def intervaldiff(
candidates.append(
(
gstar,
None
if isclose(kappa, 0)
else {
"kappastar": kappa,
"betastar": beta,
"gammastar": gamma,
"taustar": tau,
"ufake": ufake,
"wfake": wfake,
"rfake": rex,
"qfunc": lambda c, u, w, r, k=kappa, g=gamma, b=beta, t=tau, s=sign, num=n: -c
* (b + g * u + t * w + s * (u - w) * r)
/ ((num + 1) * k),
"mle": mle,
},
(
None
if isclose(kappa, 0)
else {
"kappastar": kappa,
"betastar": beta,
"gammastar": gamma,
"taustar": tau,
"ufake": ufake,
"wfake": wfake,
"rfake": rex,
"qfunc": lambda c, u, w, r, k=kappa, g=gamma, b=beta, t=tau, s=sign, num=n: -c
* (b + g * u + t * w + s * (u - w) * r)
/ ((num + 1) * k),
"mle": mle,
}
),
)
)

Expand Down
6 changes: 3 additions & 3 deletions python/vowpalwabbit/pyvw.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,9 +532,9 @@ def parse(
for ex in str_ex
]
):
str_ex: List[
Example
] = str_ex # pytype: disable=annotation-type-mismatch
str_ex: List[Example] = (
str_ex # pytype: disable=annotation-type-mismatch
)
return str_ex

if not isinstance(str_ex, (list, str)):
Expand Down
16 changes: 10 additions & 6 deletions test/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,21 @@ def _are_same(expected: Any, actual: Any, key: str) -> Tuple[bool, str]:
elif isinstance(expected, (int, bool, str)):
return (
expected == actual,
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}'"
if expected != actual
else "",
(
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}'"
if expected != actual
else ""
),
)
elif isinstance(expected, (float)):
delta = abs(expected - actual)
return (
delta < epsilon,
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}' (using epsilon: '{epsilon}')"
if delta >= epsilon
else "",
(
f"Key '{key}' value mismatch. Expected: '{expected}', but found: '{actual}' (using epsilon: '{epsilon}')"
if delta >= epsilon
else ""
),
)
elif isinstance(expected, dict):
expected_keys = set(expected.keys())
Expand Down
1 change: 1 addition & 0 deletions test/save_resume_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Test that the models generated with and without --predict_only_model produce the same predictions when loaded in test_mode.
"""

import sys
import os
import optparse
Expand Down
Binary file modified test/train-sets/0001.fb
Binary file not shown.
Binary file modified test/train-sets/ccb.fb
Binary file not shown.
Binary file modified test/train-sets/cs.fb
Binary file not shown.
Binary file modified test/train-sets/multiclass.fb
Binary file not shown.
Binary file modified test/train-sets/multilabel.fb
Binary file not shown.
Binary file modified test/train-sets/rcv1_cb_eval.fb
Binary file not shown.
Binary file modified test/train-sets/rcv1_raw_cb_small.fb
Binary file not shown.
Binary file modified test/train-sets/wiki256_no_label.fb
Binary file not shown.
91 changes: 69 additions & 22 deletions utl/flatbuffer/vw_to_flat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,10 @@ void to_flat::create_no_label(VW::example* v, ExampleBuilder& ex_builder)
ex_builder.label = VW::parsers::flatbuffer::Createno_label(_builder, (uint8_t)'\000').Union();
}

flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> to_flat::create_namespace(VW::features::audit_iterator begin,
VW::features::audit_iterator end, VW::namespace_index index, uint64_t hash, bool audit)
// Create namespace when audit is true
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> to_flat::create_namespace_audit(
VW::features::audit_iterator begin, VW::features::audit_iterator end, VW::namespace_index index, uint64_t hash)
{
std::vector<flatbuffers::Offset<VW::parsers::flatbuffer::Feature>> fts;
std::stringstream ss;
ss << index;

Expand All @@ -316,26 +316,61 @@ flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> to_flat::create_namespac
if (find_ns_offset == _share_examples.end())
{
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> namespace_offset;
std::vector<flatbuffers::Offset<flatbuffers::String>> feature_names;
std::vector<float> feature_values;
std::vector<uint64_t> feature_hashes;

// new namespace
if (audit)

std::string ns_name;
for (auto it = begin; it != end; ++it)
{
std::string ns_name;
for (auto it = begin; it != end; ++it)
{
ns_name = it.audit()->ns;
fts.push_back(
VW::parsers::flatbuffer::CreateFeatureDirect(_builder, it.audit()->name.c_str(), it.value(), it.index()));
}
namespace_offset = VW::parsers::flatbuffer::CreateNamespaceDirect(_builder, ns_name.c_str(), index, &fts, hash);
if ((it.audit()->ns).c_str() != nullptr) ns_name = it.audit()->ns;

(feature_names).push_back(_builder.CreateString(it.audit()->name.c_str()));
(feature_values).push_back(it.value());
(feature_hashes).push_back(it.index());
}
else
namespace_offset = VW::parsers::flatbuffer::CreateNamespaceDirect(
_builder, ns_name.c_str(), index, hash, &feature_names, &feature_values, &feature_hashes);

_share_examples[refid] = namespace_offset;
}

return _share_examples[refid];
}

// Create namespace when audit is false
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> to_flat::create_namespace(
features::const_iterator begin, features::const_iterator end, VW::namespace_index index, uint64_t hash)

Check warning on line 345 in utl/flatbuffer/vw_to_flat.cc

View workflow job for this annotation

GitHub Actions / asan.macos-latest.vcpkg-ubsan-debug

'features' is deprecated: Moved into VW namespace. Will be removed in VW 10. [-Wdeprecated-declarations]

Check warning on line 345 in utl/flatbuffer/vw_to_flat.cc

View workflow job for this annotation

GitHub Actions / asan.macos-latest.vcpkg-ubsan-debug

'features' is deprecated: Moved into VW namespace. Will be removed in VW 10. [-Wdeprecated-declarations]

Check warning on line 345 in utl/flatbuffer/vw_to_flat.cc

View workflow job for this annotation

GitHub Actions / asan.macos-latest.vcpkg-asan-debug

'features' is deprecated: Moved into VW namespace. Will be removed in VW 10. [-Wdeprecated-declarations]

Check warning on line 345 in utl/flatbuffer/vw_to_flat.cc

View workflow job for this annotation

GitHub Actions / asan.macos-latest.vcpkg-asan-debug

'features' is deprecated: Moved into VW namespace. Will be removed in VW 10. [-Wdeprecated-declarations]
{
std::stringstream ss;
ss << index;

for (auto it = begin; it != end; ++it) { ss << it.index() << it.value(); }
ss << ":" << hash;

std::string s = ss.str();
uint64_t refid = VW::uniform_hash(s.c_str(), s.size(), 0);
const auto find_ns_offset = _share_examples.find(refid);

if (find_ns_offset == _share_examples.end())
{
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> namespace_offset;
std::vector<float> feature_values;
std::vector<uint64_t> feature_hashes;

for (auto it = begin; it != end; ++it)
{
for (auto it = begin; it != end; ++it)
if (it.value() != 0) // store the feature data only if the value is non zero
{
fts.push_back(VW::parsers::flatbuffer::CreateFeatureDirect(_builder, nullptr, it.value(), it.index()));
(feature_values).push_back(it.value());
(feature_hashes).push_back(it.index());
}
namespace_offset = VW::parsers::flatbuffer::CreateNamespaceDirect(_builder, nullptr, index, &fts, hash);
}
namespace_offset = VW::parsers::flatbuffer::CreateNamespaceDirect(
_builder, nullptr, index, hash, nullptr, &feature_values, &feature_hashes);

_share_examples[refid] = namespace_offset;
}

Expand Down Expand Up @@ -438,13 +473,25 @@ void to_flat::convert_txt_to_flat(VW::workspace& all)
VW::details::flatten_namespace_extents(ae->feature_space[ns].namespace_extents, ae->feature_space[ns].size());
auto unflattened_with_ranges_that_dont_have_extents = unflatten_namespace_extents_dont_skip(flattened_extents);

for (const auto& extent : unflattened_with_ranges_that_dont_have_extents)
if (all.output_config.audit || all.output_config.hash_inv)
{
for (const auto& extent : unflattened_with_ranges_that_dont_have_extents)
{
// The extent hash for a non-hash-extent will be 0, which is the same as the field not existing to flatbuffers.
auto created_ns = create_namespace_audit(ae->feature_space[ns].audit_begin() + extent.begin_index,
ae->feature_space[ns].audit_begin() + extent.end_index, ns, extent.hash);
namespaces.push_back(created_ns);
}
}
else
{
// The extent hash for a non-hash-extent will be 0, which is the same as the field not existing to flatbuffers.
auto created_ns = create_namespace(ae->feature_space[ns].audit_begin() + extent.begin_index,
ae->feature_space[ns].audit_begin() + extent.end_index, ns, extent.hash,
all.output_config.audit || all.output_config.hash_inv);
namespaces.push_back(created_ns);
for (const auto& extent : unflattened_with_ranges_that_dont_have_extents)
{
// The extent hash for a non-hash-extent will be 0, which is the same as the field not existing to flatbuffers.
auto created_ns = create_namespace(ae->feature_space[ns].cbegin() + extent.begin_index,
ae->feature_space[ns].cbegin() + extent.end_index, ns, extent.hash);
namespaces.push_back(created_ns);
}
}
}
std::string tag(ae->tag.begin(), ae->tag.size());
Expand Down
6 changes: 4 additions & 2 deletions utl/flatbuffer/vw_to_flat.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class to_flat
void write_to_file(bool collection, bool is_multiline, MultiExampleBuilder& multi_ex_builder,
ExampleBuilder& ex_builder, std::ofstream& outfile);

flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> create_namespace(VW::features::audit_iterator begin,
VW::features::audit_iterator end, VW::namespace_index index, uint64_t hash, bool audit);
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> create_namespace(
VW::features::const_iterator begin, VW::features::const_iterator end, VW::namespace_index index, uint64_t hash);
flatbuffers::Offset<VW::parsers::flatbuffer::Namespace> create_namespace_audit(
VW::features::audit_iterator begin, VW::features::audit_iterator end, VW::namespace_index index, uint64_t hash);
};
Loading

0 comments on commit 22bee5c

Please sign in to comment.