Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add benchmark code for URL parser #24

Merged
merged 1 commit into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .github/workflows/test-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ jobs:
- name: clang++ C++17
cxx_compiler: clang++
cxx_standard: 17
cmake_options: ""
cmake_options: "-DURL_BUILD_BENCH=ON"
after_test: |
build/bench-url test/wpt/urltestdata.json
steps:
- uses: actions/checkout@v4
Expand All @@ -32,4 +34,7 @@ jobs:
- name: build
run: cmake --build build --config Release
- name: test
run: cd build ; ctest -C Release -V
run: ctest --test-dir build -C Release -V
- name: after test
if: ${{ matrix.after_test }}
run: ${{ matrix.after_test }}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
/build/

# downloadable dependencies
/deps/ankerl/
/deps/doctest/
/deps/picojson/

Expand Down
17 changes: 15 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ endif()

# Options
option(URL_BUILD_TESTS "Build the Upa URL tests." ${URL_MAIN_PROJECT})
option(URL_BUILD_BENCH "Build the Upa URL benchmarks." OFF)
option(URL_BUILD_FUZZER "Build the Upa URL fuzzer." OFF)
option(URL_BUILD_EXAMPLES "Build the Upa URL examples." OFF)
option(URL_BUILD_EXTRACTED "Build Upa URL examples extracted from the docs." OFF)
Expand Down Expand Up @@ -111,8 +112,8 @@ endif()
include_directories(deps)

# Are Upa URL and ICU libraries needed?
if (URL_BUILD_TESTS OR URL_BUILD_FUZZER OR URL_BUILD_EXAMPLES OR URL_BUILD_EXTRACTED OR
URL_INSTALL OR NOT URL_BUILD_TOOLS)
if (URL_BUILD_TESTS OR URL_BUILD_BENCH OR URL_BUILD_FUZZER OR URL_BUILD_EXAMPLES OR
URL_BUILD_EXTRACTED OR URL_INSTALL OR NOT URL_BUILD_TOOLS)
# This library depends on ICU
find_package(ICU REQUIRED COMPONENTS i18n uc)

Expand Down Expand Up @@ -187,6 +188,18 @@ if (URL_BUILD_TESTS)
endforeach()
endif()

# Benchmark targets

# When URL_BUILD_BENCH is ON, build one executable for every test/bench-*.cpp
# source file found by the glob below.
if (URL_BUILD_BENCH)
# Collect the benchmark source files
file(GLOB bench_files test/bench-*.cpp)

foreach(file ${bench_files})
# The target is named after the source file (NAME_WE: file name without
# directory and extension), e.g. test/bench-url.cpp -> bench-url
get_filename_component(exe_name ${file} NAME_WE)
add_executable(${exe_name} ${file})
# Every benchmark links against the upa::url target
target_link_libraries(${exe_name} PRIVATE upa::url)
endforeach()
endif()

# Fuzzer targets

if (URL_BUILD_FUZZER)
Expand Down
Empty file added deps/ankerl/.gitkeep
Empty file.
1 change: 1 addition & 0 deletions deps/download-deps.bat
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ set p=%~dp0

rem Download the single-header dependencies next to this script.
rem Output paths are quoted so that a checkout directory containing spaces works,
rem and all of them use backslash separators consistently.
curl -fsS -o "%p%\doctest\doctest.h" https://raw.githubusercontent.com/doctest/doctest/v2.4.11/doctest/doctest.h
curl -fsS -o "%p%\picojson\picojson.h" https://raw.githubusercontent.com/kazuho/picojson/111c9be5188f7350c2eac9ddaedd8cca3d7bf394/picojson.h
curl -fsS -o "%p%\ankerl\nanobench.h" https://raw.githubusercontent.com/martinus/nanobench/v4.3.11/src/include/nanobench.h
1 change: 1 addition & 0 deletions deps/download-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ p="$(dirname "$0")"

# Download the single-header dependencies next to this script.
# "$p" is quoted so that a script directory containing spaces (or glob
# characters) is passed to curl as a single argument.
curl -fsS -o "$p/doctest/doctest.h" https://raw.githubusercontent.com/doctest/doctest/v2.4.11/doctest/doctest.h
curl -fsS -o "$p/picojson/picojson.h" https://raw.githubusercontent.com/kazuho/picojson/111c9be5188f7350c2eac9ddaedd8cca3d7bf394/picojson.h
curl -fsS -o "$p/ankerl/nanobench.h" https://raw.githubusercontent.com/martinus/nanobench/v4.3.11/src/include/nanobench.h
181 changes: 181 additions & 0 deletions test/bench-url.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#include "upa/url.h"
#include "picojson_fffd.h"

#include <charconv>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

#define ANKERL_NANOBENCH_IMPLEMENT
#include "ankerl/nanobench.h"

// -----------------------------------------------------------------------------
// Read samples from a text file (one URL per line) and benchmark them

// Benchmarks URL parsing over samples read from a plain text file.
//
// Every line of the file is taken as one URL string. In each benchmark
// iteration all loaded strings are parsed, without a base URL, into the same
// upa::url object.
//
// file_name - path of the text file containing the URL samples
// min_iters - value passed to nanobench's minEpochIterations()
//
// Returns 0 on success, 2 if the file cannot be opened or contains no lines.
int benchmark_txt(const char* file_name, uint64_t min_iters) {
    std::vector<std::string> url_strings;

    // Load URL samples
    std::cout << "Load URL samples from: " << file_name << '\n';
    std::ifstream finp(file_name);
    if (!finp.is_open()) {
        // Diagnostics belong on stderr, as load_tests() does
        std::cerr << "Failed to open " << file_name << '\n';
        return 2;
    }

    std::string line;
    while (std::getline(finp, line))
        url_strings.push_back(line);

    // Without samples the benchmark body would be an empty loop and the
    // reported numbers meaningless, so treat it as an error.
    if (url_strings.empty()) {
        std::cerr << "No URL samples found in " << file_name << '\n';
        return 2;
    }

    // Run benchmark

    ankerl::nanobench::Bench().minEpochIterations(min_iters).run("Upa URL", [&] {
        upa::url url;

        for (const auto& str_url : url_strings) {
            url.parse(str_url, nullptr);

            // Keep the parse result observable so the call is not optimized out
            ankerl::nanobench::doNotOptimizeAway(url);
        }
    });

    return 0;
}

// -----------------------------------------------------------------------------
// Read samples from urltestdata.json and benchmark

// picojson parse context that accepts only an array as the root JSON value.
//
// Each item of the root array is parsed into a picojson::value and passed to
// the callable given to the constructor. The callable's result is returned
// from parse_array_item(); a failure to parse the item returns false without
// invoking the callable.
template <class OnArrayItem>
class root_array_context : public picojson::deny_parse_context {
public:
    root_array_context(OnArrayItem on_array_item)
        : item_handler_(on_array_item)
    {}

    // An object as the root value is rejected
    bool parse_object_start() { return false; }
    bool parse_object_stop() { return false; }

    // An array as the root value is accepted
    bool parse_array_start() { return true; }
    bool parse_array_stop(std::size_t) { return true; }

    template <typename Iter>
    bool parse_array_item(picojson::input<Iter>& in, std::size_t) {
        // Parse the item into a standalone value first ...
        picojson::value element;
        picojson::default_parse_context element_ctx(&element);

        // ... and hand it to the callback only if it was parsed successfully
        return picojson::_parse(element_ctx, in) && item_handler_(element);
    }

private:
    OnArrayItem item_handler_;
};

// Parses the JSON file `file_name` with the picojson parse context `ctx`.
//
// Announces the file on stdout, reports problems on stderr.
// Returns true if the file was opened and picojson reported no error,
// false otherwise.
template <typename Context>
bool load_tests(Context& ctx, const char* file_name) {
    std::cout << "Load URL samples from: " << file_name << '\n';

    std::ifstream input(file_name, std::ios_base::in | std::ios_base::binary);
    if (!input.is_open()) {
        std::cerr << "Can't open file: " << file_name << std::endl;
        return false;
    }

    std::string parse_error;

    // std::istreambuf_iterator gives unformatted reading of the stream
    // http://stackoverflow.com/a/17776228/3908097
    const std::istreambuf_iterator<char> first(input);
    const std::istreambuf_iterator<char> last;
    picojson::_parse(ctx, first, last, &parse_error);

    if (parse_error.empty())
        return true;

    std::cerr << parse_error << std::endl;
    return false;
}

// Benchmarks URL parsing over samples read from a JSON file whose root value
// is an array (the layout used by urltestdata.json).
//
// Array items that are objects must have an "input" member (string) and a
// "base" member (string or null); items of any other type are skipped.
// In each benchmark iteration every sample's base (if not empty) and input
// are parsed; samples whose base fails to parse are skipped.
//
// file_name - path of the JSON file containing the URL samples
// min_iters - value passed to nanobench's minEpochIterations()
//
// Nothing is benchmarked if the file cannot be loaded or has no samples;
// the reason is reported on stderr.
void benchmark_wpt(const char* file_name, uint64_t min_iters) {
    // Load URL strings: (input, base) pairs; an empty base means "no base"
    std::vector<std::pair<std::string, std::string>> url_samples;

    root_array_context context{ [&url_samples](const picojson::value& item) {
        if (!item.is<picojson::object>())
            return true; // not a test object, skip

        try {
            const picojson::object& obj = item.get<picojson::object>();
            // Bind by reference: copying a picojson::value copies its content
            const picojson::value& input_val = obj.at("input");
            const picojson::value& base_val = obj.at("base");

            // picojson requires is<T>() to hold before get<T>() is called,
            // so check the member types explicitly instead of relying on its
            // assertion.
            const bool base_is_null = base_val.is<picojson::null>();
            if (!input_val.is<std::string>() ||
                !(base_is_null || base_val.is<std::string>())) {
                std::cerr << "[ERR:invalid file]: \"input\" must be a string"
                    " and \"base\" must be a string or null" << std::endl;
                return false;
            }

            url_samples.emplace_back(
                input_val.get<std::string>(),
                base_is_null ? std::string{} : base_val.get<std::string>());
        }
        catch (const std::out_of_range& ex) {
            // obj.at() throws if "input" or "base" is missing
            std::cerr << "[ERR:invalid file]: " << ex.what() << std::endl;
            return false;
        }
        return true;
    } };

    if (!load_tests(context, file_name))
        return;

    // Without samples the benchmark body would be an empty loop and the
    // reported numbers meaningless.
    if (url_samples.empty()) {
        std::cerr << "No URL samples found in " << file_name << '\n';
        return;
    }

    // Run benchmark

    ankerl::nanobench::Bench().minEpochIterations(min_iters).run("Upa URL", [&] {
        upa::url url;
        upa::url url_base;

        for (const auto& url_strings : url_samples) {
            upa::url* ptr_base = nullptr;
            if (!url_strings.second.empty()) {
                if (!upa::success(url_base.parse(url_strings.second, nullptr)))
                    continue; // invalid base
                ptr_base = &url_base;
            }
            url.parse(url_strings.first, ptr_base);

            // Keep the parse result observable so the call is not optimized out
            ankerl::nanobench::doNotOptimizeAway(url);
        }
    });
}

// -----------------------------------------------------------------------------

// Converts a decimal string to a positive integer.
//
// str - NUL-terminated string expected to consist of decimal digits only
// def - value to return when `str` is not usable
//
// Returns the parsed value if the whole string is a decimal number in the
// range [1, UINT64_MAX]; otherwise returns `def`. In particular a null or
// empty string, a sign ("-1", "+1"), whitespace, trailing characters
// ("10abc"), zero and out-of-range values all yield `def`.
uint64_t get_positive_or_default(const char* str, uint64_t def)
{
    if (str == nullptr)
        return def;

    const char* const last = str + std::strlen(str);
    uint64_t res = 0;
    // std::from_chars does not skip whitespace, rejects signs for unsigned
    // types and reports overflow, unlike std::strtoull which silently wraps
    // "-1" to 2^64-1 and saturates on overflow.
    const auto [ptr, ec] = std::from_chars(str, last, res, 10);
    if (ec != std::errc{} || ptr != last || res == 0)
        return def;
    return res;
}

// Entry point: bench-url <file containing URLs> [<min iterations>]
//
// The benchmark is selected by the file extension: ".json" runs
// benchmark_wpt(), ".txt" runs benchmark_txt(). The optional second argument
// is the minimum number of iterations per epoch; the default (3) is used when
// it is missing or is not a positive number.
//
// Exit status: 0 on success, 1 on a usage error (missing argument or
// unsupported extension), 2 if the file does not exist; for ".txt" files the
// result of benchmark_txt() is returned.
int main(int argc, const char* argv[])
{
    constexpr uint64_t min_iters_def = 3;

    if (argc < 2) {
        std::cerr << "Usage: bench-url <file containing URLs> [<min iterations>]\n";
        return 1;
    }

    const std::filesystem::path file_name = argv[1];
    const uint64_t min_iters = argc > 2
        ? get_positive_or_default(argv[2], min_iters_def)
        : min_iters_def;

    const std::filesystem::path extension = file_name.extension();
    const bool is_json = extension == ".json";
    if (!is_json && extension != ".txt") {
        std::cerr << "File containing URLs should have .json or .txt extension.\n";
        return 1;
    }

    // benchmark_wpt() returns void, so a file it cannot open would otherwise
    // still end in exit status 0; check for the file here so that scripts
    // (e.g. CI) see the failure. The error_code overload does not throw.
    std::error_code ec;
    if (!std::filesystem::exists(file_name, ec)) {
        std::cerr << "File not found: " << file_name.string() << '\n';
        return 2;
    }

    const std::string file_name_str = file_name.string();
    if (is_json) {
        benchmark_wpt(file_name_str.c_str(), min_iters);
        return 0;
    }

    // Propagate the load status of the text benchmark
    return benchmark_txt(file_name_str.c_str(), min_iters);
}
Loading