Skip to content

Commit

Permalink
Add benchmark code for URL parser
Browse files Browse the repository at this point in the history
* Add benchmark code bench-url.cpp

* Update scripts to download nanobench.h

* Run benchmark in macOS workflow
  • Loading branch information
rmisev committed Nov 5, 2023
1 parent 4c5c326 commit 1279c38
Show file tree
Hide file tree
Showing 7 changed files with 203 additions and 4 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/test-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ jobs:
- name: clang++ C++17
cxx_compiler: clang++
cxx_standard: 17
cmake_options: ""
cmake_options: "-DURL_BUILD_BENCH=ON"
after_test: |
build/bench-url test/wpt/urltestdata.json
steps:
- uses: actions/checkout@v4
Expand All @@ -32,4 +34,7 @@ jobs:
- name: build
run: cmake --build build --config Release
- name: test
run: cd build ; ctest -C Release -V
run: ctest --test-dir build -C Release -V
- name: after test
if: ${{ matrix.after_test }}
run: ${{ matrix.after_test }}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
/build/

# downloadable dependencies
/deps/ankerl/
/deps/doctest/
/deps/picojson/

Expand Down
17 changes: 15 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ endif()

# Options
option(URL_BUILD_TESTS "Build the Upa URL tests." ${URL_MAIN_PROJECT})
option(URL_BUILD_BENCH "Build the Upa URL benchmarks." OFF)
option(URL_BUILD_FUZZER "Build the Upa URL fuzzer." OFF)
option(URL_BUILD_EXAMPLES "Build the Upa URL examples." OFF)
option(URL_BUILD_EXTRACTED "Build Upa URL examples extracted from the docs." OFF)
Expand Down Expand Up @@ -111,8 +112,8 @@ endif()
include_directories(deps)

# Are Upa URL and ICU libraries needed?
if (URL_BUILD_TESTS OR URL_BUILD_FUZZER OR URL_BUILD_EXAMPLES OR URL_BUILD_EXTRACTED OR
URL_INSTALL OR NOT URL_BUILD_TOOLS)
if (URL_BUILD_TESTS OR URL_BUILD_BENCH OR URL_BUILD_FUZZER OR URL_BUILD_EXAMPLES OR
URL_BUILD_EXTRACTED OR URL_INSTALL OR NOT URL_BUILD_TOOLS)
# This library depends on ICU
find_package(ICU REQUIRED COMPONENTS i18n uc)

Expand Down Expand Up @@ -187,6 +188,18 @@ if (URL_BUILD_TESTS)
endforeach()
endif()

# Benchmark targets
#
# When URL_BUILD_BENCH is ON, every source file matching test/bench-*.cpp is
# built as its own executable and linked with the upa::url library.

if (URL_BUILD_BENCH)
# Collect the benchmark source files
file(GLOB bench_files test/bench-*.cpp)

foreach(file ${bench_files})
# Name the executable after the source file (NAME_WE: file name without
# directory and extension), e.g. test/bench-url.cpp -> bench-url
get_filename_component(exe_name ${file} NAME_WE)
add_executable(${exe_name} ${file})
target_link_libraries(${exe_name} PRIVATE upa::url)
endforeach()
endif()

# Fuzzer targets

if (URL_BUILD_FUZZER)
Expand Down
Empty file added deps/ankerl/.gitkeep
Empty file.
1 change: 1 addition & 0 deletions deps/download-deps.bat
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ set p=%~dp0

rem Download the single-header dependencies next to this script.
rem Output paths are quoted so the script also works when its directory contains spaces.
curl -fsS -o "%p%\doctest\doctest.h" https://raw.githubusercontent.com/doctest/doctest/v2.4.11/doctest/doctest.h
curl -fsS -o "%p%\picojson\picojson.h" https://raw.githubusercontent.com/kazuho/picojson/111c9be5188f7350c2eac9ddaedd8cca3d7bf394/picojson.h
curl -fsS -o "%p%\ankerl\nanobench.h" https://raw.githubusercontent.com/martinus/nanobench/v4.3.11/src/include/nanobench.h
1 change: 1 addition & 0 deletions deps/download-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ p="$(dirname "$0")"

# Download the single-header dependencies next to this script.
# "$p" is quoted so that a script directory containing spaces is not word-split.
curl -fsS -o "$p/doctest/doctest.h" https://raw.githubusercontent.com/doctest/doctest/v2.4.11/doctest/doctest.h
curl -fsS -o "$p/picojson/picojson.h" https://raw.githubusercontent.com/kazuho/picojson/111c9be5188f7350c2eac9ddaedd8cca3d7bf394/picojson.h
curl -fsS -o "$p/ankerl/nanobench.h" https://raw.githubusercontent.com/martinus/nanobench/v4.3.11/src/include/nanobench.h
178 changes: 178 additions & 0 deletions test/bench-url.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
#include "upa/url.h"
#include "picojson_fffd.h"

#include <cctype>
#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#define ANKERL_NANOBENCH_IMPLEMENT
#include "ankerl/nanobench.h"

// -----------------------------------------------------------------------------
// Read samples from text file (URL in each line) and benchmark

/// Benchmark URL parsing with samples read from a text file.
///
/// Every line of the file is taken as one URL string and is parsed without
/// a base URL.
///
/// @param file_name  path of the text file (one URL per line)
/// @param min_iters  value passed to nanobench's minEpochIterations()
/// @return 0 on success, 2 if the file cannot be opened
int benchmark_txt(const char* file_name, uint64_t min_iters) {
    std::vector<std::string> url_strings;

    // Load URL samples
    std::cout << "Load URL samples from: " << file_name << '\n';
    std::ifstream finp(file_name);
    if (!finp.is_open()) {
        // Errors go to stderr so they are kept apart from regular output on stdout
        std::cerr << "Failed to open " << file_name << '\n';
        return 2;
    }

    std::string line;
    while (std::getline(finp, line)) {
        // Move instead of copy: std::getline() erases `line` before reading
        // into it, so the moved-from state is never observed.
        url_strings.push_back(std::move(line));
    }

    // Run benchmark

    ankerl::nanobench::Bench().minEpochIterations(min_iters).run("url_whatwg", [&] {
        upa::url url;

        for (const auto& str_url : url_strings) {
            url.parse(str_url, nullptr);
            //std::string str{ url.href() };
            //ankerl::nanobench::doNotOptimizeAway(str);
            ankerl::nanobench::doNotOptimizeAway(url);
        }
    });

    return 0;
}

// -----------------------------------------------------------------------------
// Read samples from urltestdata.json and benchmark

/// picojson parse context that accepts only a JSON array as the root value.
///
/// Each array item is parsed into a temporary picojson::value and handed to
/// a callback, so the whole array is never stored by this context.
///
/// @tparam OnArrayItem  callable invoked with the parsed picojson::value of
///                      each array item; its result is returned from
///                      parse_array_item()
template <class OnArrayItem>
class root_array_context : public picojson::deny_parse_context {
    OnArrayItem on_array_item_;
public:
    /// @param on_array_item  callback stored in the context (taken by value
    ///                       and moved into place)
    explicit root_array_context(OnArrayItem on_array_item)
        : on_array_item_(std::move(on_array_item))
    {}

    // array as root
    bool parse_array_start() { return true; }
    bool parse_array_stop(std::size_t) { return true; }

    /// Parse one array item and pass it to the callback.
    /// @return false if the item cannot be parsed, otherwise the callback's result
    template <typename Iter> bool parse_array_item(picojson::input<Iter>& in, std::size_t) {
        picojson::value item;

        // parse the array item
        picojson::default_parse_context ctx(&item);
        if (!picojson::_parse(ctx, in))
            return false;

        // callback with array item
        return on_array_item_(item);
    }

    // deny object as root
    bool parse_object_start() { return false; }
    bool parse_object_stop() { return false; }
};

/// Read a JSON file and feed it to picojson using the given parse context.
///
/// @param ctx        picojson parse context that receives the parse events
/// @param file_name  path of the JSON file to read
/// @return true if the file was opened and picojson reported no error,
///         false otherwise (the reason is printed to std::cerr)
template <typename Context>
bool load_tests(Context& ctx, const char* file_name) {
    // Unformatted reading: std::istreambuf_iterator does not skip whitespace
    // http://stackoverflow.com/a/17776228/3908097
    using char_iterator = std::istreambuf_iterator<char>;

    std::cout << "Load URL samples from: " << file_name << '\n';

    std::ifstream input(file_name, std::ios_base::in | std::ios_base::binary);
    if (!input.is_open()) {
        std::cerr << "Can't open file: " << file_name << std::endl;
        return false;
    }

    // picojson stores a description of the failure here; empty means success
    std::string parse_error;
    picojson::_parse(ctx, char_iterator(input), char_iterator(), &parse_error);

    if (parse_error.empty())
        return true;

    std::cerr << parse_error << std::endl;
    return false;
}

/// Benchmark URL parsing with samples read from a urltestdata.json style file.
///
/// The file's root must be a JSON array. Every object item must have the keys
/// "input" (string) and "base" (string or null); items that are not objects
/// are skipped. If loading fails, nothing is benchmarked.
///
/// @param file_name  path of the JSON file
/// @param min_iters  value passed to nanobench's minEpochIterations()
void benchmark_wpt(const char* file_name, uint64_t min_iters) {
    // (input, base) pairs; an empty base string means "parse without base URL"
    std::vector<std::pair<std::string, std::string>> url_samples;

    root_array_context context{ [&](const picojson::value& item) {
        // Only object items are samples; anything else is skipped
        if (!item.is<picojson::object>())
            return true;

        try {
            const picojson::object& obj = item.get<picojson::object>();
            // Bind by reference: at() returns a reference, `const auto` would
            // deep-copy each JSON value before it is copied again below.
            const auto& input_val = obj.at("input");
            const auto& base_val = obj.at("base");

            // Check the types before get<std::string>() so that a malformed
            // file is reported instead of failing inside picojson.
            const bool base_is_null = base_val.is<picojson::null>();
            if (!input_val.is<std::string>() ||
                !(base_is_null || base_val.is<std::string>())) {
                std::cerr << "[ERR:invalid file]: \"input\" must be a string and \"base\" a string or null" << std::endl;
                return false;
            }

            url_samples.emplace_back(
                input_val.get<std::string>(),
                base_is_null ? std::string{} : base_val.get<std::string>());
        }
        catch (const std::out_of_range& ex) {
            // at() throws when the "input" or "base" key is missing
            std::cerr << "[ERR:invalid file]: " << ex.what() << std::endl;
            return false;
        }
        return true;
    } };

    if (!load_tests(context, file_name))
        return;

    // Run benchmark

    ankerl::nanobench::Bench().minEpochIterations(min_iters).run("url_whatwg", [&] {
        upa::url url;
        upa::url url_base;

        for (const auto& [input, base] : url_samples) {
            upa::url* ptr_base = nullptr;
            if (!base.empty()) {
                if (!upa::success(url_base.parse(base, nullptr)))
                    continue; // invalid base
                ptr_base = &url_base;
            }
            url.parse(input, ptr_base);

            ankerl::nanobench::doNotOptimizeAway(url);
        }
    });
}

// -----------------------------------------------------------------------------

/// Convert a decimal string to a positive integer, falling back to a default.
///
/// @param str  null-terminated string to convert (may be nullptr)
/// @param def  value returned when `str` does not yield a positive number
/// @return the converted value if it is greater than zero; `def` if `str` is
///         null, has no leading number, is zero, is negative or is out of range
uint64_t get_positive_or_default(const char* str, uint64_t def)
{
    if (str == nullptr)
        return def;

    // std::strtoull() accepts a leading '-' and returns the negated value
    // wrapped around, so "-1" would turn into a huge positive number.
    // Skip the same leading whitespace strtoull() skips and reject the sign.
    const char* first = str;
    while (std::isspace(static_cast<unsigned char>(*first)))
        ++first;
    if (*first == '-')
        return def;

    // On overflow strtoull() returns ULLONG_MAX and sets errno to ERANGE
    errno = 0;
    const unsigned long long res = std::strtoull(first, nullptr, 10);
    if (errno == ERANGE || res == 0)
        return def;

    return static_cast<uint64_t>(res);
}

/// Entry point: bench-url <URL's file> [<min iterations>]
///
/// The benchmark is selected by the file extension: ".json" runs
/// benchmark_wpt(), ".txt" runs benchmark_txt().
///
/// @return 0 on success; 1 on wrong usage or unsupported file extension;
///         otherwise the status returned by benchmark_txt()
int main(int argc, const char* argv[])
{
    constexpr uint64_t min_iters_def = 3;

    if (argc < 2) {
        std::cerr << "Usage: bench-url <URL's file> [<min iterations>]\n";
        return 1;
    }

    const std::filesystem::path file_name = argv[1];
    const uint64_t min_iters = argc > 2
        ? get_positive_or_default(argv[2], min_iters_def)
        : min_iters_def;

    const auto ext = file_name.extension();

    if (ext == ".json") {
        benchmark_wpt(file_name.string().c_str(), min_iters);
        return 0;
    }

    // Propagate the status so a missing samples file fails the process
    if (ext == ".txt")
        return benchmark_txt(file_name.string().c_str(), min_iters);

    // Do not report success when nothing was benchmarked
    std::cerr << "Unsupported file type (expected .json or .txt): " << file_name.string() << '\n';
    return 1;
}

0 comments on commit 1279c38

Please sign in to comment.