From 819ef20db5f69ee824258ab026b1ae270a3b906c Mon Sep 17 00:00:00 2001 From: Jason Date: Sun, 26 Mar 2023 12:21:30 +0800 Subject: [PATCH] Add macOS Linux support --- .dockerignore | 1 - .github/workflows/build-cpu-linux-x64.yml | 65 + .github/workflows/build-cuda-linux-x64.yml | 65 + .github/workflows/build-darwin-x64.yml | 217 + Build/Darwin_x64/VideoSubFinderCli.run | 4 + Build/Docker/base.Dockerfile | 36 + Build/Docker/base_cuda.Dockerfile | 35 + Build/Docker/build.Dockerfile | 37 + Build/Docker/build.sh | 12 + Build/Docker/build_all.sh | 5 + Build/Docker/build_base.sh | 5 + Build/Docker/build_base_cuda.sh | 5 + Build/Docker/build_cuda.Dockerfile | 35 + Build/Docker/build_cuda.sh | 12 + Build/Docker/run_cuda.Dockerfile | 10 + Build/Docker/run_cuda.sh | 42 + CMakeLists.txt | 2 +- Components/FFMPEGVideo/FFMPEGVideo.h | 2 + Components/IPAlgorithms/IPAlgorithms.h | 6 + Components/Include/__pstl_algorithm | 15 + Components/Include/__pstl_config_site | 13 + Components/Include/__pstl_execution | 15 + Components/Include/__pstl_memory | 15 + Components/Include/__pstl_numeric | 15 + .../Include/pstl/internal/algorithm_fwd.h | 1202 ++++++ .../Include/pstl/internal/algorithm_impl.h | 3819 +++++++++++++++++ .../Include/pstl/internal/execution_defs.h | 100 + .../Include/pstl/internal/execution_impl.h | 105 + .../pstl/internal/glue_algorithm_defs.h | 558 +++ .../pstl/internal/glue_algorithm_impl.h | 1108 +++++ .../pstl/internal/glue_execution_defs.h | 55 + .../Include/pstl/internal/glue_memory_defs.h | 85 + .../Include/pstl/internal/glue_memory_impl.h | 352 ++ .../Include/pstl/internal/glue_numeric_defs.h | 124 + .../Include/pstl/internal/glue_numeric_impl.h | 232 + .../Include/pstl/internal/memory_impl.h | 112 + .../Include/pstl/internal/numeric_fwd.h | 139 + .../Include/pstl/internal/numeric_impl.h | 383 ++ .../Include/pstl/internal/omp/parallel_for.h | 64 + .../pstl/internal/omp/parallel_for_each.h | 59 + .../pstl/internal/omp/parallel_invoke.h | 50 + 
.../pstl/internal/omp/parallel_merge.h | 98 + .../pstl/internal/omp/parallel_reduce.h | 73 + .../Include/pstl/internal/omp/parallel_scan.h | 136 + .../omp/parallel_stable_partial_sort.h | 33 + .../pstl/internal/omp/parallel_stable_sort.h | 160 + .../internal/omp/parallel_transform_reduce.h | 113 + .../internal/omp/parallel_transform_scan.h | 32 + Components/Include/pstl/internal/omp/util.h | 173 + .../Include/pstl/internal/parallel_backend.h | 37 + .../pstl/internal/parallel_backend_omp.h | 58 + .../pstl/internal/parallel_backend_serial.h | 137 + .../pstl/internal/parallel_backend_tbb.h | 1296 ++++++ .../pstl/internal/parallel_backend_utils.h | 263 ++ .../Include/pstl/internal/parallel_impl.h | 90 + .../Include/pstl/internal/pstl_config.h | 204 + .../pstl/internal/unseq_backend_simd.h | 862 ++++ Components/Include/pstl/internal/utils.h | 177 + Interfaces/VideoSubFinderCli/CMakeLists.txt | 88 + README.md | 39 + docker/build.Dockerfile | 72 - docker/build.sh | 7 - docker/build_cuda.Dockerfile | 71 - 63 files changed, 13283 insertions(+), 152 deletions(-) create mode 100644 .github/workflows/build-cpu-linux-x64.yml create mode 100644 .github/workflows/build-cuda-linux-x64.yml create mode 100644 .github/workflows/build-darwin-x64.yml create mode 100644 Build/Darwin_x64/VideoSubFinderCli.run create mode 100644 Build/Docker/base.Dockerfile create mode 100644 Build/Docker/base_cuda.Dockerfile create mode 100644 Build/Docker/build.Dockerfile create mode 100644 Build/Docker/build.sh create mode 100644 Build/Docker/build_all.sh create mode 100644 Build/Docker/build_base.sh create mode 100644 Build/Docker/build_base_cuda.sh create mode 100644 Build/Docker/build_cuda.Dockerfile create mode 100644 Build/Docker/build_cuda.sh create mode 100644 Build/Docker/run_cuda.Dockerfile create mode 100644 Build/Docker/run_cuda.sh create mode 100644 Components/Include/__pstl_algorithm create mode 100644 Components/Include/__pstl_config_site create mode 100644 
Components/Include/__pstl_execution create mode 100644 Components/Include/__pstl_memory create mode 100644 Components/Include/__pstl_numeric create mode 100644 Components/Include/pstl/internal/algorithm_fwd.h create mode 100644 Components/Include/pstl/internal/algorithm_impl.h create mode 100644 Components/Include/pstl/internal/execution_defs.h create mode 100644 Components/Include/pstl/internal/execution_impl.h create mode 100644 Components/Include/pstl/internal/glue_algorithm_defs.h create mode 100644 Components/Include/pstl/internal/glue_algorithm_impl.h create mode 100644 Components/Include/pstl/internal/glue_execution_defs.h create mode 100644 Components/Include/pstl/internal/glue_memory_defs.h create mode 100644 Components/Include/pstl/internal/glue_memory_impl.h create mode 100644 Components/Include/pstl/internal/glue_numeric_defs.h create mode 100644 Components/Include/pstl/internal/glue_numeric_impl.h create mode 100644 Components/Include/pstl/internal/memory_impl.h create mode 100644 Components/Include/pstl/internal/numeric_fwd.h create mode 100644 Components/Include/pstl/internal/numeric_impl.h create mode 100644 Components/Include/pstl/internal/omp/parallel_for.h create mode 100644 Components/Include/pstl/internal/omp/parallel_for_each.h create mode 100644 Components/Include/pstl/internal/omp/parallel_invoke.h create mode 100644 Components/Include/pstl/internal/omp/parallel_merge.h create mode 100644 Components/Include/pstl/internal/omp/parallel_reduce.h create mode 100644 Components/Include/pstl/internal/omp/parallel_scan.h create mode 100644 Components/Include/pstl/internal/omp/parallel_stable_partial_sort.h create mode 100644 Components/Include/pstl/internal/omp/parallel_stable_sort.h create mode 100644 Components/Include/pstl/internal/omp/parallel_transform_reduce.h create mode 100644 Components/Include/pstl/internal/omp/parallel_transform_scan.h create mode 100644 Components/Include/pstl/internal/omp/util.h create mode 100644 
Components/Include/pstl/internal/parallel_backend.h create mode 100644 Components/Include/pstl/internal/parallel_backend_omp.h create mode 100644 Components/Include/pstl/internal/parallel_backend_serial.h create mode 100644 Components/Include/pstl/internal/parallel_backend_tbb.h create mode 100644 Components/Include/pstl/internal/parallel_backend_utils.h create mode 100644 Components/Include/pstl/internal/parallel_impl.h create mode 100644 Components/Include/pstl/internal/pstl_config.h create mode 100644 Components/Include/pstl/internal/unseq_backend_simd.h create mode 100644 Components/Include/pstl/internal/utils.h create mode 100644 Interfaces/VideoSubFinderCli/CMakeLists.txt create mode 100644 README.md delete mode 100644 docker/build.Dockerfile delete mode 100644 docker/build.sh delete mode 100644 docker/build_cuda.Dockerfile diff --git a/.dockerignore b/.dockerignore index 0398911..8740349 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,3 @@ /.idea/** -/docker/** /linux_build/** /Build/** \ No newline at end of file diff --git a/.github/workflows/build-cpu-linux-x64.yml b/.github/workflows/build-cpu-linux-x64.yml new file mode 100644 index 0000000..fb47c11 --- /dev/null +++ b/.github/workflows/build-cpu-linux-x64.yml @@ -0,0 +1,65 @@ + +name: Build CPU Linux X64 + +permissions: + contents: write +# Controls when the action will run. 
+on:
+  push:
+    branches:
+      - '*'
+  pull_request:
+    branches:
+      - '*'
+  # schedule:
+  #   - cron: "0 0 * * */3"
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+    inputs:
+      logLevel:
+        description: 'Log level'
+        required: true
+        default: 'warning'
+      tags:
+        description: 'Test scenario tags'
+
+env:
+  TZ: Asia/Shanghai
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  buildx:
+    runs-on: ubuntu-22.04
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v3
+      -
+        name: Get current date  # written to the step output "today" via $GITHUB_OUTPUT
+        id: date
+        run: echo "today=$(date +'%Y%m%d')" >> "$GITHUB_OUTPUT"
+      -
+        name: Assign release version  # exports release_version=<quoted VSF_VERSION value> to later steps
+        run: |
+          grep VSF_VERSION Interfaces/VideoSubFinderWXW/MyResource.h | sed -E 's/.+VSF_VERSION.+"(.+)"/release_version=\1/g' >> "$GITHUB_ENV"
+      -
+        name: Build cpu
+        run: bash ./Build/Docker/build.sh
+      -
+        name: Upload artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: videosubfinder-cli-cpu-linux-x64.tar.gz
+          path: |
+            ./Build/Docker/build/cpu/videosubfinder-cli-cpu-linux-x64.tar.gz
+      -
+        name: Release draft
+        uses: softprops/action-gh-release@v1
+        with:
+          draft: true
+          name: "${{ env.release_version }}"
+          tag_name: "${{ env.release_version }}-ci"
+          files: |
+            ./Build/Docker/build/cpu/videosubfinder-cli-cpu-linux-x64.tar.gz
+          body: "[Github Action](https://github.com/${{github.repository}}/actions/runs/${{github.run_id}})"
\ No newline at end of file
diff --git a/.github/workflows/build-cuda-linux-x64.yml b/.github/workflows/build-cuda-linux-x64.yml
new file mode 100644
index 0000000..a680b69
--- /dev/null
+++ b/.github/workflows/build-cuda-linux-x64.yml
@@ -0,0 +1,65 @@
+
+name: Build CUDA Linux X64
+
+permissions:
+  contents: write
+# Controls when the action will run.
+on:
+  push:
+    branches:
+      - '*'
+  pull_request:
+    branches:
+      - '*'
+  # schedule:
+  #   - cron: "0 0 * * */3"
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+    inputs:
+      logLevel:
+        description: 'Log level'
+        required: true
+        default: 'warning'
+      tags:
+        description: 'Test scenario tags'
+
+env:
+  TZ: Asia/Shanghai
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  buildx:
+    runs-on: ubuntu-22.04
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v3
+      -
+        name: Get current date  # written to the step output "today" via $GITHUB_OUTPUT
+        id: date
+        run: echo "today=$(date +'%Y%m%d')" >> "$GITHUB_OUTPUT"
+      -
+        name: Assign release version  # exports release_version=<quoted VSF_VERSION value> to later steps
+        run: |
+          grep VSF_VERSION Interfaces/VideoSubFinderWXW/MyResource.h | sed -E 's/.+VSF_VERSION.+"(.+)"/release_version=\1/g' >> "$GITHUB_ENV"
+      -
+        name: Build cuda
+        run: bash ./Build/Docker/build_cuda.sh
+      -
+        name: Upload artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: videosubfinder-cli-cuda-linux-x64.tar.gz
+          path: |
+            ./Build/Docker/build/cuda/videosubfinder-cli-cuda-linux-x64.tar.gz
+      -
+        name: Release draft
+        uses: softprops/action-gh-release@v1
+        with:
+          draft: true
+          name: "${{ env.release_version }}"
+          tag_name: "${{ env.release_version }}-ci"
+          files: |
+            ./Build/Docker/build/cuda/videosubfinder-cli-cuda-linux-x64.tar.gz
+          body: "[Github Action](https://github.com/${{github.repository}}/actions/runs/${{github.run_id}})"
\ No newline at end of file
diff --git a/.github/workflows/build-darwin-x64.yml b/.github/workflows/build-darwin-x64.yml
new file mode 100644
index 0000000..458bffd
--- /dev/null
+++ b/.github/workflows/build-darwin-x64.yml
@@ -0,0 +1,217 @@
+
+name: Build Darwin X64
+
+permissions:
+  contents: write
+# Controls when the action will run.
+on:
+  push:
+    branches:
+      - '*'
+  pull_request:
+    branches:
+      - '*'
+  # schedule:
+  #   - cron: "0 0 * * */3"
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+    inputs:
+      logLevel:
+        description: 'Log level'
+        required: true
+        default: 'warning'
+      tags:
+        description: 'Test scenario tags'
+
+env:
+  TZ: Asia/Shanghai
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  buildx:
+    runs-on: macos-12
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v3
+      -
+        name: Get current date  # written to the step output "today" via $GITHUB_OUTPUT
+        id: date
+        run: echo "today=$(date +'%Y%m%d')" >> "$GITHUB_OUTPUT"
+      -
+        name: Assign release version  # exports release_version=<quoted VSF_VERSION value> to later steps
+        run: |
+          grep VSF_VERSION Interfaces/VideoSubFinderWXW/MyResource.h | sed -E 's/.+VSF_VERSION.+"(.+)"/release_version=\1/g' >> "$GITHUB_ENV"
+      -
+        name: Empty Homebrew downloads cache
+        run: rm -rfv ~/Library/Caches/Homebrew/downloads/* || true
+      -
+        name: Restore Homebrew cache
+        uses: actions/cache/restore@v3
+        with:
+          path: |
+            ~/Library/Caches/Homebrew/downloads/
+          key: brew-
+      -
+        name: Install dependencies
+        run: HOMEBREW_NO_AUTO_UPDATE=1 brew install cmake wxwidgets@3.2 opencv@4 ffmpeg tbb
+      -
+        name: Save Homebrew cache
+        id: cache-primes-save
+        uses: actions/cache/save@v3
+        with:
+          path: |
+            ~/Library/Caches/Homebrew/downloads/
+          key: brew-
+      -
+        name: Build  # CPU count comes from sysctl; a shell alias is not expanded in a non-interactive script
+        run: |
+          ncpu="$(sysctl -n hw.logicalcpu)" && \
+          cd Build && \
+          cmake -DCMAKE_BUILD_TYPE=Release -DUSE_CUDA=OFF -DCMAKE_C_COMPILER=clang \
+            -DCMAKE_CXX_COMPILER=clang++ -DLIBCXX_ENABLE_PARALLEL_ALGORITHMS=YES \
+            -DFFMPEG_INCLUDE_DIRS=$(readlink -f /usr/local/Cellar/ffmpeg/5*/include) \
+            -DPSTL_PARALLEL_BACKEND="tbb" -DCMAKE_CXX_FLAGS=-isystem\ $(readlink -f /usr/local/Cellar/tbb/2*/include) .. && \
+          make VERBOSE=1 -j"${ncpu}" && \
+          true
+      -
+        name: Copy dependencies  # best effort: every cp is allowed to fail (|| true)
+        run: |
+          cd Build && \
+          mkdir work && \
+          mkdir work/lib && \
+          cd work && \
+          cp -fv /usr/local/lib/libwx_baseu-3.2.0.2.1.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libopencv_videoio.407.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libavcodec.59.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libavformat.59.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libavutil.57.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libswscale.6.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libavfilter.8.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libtbb.12.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libwx_osx_cocoau_core-3.2.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libopencv_imgcodecs.407.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libopencv_imgproc.407.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libopencv_core.407.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libsystem_pthread.dylib lib/ || true; \
+          cp -fv /usr/local/lib/liblzma.5.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libGIF.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libpcre2-32.0.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libjpeg.8.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libwebp.7.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libpng16.16.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libtiff.5.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libopenjp2.7.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libOpenEXR-3_1.30.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libImath-3_1.30.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libIlmThread-3_1.30.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libIex-3_1.30.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libopenblas.0.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libgfortran.5.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libgomp.1.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libquadmath.0.dylib lib/ || true; \
+          cp -fv /usr/local/lib/libgcc_s.1.1.dylib lib/ || true; \
+          cp -fv
/usr/local/lib/libsharpyuv.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libzstd.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libswresample.4.dylib lib/ || true; \ + cp -fv /usr/local/lib/libvpx.8.dylib lib/ || true; \ + cp -fv /usr/local/lib/libwebpmux.3.dylib lib/ || true; \ + cp -fv /usr/local/lib/libaribb24.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libdav1d.6.dylib lib/ || true; \ + cp -fv /usr/local/lib/libopencore-amrwb.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libsnappy.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libaom.3.dylib lib/ || true; \ + cp -fv /usr/local/lib/libvmaf.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libjxl.0.8.dylib lib/ || true; \ + cp -fv /usr/local/lib/libmp3lame.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libopencore-amrnb.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libopus.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/librav1e.0.6.dylib lib/ || true; \ + cp -fv /usr/local/lib/libspeex.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libSvtAv1Enc.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libtheoraenc.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libtheoradec.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libogg.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libvorbis.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libvorbisenc.2.dylib lib/ || true; \ + cp -fv /usr/local/lib/libx264.164.dylib lib/ || true; \ + cp -fv /usr/local/lib/libx265.199.dylib lib/ || true; \ + cp -fv /usr/local/lib/libsoxr.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libX11.6.dylib lib/ || true; \ + cp -fv /usr/local/lib/libxcb.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libXau.6.dylib lib/ || true; \ + cp -fv /usr/local/lib/libXdmcp.6.dylib lib/ || true; \ + cp -fv /usr/local/lib/libhwy.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libbrotlidec.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libbrotlicommon.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libbrotlienc.1.dylib lib/ || true; \ + cp -fv 
/usr/local/lib/libbluray.2.dylib lib/ || true; \ + cp -fv /usr/local/lib/libgnutls.30.dylib lib/ || true; \ + cp -fv /usr/local/lib/librist.4.dylib lib/ || true; \ + cp -fv /usr/local/lib/libsrt.1.5.dylib lib/ || true; \ + cp -fv /usr/local/lib/libzmq.5.dylib lib/ || true; \ + cp -fv /usr/local/lib/libfontconfig.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libfreetype.6.dylib lib/ || true; \ + cp -fv /usr/local/lib/libintl.8.dylib lib/ || true; \ + cp -fv /usr/local/lib/libp11-kit.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libidn2.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libunistring.5.dylib lib/ || true; \ + cp -fv /usr/local/lib/libtasn1.6.dylib lib/ || true; \ + cp -fv /usr/local/lib/libnettle.8.dylib lib/ || true; \ + cp -fv /usr/local/lib/libhogweed.6.dylib lib/ || true; \ + cp -fv /usr/local/lib/libgmp.10.dylib lib/ || true; \ + cp -fv /usr/local/lib/libmbedcrypto.13.dylib lib/ || true; \ + cp -fv /usr/local/lib/libcjson.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libssl.1.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libcrypto.1.1.dylib lib/ || true; \ + cp -fv /usr/local/lib/libsodium.23.dylib lib/ || true; \ + cp -fv /usr/local/lib/libpostproc.56.dylib lib/ || true; \ + cp -fv /usr/local/lib/librubberband.2.dylib lib/ || true; \ + cp -fv /usr/local/lib/libsamplerate.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libtesseract.5.dylib lib/ || true; \ + cp -fv /usr/local/lib/libass.9.dylib lib/ || true; \ + cp -fv /usr/local/lib/libvidstab.1.2.dylib lib/ || true; \ + cp -fv /usr/local/lib/libzimg.2.dylib lib/ || true; \ + cp -fv /usr/local/lib/liblept.5.dylib lib/ || true; \ + cp -fv /usr/local/lib/libfribidi.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libharfbuzz.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libunibreak.5.dylib lib/ || true; \ + cp -fv /usr/local/lib/libglib-2.0.0.dylib lib/ || true; \ + cp -fv /usr/local/lib/libgraphite2.3.dylib lib/ || true; \ + cp -fv /usr/local/lib/libpcre2-8.0.dylib lib/ || true; \ + cp -fv 
../Interfaces/VideoSubFinderCli/VideoSubFinderCli . && \
+          cp -fv ../Darwin_x64/* . && \
+          true
+      -
+        name: Archive artifacts
+        uses: thedoctor0/zip-release@0.7.1
+        with:
+          type: 'tar'
+          filename: 'videosubfinder-cli-darwin-x64.tar.gz'
+          directory: 'Build/work'
+          path: "./VideoSubFinderCli"
+      -
+        name: Upload artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: videosubfinder-cli-darwin-x64.tar.gz
+          path: |
+            ./Build/work/videosubfinder-cli-darwin-x64.tar.gz
+      -
+        name: Upload artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: dependencies.tar.gz
+          path: |
+            ./Build/work/lib
+      -
+        name: Release draft
+        uses: softprops/action-gh-release@v1
+        with:
+          draft: true
+          name: "${{ env.release_version }}"
+          tag_name: "${{ env.release_version }}-ci"
+          files: |
+            ./Build/work/videosubfinder-cli-darwin-x64.tar.gz
+          body: "[Github Action](https://github.com/${{github.repository}}/actions/runs/${{github.run_id}})"
\ No newline at end of file
diff --git a/Build/Darwin_x64/VideoSubFinderCli.run b/Build/Darwin_x64/VideoSubFinderCli.run
new file mode 100644
index 0000000..1cb5da5
--- /dev/null
+++ b/Build/Darwin_x64/VideoSubFinderCli.run
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Launcher: run VideoSubFinderCli from its own directory with ./lib on the dylib search path.
+cd "$(dirname "$0")" || exit 1
+export DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH:+$DYLD_LIBRARY_PATH:}$PWD/lib:/lib64"
+exec ./VideoSubFinderCli "$@"
diff --git a/Build/Docker/base.Dockerfile b/Build/Docker/base.Dockerfile
new file mode 100644
index 0000000..2a15f61
--- /dev/null
+++ b/Build/Docker/base.Dockerfile
@@ -0,0 +1,37 @@
+FROM ubuntu:20.04 as builder
+# Allow ubuntu to cache package downloads
+RUN rm -f /etc/apt/apt.conf.d/docker-clean
+ARG USE_GUI=0
+RUN --mount=type=cache,target=/var/cache/apt \
+    apt update
+RUN --mount=type=cache,target=/var/cache/apt \
+    DEBIAN_FRONTEND=noninteractive apt install -y git cmake wget libtbb-dev \
+    libavcodec-dev libgtk-3-dev libavformat-dev libswscale-dev libavfilter-dev build-essential \
+    && if [ "$USE_GUI" = "1" ] ; then DEBIAN_FRONTEND=noninteractive apt install -y \
+    libgtk-3-dev ffmpeg libavutil-dev libx264-dev \
+
;fi
+RUN mkdir -p /tmp/work \
+    && cd /tmp/work \
+    && git clone https://github.com/wxWidgets/wxWidgets.git \
+    && cd wxWidgets/ \
+    && git checkout v3.2.1 \
+    && git submodule update --init --recursive \
+    && mkdir buildgtk \
+    && cd buildgtk/ \
+    && ../configure --disable-gui \
+    && make -j$(nproc) \
+    && make install \
+    && rm -rf /tmp/work/wxWidgets
+RUN cd /tmp/work \
+    && wget https://github.com/opencv/opencv/archive/4.7.0.tar.gz \
+    && tar xvf 4.7.0.tar.gz \
+    && cd opencv-4.7.0/ \
+    && mkdir -p build \
+    && cd build \
+    && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GTK=OFF -DWITH_FFMPEG=ON -D CMAKE_BUILD_TYPE=RELEASE \
+    -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_TBB=ON -D WITH_V4L=ON -D WITH_OPENGL=ON \
+    -D WITH_CUBLAS=ON -DWITH_QT=OFF -DCUDA_NVCC_FLAGS="-D_FORCE_INLINES" .. \
+    && cmake --build . --config Release -j $(nproc) \
+    && make install \
+    && rm -rf /tmp/work/opencv-4.7.0 \
+    && rm -f /tmp/work/4.7.0.tar.gz
\ No newline at end of file
diff --git a/Build/Docker/base_cuda.Dockerfile b/Build/Docker/base_cuda.Dockerfile
new file mode 100644
index 0000000..20c5c30
--- /dev/null
+++ b/Build/Docker/base_cuda.Dockerfile
@@ -0,0 +1,35 @@
+FROM nvidia/cuda:11.7.0-devel-ubuntu20.04 as builder
+# Allow ubuntu to cache package downloads
+RUN rm -f /etc/apt/apt.conf.d/docker-clean
+ARG USE_GUI=0
+RUN --mount=type=cache,target=/var/cache/apt \
+    apt update
+RUN --mount=type=cache,target=/var/cache/apt \
+    DEBIAN_FRONTEND=noninteractive apt install -y git cmake wget libtbb-dev \
+    libavcodec-dev libgtk-3-dev libavformat-dev libswscale-dev libavfilter-dev \
+    && if [ "$USE_GUI" = "1" ] ; then DEBIAN_FRONTEND=noninteractive apt install -y \
+    build-essential libgtk-3-dev ffmpeg libavutil-dev libx264-dev \
+    ;fi
+RUN mkdir -p /tmp/work \
+    && cd /tmp/work \
+    && git clone https://github.com/wxWidgets/wxWidgets.git --branch v3.2.1 --depth=1 --recurse-submodules -j8 \
+    && cd wxWidgets/ \
+    && mkdir buildgtk \
+    && cd buildgtk/ \
+    && ../configure --disable-gui \
+    &&
make -j$(nproc) \
+    && make install \
+    && rm -rf /tmp/work/wxWidgets
+RUN cd /tmp/work \
+    && wget https://github.com/opencv/opencv/archive/4.7.0.tar.gz \
+    && tar xvf 4.7.0.tar.gz \
+    && cd opencv-4.7.0/ \
+    && mkdir -p build \
+    && cd build \
+    && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GTK=OFF -DWITH_FFMPEG=ON -D CMAKE_BUILD_TYPE=RELEASE \
+    -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_TBB=ON -D WITH_V4L=ON -D WITH_OPENGL=ON \
+    -D WITH_CUBLAS=ON -DWITH_QT=OFF -DCUDA_NVCC_FLAGS="-D_FORCE_INLINES" .. \
+    && cmake --build . --config Release -j $(nproc) \
+    && make install \
+    && rm -rf /tmp/work/opencv-4.7.0 \
+    && rm -f /tmp/work/4.7.0.tar.gz
\ No newline at end of file
diff --git a/Build/Docker/build.Dockerfile b/Build/Docker/build.Dockerfile
new file mode 100644
index 0000000..3e41e22
--- /dev/null
+++ b/Build/Docker/build.Dockerfile
@@ -0,0 +1,37 @@
+FROM eritpchy/videosubfinder-build:base
+COPY . /tmp/work/videosubfinder-src
+
+RUN cd /tmp/work/videosubfinder-src \
+    && rm -rf linux_build \
+    && mkdir -p linux_build \
+    && cd linux_build/ \
+    && cmake -DCMAKE_BUILD_TYPE=Release -DUSE_CUDA=OFF .. \
+    && cmake --build . --config Release -j $(nproc) \
+    && cp ./Interfaces/VideoSubFinderCli/VideoSubFinderCli /tmp/work/ \
+    && rm -rf /tmp/work/videosubfinder-src
+RUN cp -L /usr/local/lib/libwx_baseu-3.2.so.0 \
+    /usr/local/lib/libopencv_videoio.so.407 \
+    /usr/local/lib/libopencv_core.so.407 \
+    /usr/local/lib/libopencv_imgproc.so.407 \
+    /usr/local/lib/libopencv_imgcodecs.so.407 \
+    /tmp/work/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Build/Docker/build.sh b/Build/Docker/build.sh
new file mode 100644
index 0000000..c87c847
--- /dev/null
+++ b/Build/Docker/build.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -e
+cd "$(dirname "$0")"  # also works when invoked as "bash build.sh" (no slash in $0)
+if [[ "$GITHUB_ACTION" ]]; then
+    docker buildx build --cache-from type=gha --cache-to type=gha,mode=max \
+        -t videosubfinder-build:cpu -f build.Dockerfile ../..
+else
+    docker build -t videosubfinder-build:cpu -f build.Dockerfile ../..
+fi
+mkdir -p build/cpu/
+docker run --rm -v "$PWD/build/cpu/:$PWD/build/cpu/" videosubfinder-build:cpu \
+    bash -c "cd /tmp/work/ && tar cvzf '$PWD/build/cpu/videosubfinder-cli-cpu-linux-x64.tar.gz' *"
\ No newline at end of file
diff --git a/Build/Docker/build_all.sh b/Build/Docker/build_all.sh
new file mode 100644
index 0000000..d8df994
--- /dev/null
+++ b/Build/Docker/build_all.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Build the CPU and CUDA archives; sub-scripts run via bash because they are committed without the executable bit.
+set -e
+cd "$(dirname "$0")"
+bash ./build.sh
+bash ./build_cuda.sh
\ No newline at end of file
diff --git a/Build/Docker/build_base.sh b/Build/Docker/build_base.sh
new file mode 100644
index 0000000..7ed5eb4
--- /dev/null
+++ b/Build/Docker/build_base.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Build the CPU base image from base.Dockerfile and push it as eritpchy/videosubfinder-build:base.
+set -e
+cd "$(dirname "$0")"
+docker build -t eritpchy/videosubfinder-build:base -f base.Dockerfile ../..
+docker push eritpchy/videosubfinder-build:base
\ No newline at end of file
diff --git a/Build/Docker/build_base_cuda.sh b/Build/Docker/build_base_cuda.sh
new file mode 100644
index 0000000..de506e7
--- /dev/null
+++ b/Build/Docker/build_base_cuda.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Build the CUDA base image from base_cuda.Dockerfile and push it as eritpchy/videosubfinder-build:base-cuda.
+set -e
+cd "$(dirname "$0")"
+docker build -t eritpchy/videosubfinder-build:base-cuda -f base_cuda.Dockerfile ../..
+docker push eritpchy/videosubfinder-build:base-cuda
\ No newline at end of file
diff --git a/Build/Docker/build_cuda.Dockerfile b/Build/Docker/build_cuda.Dockerfile
new file mode 100644
index 0000000..7b855e2
--- /dev/null
+++ b/Build/Docker/build_cuda.Dockerfile
@@ -0,0 +1,35 @@
+FROM eritpchy/videosubfinder-build:base-cuda
+COPY . /tmp/work/videosubfinder-src
+RUN CUDA_DIR="$(ls -d1 /usr/local/cuda-*|head -1)" \
+    && ln -s $CUDA_DIR/targets/x86_64-linux/lib/libcudart.so /usr/lib/libcudart.so \
+    && export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_DIR/lib64:$CUDA_DIR/extras/CUPTI/lib64 PATH=$PATH:$CUDA_DIR/bin \
+    && cd /tmp/work/videosubfinder-src \
+    && rm -rf linux_build \
+    && mkdir -p linux_build \
+    && cd linux_build/ \
+    && cmake -DCMAKE_BUILD_TYPE=Release -DUSE_CUDA=ON .. \
+    && cmake --build .
--config Release -j $(nproc) \
+    && if [ "${USE_GUI:-0}" = "1" ] ; then cp ./Interfaces/VideoSubFinderWXW/VideoSubFinderWXW /tmp/work/; \
+    else cp ./Interfaces/VideoSubFinderCli/VideoSubFinderCli /tmp/work/ ; fi \
+    && rm -rf /tmp/work/videosubfinder-src
+RUN cp -L /usr/local/lib/libwx_baseu-3.2.so.0 \
+    /usr/local/lib/libopencv_videoio.so.407 \
+    /usr/local/lib/libopencv_core.so.407 \
+    /usr/local/lib/libopencv_imgproc.so.407 \
+    /usr/local/lib/libopencv_imgcodecs.so.407 \
+    /tmp/work/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Build/Docker/build_cuda.sh b/Build/Docker/build_cuda.sh
new file mode 100644
index 0000000..749f59d
--- /dev/null
+++ b/Build/Docker/build_cuda.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Build the CUDA image, then tar the contents of its /tmp/work into build/cuda/ on the host.
+set -e
+cd "$(dirname "$0")"
+if [[ "$GITHUB_ACTION" ]]; then
+    docker buildx build --cache-from type=gha --cache-to type=gha,mode=max \
+        -t videosubfinder-build:cuda -f build_cuda.Dockerfile ../..
+else
+    docker build -t videosubfinder-build:cuda -f build_cuda.Dockerfile ../..
+fi
+mkdir -p build/cuda/
+docker run --rm -v "$PWD/build/cuda/:$PWD/build/cuda/" videosubfinder-build:cuda \
+    bash -c "cd /tmp/work/ && tar cvzf '$PWD/build/cuda/videosubfinder-cli-cuda-linux-x64.tar.gz' *"
\ No newline at end of file
diff --git a/Build/Docker/run_cuda.Dockerfile b/Build/Docker/run_cuda.Dockerfile
new file mode 100644
index 0000000..41eee74
--- /dev/null
+++ b/Build/Docker/run_cuda.Dockerfile
@@ -0,0 +1,10 @@
+FROM nvidia/cuda:11.7.0-runtime-ubuntu20.04 as builder
+# Allow ubuntu to cache package downloads
+RUN rm -f /etc/apt/apt.conf.d/docker-clean
+ARG USE_GUI=0
+RUN --mount=type=cache,target=/var/cache/apt \
+    apt update \
+    && DEBIAN_FRONTEND=noninteractive apt install -y libavcodec58 \
+    libavformat58 libswscale5 libavfilter7 libpcre2-32-0 libtbb2
+ADD build/cuda/videosubfinder-cli-cuda-linux-x64.tar.gz /usr/lib
+RUN ls /usr/lib -l && mv /usr/lib/VideoSubFinderCli /
\ No newline at end of file
diff --git a/Build/Docker/run_cuda.sh b/Build/Docker/run_cuda.sh
new file mode 100644
index
0000000..243be84
--- /dev/null
+++ b/Build/Docker/run_cuda.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Build the CUDA runtime image and run VideoSubFinderCli inside it.
+# Usage: run_cuda.sh -i <input file> -o <output dir> [VideoSubFinderCli options...]
+# Every argument is forwarded unchanged to VideoSubFinderCli; -i/-o are only
+# inspected here so the matching host directories can be bind-mounted.
+set -e
+cd "$(dirname "$0")"
+
+input=
+output=
+# getopts stops at the first non-option word, so the outer loop steps over it
+# and resumes scanning until both paths are found or the arguments run out.
+while :; do
+    while getopts i:o:-: arg; do
+        case "$arg" in
+            i)
+                input="$OPTARG"
+                echo "Option $arg specified."
+                ;;
+            o)
+                output="$OPTARG"
+                echo "$output"
+                echo "Option $arg specified."
+                ;;
+            *)
+                # Not ours: the option is meant for VideoSubFinderCli, stay silent.
+                :
+                ;;
+        esac
+        [[ -n "$input" && -n "$output" ]] && break
+    done
+    ((OPTIND++))
+    [[ "$OPTIND" -gt "$#" ]] && break
+done
+
+if [[ -n "$input" ]]; then
+    input="$(realpath "$input")"
+    input="${input%/*}"  # mount the directory that contains the input file
+fi
+
+if [[ -n "$output" ]]; then
+    output="$(realpath "$output")"
+fi
+echo "Input: $input"
+echo "Output: $output"
+
+# Mount only the paths that were actually given; an empty -v spec is rejected by docker.
+mounts=()
+if [[ -n "$input" ]]; then
+    mounts+=(-v "$input:$input")
+fi
+if [[ -n "$output" && "$output" != "$input" ]]; then
+    mounts+=(-v "$output:$output")
+fi
+
+docker build -t videosubfinder:cuda -f run_cuda.Dockerfile .
+docker run -it --gpus all --rm "${mounts[@]}" videosubfinder:cuda /VideoSubFinderCli "$@"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 01041f9..f2cf4c6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,7 +37,7 @@ else()
     else()
         find_package(wxWidgets REQUIRED COMPONENTS base)
     endif(USE_GUI)
-    find_package(OpenCV REQUIRED)
+    find_package(OpenCV REQUIRED COMPONENTS videoio)
 endif(WIN32)
 
 if(USE_CUDA)
diff --git a/Components/FFMPEGVideo/FFMPEGVideo.h b/Components/FFMPEGVideo/FFMPEGVideo.h
index 92c9b46..97d18a4 100644
--- a/Components/FFMPEGVideo/FFMPEGVideo.h
+++ b/Components/FFMPEGVideo/FFMPEGVideo.h
@@ -88,6 +88,8 @@ class FFMPEGVideo: public CVideo
 	AVStream *video = NULL;
 #ifdef WIN32
 	const AVCodec *decoder = NULL;
+#elif defined(__APPLE__)
+	const AVCodec *decoder = NULL;
 #else
 	AVCodec *decoder = NULL;
 #endif
diff --git a/Components/IPAlgorithms/IPAlgorithms.h b/Components/IPAlgorithms/IPAlgorithms.h
index 64224dc..14378f6 100644
--- a/Components/IPAlgorithms/IPAlgorithms.h
+++ b/Components/IPAlgorithms/IPAlgorithms.h
@@ -22,8 +22,14 @@
 #include
 #include
#include +#ifdef __APPLE__ +#define _LIBCPP_HAS_PARALLEL_ALGORITHMS +#include "__pstl_execution" +#include "__pstl_algorithm" +#else #include #include +#endif #include #include #include diff --git a/Components/Include/__pstl_algorithm b/Components/Include/__pstl_algorithm new file mode 100644 index 0000000..79c1838 --- /dev/null +++ b/Components/Include/__pstl_algorithm @@ -0,0 +1,15 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __PSTL_ALGORITHM +#define __PSTL_ALGORITHM + +#include + +#endif /* __PSTL_ALGORITHM */ diff --git a/Components/Include/__pstl_config_site b/Components/Include/__pstl_config_site new file mode 100644 index 0000000..f33bbc2 --- /dev/null +++ b/Components/Include/__pstl_config_site @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __PSTL_CONFIG_SITE +#define __PSTL_CONFIG_SITE + +#define _PSTL_PAR_BACKEND_TBB +#endif // __PSTL_CONFIG_SITE diff --git a/Components/Include/__pstl_execution b/Components/Include/__pstl_execution new file mode 100644 index 0000000..0e2cd44 --- /dev/null +++ b/Components/Include/__pstl_execution @@ -0,0 +1,15 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __PSTL_EXECUTION +#define __PSTL_EXECUTION + +#include + +#endif /* __PSTL_EXECUTION */ diff --git a/Components/Include/__pstl_memory b/Components/Include/__pstl_memory new file mode 100644 index 0000000..12b7f5a --- /dev/null +++ b/Components/Include/__pstl_memory @@ -0,0 +1,15 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __PSTL_MEMORY +#define __PSTL_MEMORY + +#include + +#endif /* __PSTL_MEMORY */ diff --git a/Components/Include/__pstl_numeric b/Components/Include/__pstl_numeric new file mode 100644 index 0000000..cf168ef --- /dev/null +++ b/Components/Include/__pstl_numeric @@ -0,0 +1,15 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __PSTL_NUMERIC +#define __PSTL_NUMERIC + +#include + +#endif /* __PSTL_NUMERIC */ diff --git a/Components/Include/pstl/internal/algorithm_fwd.h b/Components/Include/pstl/internal/algorithm_fwd.h new file mode 100644 index 0000000..3dcf9e9 --- /dev/null +++ b/Components/Include/pstl/internal/algorithm_fwd.h @@ -0,0 +1,1202 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_ALGORITHM_FWD_H +#define _PSTL_ALGORITHM_FWD_H + +#include +#include +#include + +#include "pstl_config.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __internal +{ + +//------------------------------------------------------------------------ +// any_of +//------------------------------------------------------------------------ + +template +bool +__brick_any_of(const _ForwardIterator, const _ForwardIterator, _Pred, + /*__is_vector=*/std::false_type) noexcept; + +template +bool +__brick_any_of(const _RandomAccessIterator, const _RandomAccessIterator, _Pred, + /*__is_vector=*/std::true_type) noexcept; + +template +bool +__pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred) noexcept; + +template +bool +__pattern_any_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred); + +//------------------------------------------------------------------------ +// walk1 (pseudo) +// +// walk1 evaluates f(x) for each dereferenced value x drawn from [first,last) 
+//------------------------------------------------------------------------ + +template +void __brick_walk1(_ForwardIterator, _ForwardIterator, _Function, + /*vector=*/std::false_type) noexcept; + +template +void __brick_walk1(_RandomAccessIterator, _RandomAccessIterator, _Function, + /*vector=*/std::true_type) noexcept; + +template +void +__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function) noexcept; + +template +void +__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Function); + +template +void +__pattern_walk_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick) noexcept; + +template +void +__pattern_walk_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Brick); + +//------------------------------------------------------------------------ +// walk1_n +//------------------------------------------------------------------------ + +template +_ForwardIterator __brick_walk1_n(_ForwardIterator, _Size, _Function, + /*_IsVectorTag=*/std::false_type); + +template +_RandomAccessIterator __brick_walk1_n(_RandomAccessIterator, _DifferenceType, _Function, + /*vectorTag=*/std::true_type) noexcept; + +template +_ForwardIterator +__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Function) noexcept; + +template +_RandomAccessIterator +__pattern_walk1_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Function); + +template +_ForwardIterator +__pattern_walk_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Brick) noexcept; + +template +_RandomAccessIterator +__pattern_walk_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Brick); + +//------------------------------------------------------------------------ +// walk2 (pseudo) +// +// walk2 evaluates f(x,y) for deferenced values (x,y) drawn from [first1,last1) and 
[first2,...) +//------------------------------------------------------------------------ + +template +_ForwardIterator2 __brick_walk2(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function, + /*vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator2 __brick_walk2(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _Function, + /*vector=*/std::true_type) noexcept; + +template +_ForwardIterator2 __brick_walk2_n(_ForwardIterator1, _Size, _ForwardIterator2, _Function, + /*vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator2 __brick_walk2_n(_RandomAccessIterator1, _Size, _RandomAccessIterator2, _Function, + /*vector=*/std::true_type) noexcept; + +template +_ForwardIterator2 +__pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Function); + +template +_ForwardIterator2 +__pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Function) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, + _Function); + +template +_ForwardIterator2 +__pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _Brick) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Brick); + +template +_ForwardIterator2 +__pattern_walk2_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size, + 
_RandomAccessIterator2, _Brick); + +//------------------------------------------------------------------------ +// walk3 (pseudo) +// +// walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...) +//------------------------------------------------------------------------ + +template +_ForwardIterator3 __brick_walk3(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3, _Function, + /*vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator3 __brick_walk3(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator3, _Function, + /*vector=*/std::true_type) noexcept; + +template +_ForwardIterator3 +__pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3, + _Function) noexcept; + +template +_RandomAccessIterator3 +__pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator3, _Function); + +//------------------------------------------------------------------------ +// equal +//------------------------------------------------------------------------ + +template +bool __brick_equal(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _BinaryPredicate, + /* is_vector = */ std::false_type) noexcept; + +template +bool __brick_equal(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BinaryPredicate, + /* is_vector = */ std::true_type) noexcept; + +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _BinaryPredicate) noexcept; + +template +bool +__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _BinaryPredicate); + +template +bool __brick_equal(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate, + /* 
is_vector = */ std::false_type) noexcept; + +template +bool __brick_equal(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _BinaryPredicate, /* is_vector = */ std::true_type) noexcept; + +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; + +template +bool +__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); + +//------------------------------------------------------------------------ +// find_if +//------------------------------------------------------------------------ + +template +_ForwardIterator __brick_find_if(_ForwardIterator, _ForwardIterator, _Predicate, + /*is_vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator __brick_find_if(_RandomAccessIterator, _RandomAccessIterator, _Predicate, + /*is_vector=*/std::true_type) noexcept; + +template +_ForwardIterator +__pattern_find_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept; + +template +_RandomAccessIterator +__pattern_find_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Predicate); + +//------------------------------------------------------------------------ +// find_end +//------------------------------------------------------------------------ + +template +_ForwardIterator1 __brick_find_end(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate, + /*__is_vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator1 __brick_find_end(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator2, _BinaryPredicate, + /*__is_vector=*/std::true_type) noexcept; + +template +_ForwardIterator1 +__pattern_find_end(_Tag, _ExecutionPolicy&&, 
_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; + +template +_RandomAccessIterator1 +__pattern_find_end(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate) noexcept; + +//------------------------------------------------------------------------ +// find_first_of +//------------------------------------------------------------------------ + +template +_ForwardIterator1 __brick_find_first_of(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate, + /*__is_vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator1 __brick_find_first_of(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator2, _BinaryPredicate, + /*__is_vector=*/std::true_type) noexcept; + +template +_ForwardIterator1 +__pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _BinaryPredicate) noexcept; + +template +_RandomAccessIterator1 +__pattern_find_first_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate) noexcept; + +//------------------------------------------------------------------------ +// search +//------------------------------------------------------------------------ + +template +_ForwardIterator1 __brick_search(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate, + /*vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator1 __brick_search(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator2, _BinaryPredicate, + /*vector=*/std::true_type) noexcept; + +template +_ForwardIterator1 +__pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1, 
_ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; + +template +_RandomAccessIterator1 +__pattern_search(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate) noexcept; + +//------------------------------------------------------------------------ +// search_n +//------------------------------------------------------------------------ + +template +_ForwardIterator +__brick_search_n(_ForwardIterator, _ForwardIterator, _Size, const _Tp&, _BinaryPredicate, + /*vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator +__brick_search_n(_RandomAccessIterator, _RandomAccessIterator, _Size, const _Tp&, _BinaryPredicate, + /*vector=*/std::true_type) noexcept; + +template +_ForwardIterator +__pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Size, const _Tp&, + _BinaryPredicate) noexcept; + +template +_RandomAccessIterator +__pattern_search_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Size, + const _Tp&, _BinaryPredicate) noexcept; + +//------------------------------------------------------------------------ +// copy_n +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_copy_n(_ForwardIterator, _Size, _OutputIterator, + /*vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_copy_n(_RandomAccessIterator, _Size, _OutputIterator, + /*vector=*/std::true_type) noexcept; + +//------------------------------------------------------------------------ +// copy +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_copy(_ForwardIterator, _ForwardIterator, _OutputIterator, + /*vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_copy(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, 
+ /*vector=*/std::true_type) noexcept; + +//------------------------------------------------------------------------ +// move +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_move(_ForwardIterator, _ForwardIterator, _OutputIterator, + /*vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_move(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, + /*vector=*/std::true_type) noexcept; + +//------------------------------------------------------------------------ +// swap_ranges +//------------------------------------------------------------------------ +template +_OutputIterator __brick_swap_ranges(_ForwardIterator, _ForwardIterator, _OutputIterator, + /*vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_swap_ranges(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, + /*vector=*/std::true_type) noexcept; + +//------------------------------------------------------------------------ +// copy_if +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_copy_if(_ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryPredicate, + /*vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_copy_if(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _UnaryPredicate, + /*vector=*/std::true_type) noexcept; + +template +std::pair<_DifferenceType, _DifferenceType> +__brick_calc_mask_1(_ForwardIterator, _ForwardIterator, bool* __restrict, _UnaryPredicate, + /*vector=*/std::false_type) noexcept; +template +std::pair<_DifferenceType, _DifferenceType> +__brick_calc_mask_1(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _UnaryPredicate, + /*vector=*/std::true_type) noexcept; + +template +void +__brick_copy_by_mask(_ForwardIterator, _ForwardIterator, _OutputIterator, bool*, + /*vector=*/std::false_type) noexcept; + +template +void 
+__brick_copy_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, bool* __restrict, + /*vector=*/std::true_type) noexcept; + +template +void +__brick_partition_by_mask(_ForwardIterator, _ForwardIterator, _OutputIterator1, _OutputIterator2, bool*, + /*vector=*/std::false_type) noexcept; + +template +void +__brick_partition_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator1, _OutputIterator2, bool*, + /*vector=*/std::true_type) noexcept; + +template +_OutputIterator +__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _UnaryPredicate) noexcept; + +template +_OutputIterator +__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator, _UnaryPredicate); + +//------------------------------------------------------------------------ +// count +//------------------------------------------------------------------------ + +template +typename std::iterator_traits<_RandomAccessIterator>::difference_type + __brick_count(_RandomAccessIterator, _RandomAccessIterator, _Predicate, + /* is_vector = */ std::true_type) noexcept; + +template +typename std::iterator_traits<_ForwardIterator>::difference_type + __brick_count(_ForwardIterator, _ForwardIterator, _Predicate, + /* is_vector = */ std::false_type) noexcept; + +template +typename std::iterator_traits<_ForwardIterator>::difference_type +__pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept; + +template +typename std::iterator_traits<_RandomAccessIterator>::difference_type +__pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Predicate); + +//------------------------------------------------------------------------ +// unique +//------------------------------------------------------------------------ + +template +_ForwardIterator __brick_unique(_ForwardIterator, 
_ForwardIterator, _BinaryPredicate, + /*is_vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator __brick_unique(_RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, + /*is_vector=*/std::true_type) noexcept; + +template +_ForwardIterator +__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate) noexcept; + +template +_RandomAccessIterator +__pattern_unique(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _BinaryPredicate) noexcept; + +//------------------------------------------------------------------------ +// unique_copy +//------------------------------------------------------------------------ + +template +OutputIterator __brick_unique_copy(_ForwardIterator, _ForwardIterator, OutputIterator, _BinaryPredicate, + /*vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_unique_copy(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _BinaryPredicate, + /*vector=*/std::true_type) noexcept; + +template +_OutputIterator +__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _BinaryPredicate) noexcept; + +template +_DifferenceType +__brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate, + /*vector=*/std::false_type) noexcept; + +template +_DifferenceType +__brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate, + /*vector=*/std::true_type) noexcept; + +template +_OutputIterator +__pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator, _BinaryPredicate); + +//------------------------------------------------------------------------ +// reverse +//------------------------------------------------------------------------ + +template +void __brick_reverse(_BidirectionalIterator, _BidirectionalIterator, + 
/*__is_vector=*/std::false_type) noexcept; + +template +void __brick_reverse(_RandomAccessIterator, _RandomAccessIterator, + /*__is_vector=*/std::true_type) noexcept; + +template +void __brick_reverse(_BidirectionalIterator, _BidirectionalIterator, _BidirectionalIterator, + /*is_vector=*/std::false_type) noexcept; + +template +void __brick_reverse(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + /*is_vector=*/std::true_type) noexcept; + +template +void +__pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator) noexcept; + +template +void +__pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator); + +//------------------------------------------------------------------------ +// reverse_copy +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_reverse_copy(_BidirectionalIterator, _BidirectionalIterator, _OutputIterator, + /*is_vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_reverse_copy(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, + /*is_vector=*/std::true_type) noexcept; + +template +_OutputIterator +__pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _OutputIterator) noexcept; + +template +_OutputIterator +__pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator); + +//------------------------------------------------------------------------ +// rotate +//------------------------------------------------------------------------ + +template +_ForwardIterator __brick_rotate(_ForwardIterator, _ForwardIterator, _ForwardIterator, + /*is_vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator __brick_rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + /*is_vector=*/std::true_type) noexcept; + +template 
+_ForwardIterator +__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator) noexcept; + +template +_RandomAccessIterator +__pattern_rotate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator); + +//------------------------------------------------------------------------ +// rotate_copy +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_rotate_copy(_ForwardIterator, _ForwardIterator, _ForwardIterator, _OutputIterator, + /*__is_vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_rotate_copy(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator, + /*__is_vector=*/std::true_type) noexcept; + +template +_OutputIterator +__pattern_rotate_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, + _OutputIterator) noexcept; + +template +_OutputIterator +__pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _OutputIterator); + +//------------------------------------------------------------------------ +// is_partitioned +//------------------------------------------------------------------------ + +template +bool __brick_is_partitioned(_ForwardIterator, _ForwardIterator, _UnaryPredicate, + /*is_vector=*/std::false_type) noexcept; + +template +bool __brick_is_partitioned(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, + /*is_vector=*/std::true_type) noexcept; + +template +bool +__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; + +template +bool +__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); + +//------------------------------------------------------------------------ +// 
partition +//------------------------------------------------------------------------ + +template +_ForwardIterator __brick_partition(_ForwardIterator, _ForwardIterator, _UnaryPredicate, + /*is_vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator __brick_partition(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, + /*is_vector=*/std::true_type) noexcept; + +template +_ForwardIterator +__pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; + +template +_RandomAccessIterator +__pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); + +//------------------------------------------------------------------------ +// stable_partition +//------------------------------------------------------------------------ + +template +_BidirectionalIterator __brick_stable_partition(_BidirectionalIterator, _BidirectionalIterator, _UnaryPredicate, + /*__is_vector=*/std::false_type) noexcept; + +template +_RandomAccessIterator __brick_stable_partition(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, + /*__is_vector=*/std::true_type) noexcept; + +template +_BidirectionalIterator +__pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _UnaryPredicate) noexcept; + +template +_RandomAccessIterator +__pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate) noexcept; + +//------------------------------------------------------------------------ +// partition_copy +//------------------------------------------------------------------------ + +template +std::pair<_OutputIterator1, _OutputIterator2> + __brick_partition_copy(_ForwardIterator, _ForwardIterator, _OutputIterator1, _OutputIterator2, _UnaryPredicate, + /*is_vector=*/std::false_type) noexcept; + +template +std::pair<_OutputIterator1, 
_OutputIterator2> __brick_partition_copy(_RandomAccessIterator, _RandomAccessIterator, + _OutputIterator1, _OutputIterator2, + _UnaryPredicate, + /*is_vector=*/std::true_type) noexcept; + +template +std::pair<_OutputIterator1, _OutputIterator2> +__pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator1, + _OutputIterator2, _UnaryPredicate) noexcept; + +template +std::pair<_OutputIterator1, _OutputIterator2> +__pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator1, _OutputIterator2, _UnaryPredicate); + +//------------------------------------------------------------------------ +// sort +//------------------------------------------------------------------------ + +template +void +__pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, + _IsMoveConstructible) noexcept; + +template +void +__pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, + /*is_move_constructible=*/std::true_type); + +//------------------------------------------------------------------------ +// stable_sort +//------------------------------------------------------------------------ + +template +void +__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; + +template +void +__pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); + +//------------------------------------------------------------------------ +// partial_sort +//------------------------------------------------------------------------ + +template +void +__pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _Compare) noexcept; + +template +void +__pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, 
_RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); + +//------------------------------------------------------------------------ +// partial_sort_copy +//------------------------------------------------------------------------ + +template +_RandomAccessIterator2 +__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _Compare) noexcept; + +template +_RandomAccessIterator2 +__pattern_partial_sort_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, _Compare); + +//------------------------------------------------------------------------ +// adjacent_find +//------------------------------------------------------------------------ + +template +_RandomAccessIterator +__brick_adjacent_find(_RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, + /* IsVector = */ std::true_type, bool) noexcept; + +template +_ForwardIterator +__brick_adjacent_find(_ForwardIterator, _ForwardIterator, _BinaryPredicate, + /* IsVector = */ std::false_type, bool) noexcept; + +template +_ForwardIterator +__pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, bool) noexcept; + +template +_RandomAccessIterator +__pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _BinaryPredicate, bool); + +//------------------------------------------------------------------------ +// nth_element +//------------------------------------------------------------------------ +template +void +__pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _Compare) noexcept; + +template +void +__pattern_nth_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + 
_RandomAccessIterator, _Compare) noexcept; + +//------------------------------------------------------------------------ +// fill, fill_n +//------------------------------------------------------------------------ +template +void +__brick_fill(_RandomAccessIterator, _RandomAccessIterator, const _Tp&, + /* __is_vector = */ std::true_type) noexcept; + +template +void +__brick_fill(_ForwardIterator, _ForwardIterator, const _Tp&, + /* __is_vector = */ std::false_type) noexcept; + +template +void +__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, const _Tp&) noexcept; + +template +_RandomAccessIterator +__pattern_fill(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, const _Tp&); + +template +_RandomAccessIterator +__brick_fill_n(_RandomAccessIterator, _Size, const _Tp&, + /* __is_vector = */ std::true_type) noexcept; + +template +_OutputIterator +__brick_fill_n(_OutputIterator, _Size, const _Tp&, + /* __is_vector = */ std::false_type) noexcept; + +template +_OutputIterator +__pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator, _Size, const _Tp&) noexcept; + +template +_RandomAccessIterator +__pattern_fill_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, const _Tp&); + +//------------------------------------------------------------------------ +// generate, generate_n +//------------------------------------------------------------------------ + +template +void __brick_generate(_RandomAccessIterator, _RandomAccessIterator, _Generator, + /* is_vector = */ std::true_type) noexcept; + +template +void __brick_generate(_ForwardIterator, _ForwardIterator, _Generator, + /* is_vector = */ std::false_type) noexcept; + +template +void +__pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Generator) noexcept; + +template +_RandomAccessIterator +__pattern_generate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, 
_RandomAccessIterator, + _Generator); + +template +_RandomAccessIterator __brick_generate_n(_RandomAccessIterator, Size, _Generator, + /* is_vector = */ std::true_type) noexcept; + +template +OutputIterator __brick_generate_n(OutputIterator, Size, _Generator, + /* is_vector = */ std::false_type) noexcept; + +template +OutputIterator +__pattern_generate_n(_Tag, _ExecutionPolicy&&, OutputIterator, Size, _Generator) noexcept; + +template +_RandomAccessIterator +__pattern_generate_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, Size, _Generator); + +//------------------------------------------------------------------------ +// remove +//------------------------------------------------------------------------ +template +_ForwardIterator __brick_remove_if(_ForwardIterator, _ForwardIterator, _UnaryPredicate, + /* __is_vector = */ std::false_type) noexcept; + +template +_RandomAccessIterator __brick_remove_if(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, + /* __is_vector = */ std::true_type) noexcept; + +template +_ForwardIterator +__pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; + +template +_RandomAccessIterator +__pattern_remove_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate) noexcept; + +//------------------------------------------------------------------------ +// merge +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_merge(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _OutputIterator, _Compare, + /* __is_vector = */ std::false_type) noexcept; + +template +_OutputIterator __brick_merge(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator2, _OutputIterator, _Compare, + /* __is_vector = */ std::true_type) noexcept; + +template +_OutputIterator +__pattern_merge(_Tag, 
_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _OutputIterator, _Compare) noexcept; + +template +_OutputIterator +__pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare); + +//------------------------------------------------------------------------ +// inplace_merge +//------------------------------------------------------------------------ + +template +void __brick_inplace_merge(_BidirectionalIterator, _BidirectionalIterator, _BidirectionalIterator, _Compare, + /* __is_vector = */ std::false_type) noexcept; + +template +void __brick_inplace_merge(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, + /* __is_vector = */ std::true_type) noexcept; + +template +void +__pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _BidirectionalIterator, _Compare) noexcept; + +template +void +__pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); + +//------------------------------------------------------------------------ +// includes +//------------------------------------------------------------------------ + +template +bool +__pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _Compare) noexcept; + +template +bool +__pattern_includes(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _Compare); + +//------------------------------------------------------------------------ +// set_union +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_set_union(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, 
_ForwardIterator2, + _OutputIterator, _Compare, + /*__is_vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_set_union(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator2, _OutputIterator, _Compare, + /*__is_vector=*/std::true_type) noexcept; + +template +_OutputIterator +__pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; + +template +_OutputIterator +__pattern_set_union(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare); + +//------------------------------------------------------------------------ +// set_intersection +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_set_intersection(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _OutputIterator, _Compare, + /*__is_vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_set_intersection(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator2, _OutputIterator, _Compare, + /*__is_vector=*/std::true_type) noexcept; + +template +_OutputIterator +__pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; + +template +_OutputIterator +__pattern_set_intersection(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, + _Compare); + +//------------------------------------------------------------------------ +// set_difference +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_set_difference(_ForwardIterator1, 
_ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _OutputIterator, _Compare, + /*__is_vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_set_difference(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator2, _OutputIterator, _Compare, + /*__is_vector=*/std::true_type) noexcept; + +template +_OutputIterator +__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; + +template +_OutputIterator +__pattern_set_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare); + +//------------------------------------------------------------------------ +// set_symmetric_difference +//------------------------------------------------------------------------ + +template +_OutputIterator __brick_set_symmetric_difference(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare, + /*__is_vector=*/std::false_type) noexcept; + +template +_OutputIterator __brick_set_symmetric_difference(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _RandomAccessIterator2, _OutputIterator, _Compare, + /*__is_vector=*/std::true_type) noexcept; + +template +_OutputIterator +__pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; + +template +_OutputIterator +__pattern_set_symmetric_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _OutputIterator, _Compare); + +//------------------------------------------------------------------------ +// is_heap_until 
+//------------------------------------------------------------------------ + +template +_RandomAccessIterator __brick_is_heap_until(_RandomAccessIterator, _RandomAccessIterator, _Compare, + /* __is_vector = */ std::false_type) noexcept; + +template +_RandomAccessIterator __brick_is_heap_until(_RandomAccessIterator, _RandomAccessIterator, _Compare, + /* __is_vector = */ std::true_type) noexcept; + +template +_RandomAccessIterator +__pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; + +template +_RandomAccessIterator +__pattern_is_heap_until(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare) noexcept; + +//------------------------------------------------------------------------ +// min_element +//------------------------------------------------------------------------ + +template +_ForwardIterator __brick_min_element(_ForwardIterator, _ForwardIterator, _Compare, + /* __is_vector = */ std::false_type) noexcept; + +template +_RandomAccessIterator __brick_min_element(_RandomAccessIterator, _RandomAccessIterator, _Compare, + /* __is_vector = */ std::true_type) noexcept; + +template +_ForwardIterator +__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept; + +template +_RandomAccessIterator +__pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); + +//------------------------------------------------------------------------ +// minmax_element +//------------------------------------------------------------------------ + +template +std::pair<_ForwardIterator, _ForwardIterator> __brick_minmax_element(_ForwardIterator, _ForwardIterator, _Compare, + /* __is_vector = */ std::false_type) noexcept; + +template +std::pair<_RandomAccessIterator, _RandomAccessIterator> + __brick_minmax_element(_RandomAccessIterator, _RandomAccessIterator, _Compare, 
+ /* __is_vector = */ std::true_type) noexcept; + +template +std::pair<_ForwardIterator, _ForwardIterator> +__pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept; + +template +std::pair<_RandomAccessIterator, _RandomAccessIterator> +__pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); + +//------------------------------------------------------------------------ +// mismatch +//------------------------------------------------------------------------ + +template +std::pair<_ForwardIterator1, _ForwardIterator2> __brick_mismatch(_ForwardIterator1, _ForwardIterator1, + _ForwardIterator2, _ForwardIterator2, _Predicate, + /* __is_vector = */ std::false_type) noexcept; + +template +std::pair<_RandomAccessIterator1, _RandomAccessIterator2> + __brick_mismatch(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _Predicate, + /* __is_vector = */ std::true_type) noexcept; + +template +std::pair<_ForwardIterator1, _ForwardIterator2> +__pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _Predicate) noexcept; + +template +std::pair<_RandomAccessIterator1, _RandomAccessIterator2> +__pattern_mismatch(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _Predicate) noexcept; + +//------------------------------------------------------------------------ +// lexicographical_compare +//------------------------------------------------------------------------ + +template +bool __brick_lexicographical_compare(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _Compare, + /* __is_vector = */ std::false_type) noexcept; + +template +bool __brick_lexicographical_compare(_RandomAccessIterator1, _RandomAccessIterator1, 
_RandomAccessIterator2, + _RandomAccessIterator2, _Compare, + /* __is_vector = */ std::true_type) noexcept; + +template +bool +__pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _Compare) noexcept; + +template +bool +__pattern_lexicographical_compare(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _Compare) noexcept; + +} // namespace __internal +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_ALGORITHM_FWD_H */ diff --git a/Components/Include/pstl/internal/algorithm_impl.h b/Components/Include/pstl/internal/algorithm_impl.h new file mode 100644 index 0000000..2b505d9 --- /dev/null +++ b/Components/Include/pstl/internal/algorithm_impl.h @@ -0,0 +1,3819 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_ALGORITHM_IMPL_H +#define _PSTL_ALGORITHM_IMPL_H + +#include +#include +#include +#include +#include + +#include "execution_impl.h" +#include "memory_impl.h" +#include "parallel_backend.h" +#include "parallel_backend_utils.h" +#include "parallel_impl.h" +#include "pstl_config.h" +#include "unseq_backend_simd.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __internal +{ + +//------------------------------------------------------------------------ +// any_of +//------------------------------------------------------------------------ + +template +bool +__brick_any_of(const _ForwardIterator __first, const _ForwardIterator __last, _Pred __pred, + /*__is_vector=*/std::false_type) noexcept +{ + return std::any_of(__first, __last, __pred); +}; + +template +bool +__brick_any_of(const _RandomAccessIterator __first, const _RandomAccessIterator __last, _Pred __pred, + /*__is_vector=*/std::true_type) noexcept +{ + return __unseq_backend::__simd_or(__first, __last - __first, __pred); +}; + +template +bool +__pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred) noexcept +{ + return __internal::__brick_any_of(__first, __last, __pred, typename _Tag::__is_vector{}); +} + +template +bool +__pattern_any_of(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Pred __pred) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + return __internal::__parallel_or(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j) + { return __internal::__brick_any_of(__i, __j, __pred, _IsVector{}); }); + }); +} + +// [alg.foreach] +// for_each_n with no 
policy + +template +_ForwardIterator +__for_each_n_it_serial(_ForwardIterator __first, _Size __n, _Function __f) +{ + for (; __n > 0; ++__first, --__n) + __f(__first); + return __first; +} + +//------------------------------------------------------------------------ +// walk1 (pseudo) +// +// walk1 evaluates f(x) for each dereferenced value x drawn from [first,last) +//------------------------------------------------------------------------ +template +void +__brick_walk1(_ForwardIterator __first, _ForwardIterator __last, _Function __f, /*vector=*/std::false_type) noexcept +{ + std::for_each(__first, __last, __f); +} + +template +void +__brick_walk1(_RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f, + /*vector=*/std::true_type) noexcept +{ + __unseq_backend::__simd_walk_1(__first, __last - __first, __f); +} + +template +void +__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f) noexcept +{ + __internal::__brick_walk1(__first, __last, __f, typename _Tag::__is_vector{}); +} + +template +void +__pattern_walk1(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Function __f) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + __internal::__except_handler( + [&]() + { + __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__f](_RandomAccessIterator __i, _RandomAccessIterator __j) + { __internal::__brick_walk1(__i, __j, __f, _IsVector{}); }); + }); +} + +template +void +__pattern_walk_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Brick __brick) noexcept +{ + __brick(__first, __last); +} + +template +void +__pattern_walk_brick(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Brick __brick) +{ + using __backend_tag = typename 
decltype(__tag)::__backend_tag; + + __internal::__except_handler( + [&]() + { + __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) + { __brick(__i, __j); }); + }); +} + +//------------------------------------------------------------------------ +// walk1_n +//------------------------------------------------------------------------ +template +_ForwardIterator +__brick_walk1_n(_ForwardIterator __first, _Size __n, _Function __f, /*_IsVectorTag=*/std::false_type) +{ + return __internal::__for_each_n_it_serial(__first, __n, + [&__f](_ForwardIterator __it) { __f(*__it); }); // calling serial version +} + +template +_RandomAccessIterator +__brick_walk1_n(_RandomAccessIterator __first, _DifferenceType __n, _Function __f, + /*vectorTag=*/std::true_type) noexcept +{ + return __unseq_backend::__simd_walk_1(__first, __n, __f); +} + +template +_ForwardIterator +__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f) noexcept +{ + return __internal::__brick_walk1_n(__first, __n, __f, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_walk1_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, + _Function __f) +{ + __internal::__pattern_walk1(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f); + + return __first + __n; +} + +template +_ForwardIterator +__pattern_walk_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Brick __brick) noexcept +{ + return __brick(__first, __n); +} + +template +_RandomAccessIterator +__pattern_walk_brick_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _Size __n, _Brick __brick) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + 
__par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, + [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j - __i); }); + return __first + __n; + }); +} + +//------------------------------------------------------------------------ +// walk2 (pseudo) +// +// walk2 evaluates f(x,y) for deferenced values (x,y) drawn from [first1,last1) and [first2,...) +//------------------------------------------------------------------------ +template +_ForwardIterator2 +__brick_walk2(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f, + /*vector=*/std::false_type) noexcept +{ + for (; __first1 != __last1; ++__first1, ++__first2) + __f(*__first1, *__first2); + return __first2; +} + +template +_RandomAccessIterator2 +__brick_walk2(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _Function __f, + /*vector=*/std::true_type) noexcept +{ + return __unseq_backend::__simd_walk_2(__first1, __last1 - __first1, __first2, __f); +} + +template +_ForwardIterator2 +__brick_walk2_n(_ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f, + /*vector=*/std::false_type) noexcept +{ + for (; __n > 0; --__n, ++__first1, ++__first2) + __f(*__first1, *__first2); + return __first2; +} + +template +_RandomAccessIterator2 +__brick_walk2_n(_RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterator2 __first2, _Function __f, + /*vector=*/std::true_type) noexcept +{ + return __unseq_backend::__simd_walk_2(__first1, __n, __first2, __f); +} + +template +_ForwardIterator2 +__pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Function __f) noexcept +{ + return __internal::__brick_walk2(__first1, __last1, __first2, __f, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator2 
+__pattern_walk2(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Function __f) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__f, __first1, __first2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) + { __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, _IsVector{}); }); + return __first2 + (__last1 - __first1); + }); +} + +template +_ForwardIterator2 +__pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, + _Function __f) noexcept +{ + return __internal::__brick_walk2_n(__first1, __n, __first2, __f, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator2 +__pattern_walk2_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _Size __n, _RandomAccessIterator2 __first2, _Function __f) +{ + return __internal::__pattern_walk2(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __first2, __f); +} + +template +_ForwardIterator2 +__pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Brick __brick) noexcept +{ + return __brick(__first1, __last1, __first2); +} + +template +_RandomAccessIterator2 +__pattern_walk2_brick(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Brick __brick) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, 
__brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) + { __brick(__i, __j, __first2 + (__i - __first1)); }); + return __first2 + (__last1 - __first1); + }); +} + +template +_ForwardIterator2 +__pattern_walk2_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, + _Brick __brick) noexcept +{ + return __brick(__first1, __n, __first2); +} + +template +_RandomAccessIterator2 +__pattern_walk2_brick_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _Size __n, _RandomAccessIterator2 __first2, _Brick __brick) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) + { __brick(__i, __j - __i, __first2 + (__i - __first1)); }); + return __first2 + __n; + }); +} + +//------------------------------------------------------------------------ +// walk3 (pseudo) +// +// walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...) 
+//------------------------------------------------------------------------ +template +_ForwardIterator3 +__brick_walk3(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator3 __first3, _Function __f, /*vector=*/std::false_type) noexcept +{ + for (; __first1 != __last1; ++__first1, ++__first2, ++__first3) + __f(*__first1, *__first2, *__first3); + return __first3; +} + +template +_RandomAccessIterator3 +__brick_walk3(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator3 __first3, _Function __f, /*vector=*/std::true_type) noexcept +{ + return __unseq_backend::__simd_walk_3(__first1, __last1 - __first1, __first2, __first3, __f); +} + +template +_ForwardIterator3 +__pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) noexcept +{ + return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator3 +__pattern_walk3(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, + _Function __f) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__f, __first1, __first2, __first3](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, + _IsVector{}); + }); + return __first3 + (__last1 - __first1); + }); +} + +//------------------------------------------------------------------------ +// equal 
+//------------------------------------------------------------------------ + +template +bool +__brick_equal(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _BinaryPredicate __p, /* IsVector = */ std::false_type) noexcept +{ + return std::equal(__first1, __last1, __first2, __last2, __p); +} + +template +bool +__brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _BinaryPredicate __p, /* is_vector = */ std::true_type) noexcept +{ + if (__last1 - __first1 != __last2 - __first2) + return false; + + return __unseq_backend::__simd_first(__first1, __last1 - __first1, __first2, std::not_fn(__p)).first == __last1; +} + +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __p) noexcept +{ + return __internal::__brick_equal(__first1, __last1, __first2, __last2, __p, typename _Tag::__is_vector{}); +} + +template +bool +__pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _BinaryPredicate __p) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + if (__last1 - __first1 != __last2 - __first2) + return false; + + return __internal::__except_handler( + [&]() + { + return !__internal::__parallel_or( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) + { + return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), + __first2 + (__j - __first1), __p, _IsVector{}); + }); + }); +} + +//------------------------------------------------------------------------ +// equal version for sequences with equal 
length +//------------------------------------------------------------------------ + +template +bool +__brick_equal(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __p, + /* IsVector = */ std::false_type) noexcept +{ + return std::equal(__first1, __last1, __first2, __p); +} + +template +bool +__brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _BinaryPredicate __p, /* is_vector = */ std::true_type) noexcept +{ + return __unseq_backend::__simd_first(__first1, __last1 - __first1, __first2, std::not_fn(__p)).first == __last1; +} + +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _BinaryPredicate __p) noexcept +{ + return __internal::__brick_equal(__first1, __last1, __first2, __p, typename _Tag::__is_vector{}); +} + +template +bool +__pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _BinaryPredicate __p) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + return !__internal::__parallel_or( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) + { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, _IsVector{}); }); + }); +} + +//------------------------------------------------------------------------ +// find_if +//------------------------------------------------------------------------ +template +_ForwardIterator +__brick_find_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, + /*is_vector=*/std::false_type) noexcept +{ + return std::find_if(__first, __last, __pred); +} + +template +_RandomAccessIterator 
+__brick_find_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _Predicate __pred, + /*is_vector=*/std::true_type) noexcept +{ + typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType; + return __unseq_backend::__simd_first( + __first, _SizeType(0), __last - __first, + [&__pred](_RandomAccessIterator __it, _SizeType __i) { return __pred(__it[__i]); }); +} + +template +_ForwardIterator +__pattern_find_if(_Tag __tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Predicate __pred) noexcept +{ + return __internal::__brick_find_if(__first, __last, __pred, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Predicate __pred) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + return __internal::__parallel_find( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j) + { return __internal::__brick_find_if(__i, __j, __pred, _IsVector{}); }, + std::less::difference_type>(), + /*is_first=*/true); + }); +} + +//------------------------------------------------------------------------ +// find_end +//------------------------------------------------------------------------ + +// find the first occurrence of the subsequence [s_first, s_last) +// or the last occurrence of the subsequence in the range [first, last) +// b_first determines what occurrence we want to find (first or last) +template +_RandomAccessIterator1 +__find_subrange(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator1 __global_last, + _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, + bool __b_first, _IsVector __is_vector) noexcept +{ + typedef typename 
std::iterator_traits<_RandomAccessIterator2>::value_type _ValueType; + auto __n2 = __s_last - __s_first; + if (__n2 < 1) + { + return __b_first ? __first : __last; + } + + auto __n1 = __global_last - __first; + if (__n1 < __n2) + { + return __last; + } + + auto __cur = __last; + while (__first != __last && (__global_last - __first >= __n2)) + { + // find position of *s_first in [first, last) (it can be start of subsequence) + __first = __internal::__brick_find_if( + __first, __last, __equal_value_by_pred<_ValueType, _BinaryPredicate>(*__s_first, __pred), __is_vector); + + // if position that was found previously is the start of subsequence + // then we can exit the loop (b_first == true) or keep the position + // (b_first == false) + if (__first != __last && (__global_last - __first >= __n2) && + __internal::__brick_equal(__s_first + 1, __s_last, __first + 1, __pred, __is_vector)) + { + if (__b_first) + { + return __first; + } + else + { + __cur = __first; + } + } + else if (__first == __last) + { + break; + } + else + { + } + + // in case of b_first == false we try to find new start position + // for the next subsequence + ++__first; + } + return __cur; +} + +template +_RandomAccessIterator +__find_subrange(_RandomAccessIterator __first, _RandomAccessIterator __last, _RandomAccessIterator __global_last, + _Size __count, const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector) noexcept +{ + if (static_cast<_Size>(__global_last - __first) < __count || __count < 1) + { + return __last; // According to the standard last shall be returned when count < 1 + } + + auto __unary_pred = __equal_value_by_pred<_Tp, _BinaryPredicate>(__value, __pred); + while (__first != __last && (static_cast<_Size>(__global_last - __first) >= __count)) + { + __first = __internal::__brick_find_if(__first, __last, __unary_pred, __is_vector); + + // check that all of elements in [first+1, first+count) equal to value + if (__first != __last && (static_cast<_Size>(__global_last - 
__first) >= __count) && + !__internal::__brick_any_of(__first + 1, __first + __count, std::not_fn(__unary_pred), __is_vector)) + { + return __first; + } + else if (__first == __last) + { + break; + } + else + { + ++__first; + } + } + return __last; +} + +template +_ForwardIterator1 +__brick_find_end(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, + _ForwardIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::false_type) noexcept +{ + return std::find_end(__first, __last, __s_first, __s_last, __pred); +} + +template +_RandomAccessIterator1 +__brick_find_end(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, + _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::true_type) noexcept +{ + return __find_subrange(__first, __last, __last, __s_first, __s_last, __pred, false, std::true_type()); +} + +template +_ForwardIterator1 +__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept +{ + return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator1 +__pattern_find_end(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, + _BinaryPredicate __pred) noexcept +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + if (__last - __first == __s_last - __s_first) + { + const bool __res = __internal::__pattern_equal(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + __s_first, __pred); + return __res ? 
__first : __last; + } + else + { + return __internal::__except_handler( + [&]() + { + return __internal::__parallel_find( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, + _IsVector{}); + }, + std::greater::difference_type>(), + /*is_first=*/false); + }); + } +} + +//------------------------------------------------------------------------ +// find_first_of +//------------------------------------------------------------------------ +template +_ForwardIterator1 +__brick_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, + _ForwardIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::false_type) noexcept +{ + return std::find_first_of(__first, __last, __s_first, __s_last, __pred); +} + +template +_RandomAccessIterator1 +__brick_find_first_of(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, + _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::true_type) noexcept +{ + return __unseq_backend::__simd_find_first_of(__first, __last, __s_first, __s_last, __pred); +} + +template +_ForwardIterator1 +__pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept +{ + return __internal::__brick_find_first_of(__first, __last, __s_first, __s_last, __pred, + typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator1 +__pattern_find_first_of(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, + _RandomAccessIterator2 __s_last, _BinaryPredicate __pred) noexcept +{ + using __backend_tag = 
typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + return __internal::__parallel_find( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) + { return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, _IsVector{}); }, + std::less::difference_type>(), /*is_first=*/true); + }); +} + +//------------------------------------------------------------------------ +// search +//------------------------------------------------------------------------ +template +_RandomAccessIterator1 +__brick_search(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, + _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, /*vector=*/std::false_type) noexcept +{ + return std::search(__first, __last, __s_first, __s_last, __pred); +} + +template +_RandomAccessIterator1 +__brick_search(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, + _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept +{ + return __internal::__find_subrange(__first, __last, __last, __s_first, __s_last, __pred, true, std::true_type()); +} + +template +_ForwardIterator1 +__pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept +{ + return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator1 +__pattern_search(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, + _BinaryPredicate __pred) noexcept +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; 
+ + if (__last - __first == __s_last - __s_first) + { + const bool __res = __internal::__pattern_equal(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + __s_first, __pred); + return __res ? __first : __last; + } + else + { + return __internal::__except_handler( + [&]() + { + return __internal::__parallel_find( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, + _IsVector{}); + }, + std::less::difference_type>(), + /*is_first=*/true); + }); + } +} + +//------------------------------------------------------------------------ +// search_n +//------------------------------------------------------------------------ +template +_ForwardIterator +__brick_search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value, + _BinaryPredicate __pred, /*vector=*/std::false_type) noexcept +{ + return std::search_n(__first, __last, __count, __value, __pred); +} + +template +_RandomAccessIterator +__brick_search_n(_RandomAccessIterator __first, _RandomAccessIterator __last, _Size __count, const _Tp& __value, + _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept +{ + return __internal::__find_subrange(__first, __last, __last, __count, __value, __pred, std::true_type()); +} + +template +_ForwardIterator +__pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Size __count, + const _Tp& __value, _BinaryPredicate __pred) noexcept +{ + return __internal::__brick_search_n(__first, __last, __count, __value, __pred, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_search_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) 
noexcept +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + if (static_cast<_Size>(__last - __first) == __count) + { + const bool __result = + !__internal::__pattern_any_of(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); }); + return __result ? __first : __last; + } + else + { + return __internal::__except_handler( + [&__exec, __first, __last, __count, &__value, __pred]() + { + return __internal::__parallel_find( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __count, &__value, __pred](_RandomAccessIterator __i, _RandomAccessIterator __j) + { return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, _IsVector{}); }, + std::less::difference_type>(), + /*is_first=*/true); + }); + } +} + +//------------------------------------------------------------------------ +// copy_n +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_copy_n(_ForwardIterator __first, _Size __n, _OutputIterator __result, /*vector=*/std::false_type) noexcept +{ + return std::copy_n(__first, __n, __result); +} + +template +_RandomAccessIterator2 +__brick_copy_n(_RandomAccessIterator1 __first, _Size __n, _RandomAccessIterator2 __result, + /*vector=*/std::true_type) noexcept +{ + return __unseq_backend::__simd_assign( + __first, __n, __result, + [](_RandomAccessIterator1 __first, _RandomAccessIterator2 __result) { *__result = *__first; }); +} + +//------------------------------------------------------------------------ +// copy +//------------------------------------------------------------------------ +template +_OutputIterator +__brick_copy(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, + /*vector=*/std::false_type) noexcept +{ + return std::copy(__first, __last, __result); +} + +template +_RandomAccessIterator2 
+__brick_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, + /*vector=*/std::true_type) noexcept +{ + return __unseq_backend::__simd_assign( + __first, __last - __first, __result, + [](_RandomAccessIterator1 __first, _RandomAccessIterator2 __result) { *__result = *__first; }); +} + +//------------------------------------------------------------------------ +// move +//------------------------------------------------------------------------ +template +_OutputIterator +__brick_move(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, + /*vector=*/std::false_type) noexcept +{ + return std::move(__first, __last, __result); +} + +template +_RandomAccessIterator2 +__brick_move(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, + /*vector=*/std::true_type) noexcept +{ + return __unseq_backend::__simd_assign( + __first, __last - __first, __result, + [](_RandomAccessIterator1 __first, _RandomAccessIterator2 __result) { *__result = std::move(*__first); }); +} + +struct __brick_move_destroy +{ + template + _RandomAccessIterator2 + operator()(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, + /*vec*/ std::true_type) const + { + using _IteratorValueType = typename std::iterator_traits<_RandomAccessIterator1>::value_type; + + return __unseq_backend::__simd_assign(__first, __last - __first, __result, + [](_RandomAccessIterator1 __first, _RandomAccessIterator2 __result) { + *__result = std::move(*__first); + (*__first).~_IteratorValueType(); + }); + } + + template + _RandomAccessIterator2 + operator()(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, + /*vec*/ std::false_type) const + { + using _IteratorValueType = typename std::iterator_traits<_RandomAccessIterator1>::value_type; + + for (; __first != __last; ++__first, ++__result) + { + *__result = std::move(*__first); 
+ (*__first).~_IteratorValueType(); + } + return __result; + } +}; + +//------------------------------------------------------------------------ +// swap_ranges +//------------------------------------------------------------------------ +template +_OutputIterator +__brick_swap_ranges(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, + /*vector=*/std::false_type) noexcept +{ + return std::swap_ranges(__first, __last, __result); +} + +template +_RandomAccessIterator2 +__brick_swap_ranges(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, + /*vector=*/std::true_type) noexcept +{ + using std::iter_swap; + return __unseq_backend::__simd_assign(__first, __last - __first, __result, + iter_swap<_RandomAccessIterator1, _RandomAccessIterator2>); +} + +//------------------------------------------------------------------------ +// copy_if +//------------------------------------------------------------------------ +template +_OutputIterator +__brick_copy_if(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, _UnaryPredicate __pred, + /*vector=*/std::false_type) noexcept +{ + return std::copy_if(__first, __last, __result, __pred); +} + +template +_RandomAccessIterator2 +__brick_copy_if(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, + _UnaryPredicate __pred, + /*vector=*/std::true_type) noexcept +{ +#if defined(_PSTL_MONOTONIC_PRESENT) + return __unseq_backend::__simd_copy_if(__first, __last - __first, __result, __pred); +#else + return std::copy_if(__first, __last, __result, __pred); +#endif +} + +// TODO: Try to use transform_reduce for combining __brick_copy_if_phase1 on IsVector. 
+template +std::pair<_DifferenceType, _DifferenceType> +__brick_calc_mask_1(_ForwardIterator __first, _ForwardIterator __last, bool* __restrict __mask, _UnaryPredicate __pred, + /*vector=*/std::false_type) noexcept +{ + auto __count_true = _DifferenceType(0); + auto __size = __last - __first; + + static_assert(__are_random_access_iterators<_ForwardIterator>::value, + "Pattern-brick error. Should be a random access iterator."); + + for (; __first != __last; ++__first, ++__mask) + { + *__mask = __pred(*__first); + if (*__mask) + { + ++__count_true; + } + } + return std::make_pair(__count_true, __size - __count_true); +} + +template +std::pair<_DifferenceType, _DifferenceType> +__brick_calc_mask_1(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __mask, _UnaryPredicate __pred, + /*vector=*/std::true_type) noexcept +{ + auto __result = __unseq_backend::__simd_calc_mask_1(__first, __last - __first, __mask, __pred); + return std::make_pair(__result, (__last - __first) - __result); +} + +template +void +__brick_copy_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, bool* __mask, + _Assigner __assigner, /*vector=*/std::false_type) noexcept +{ + for (; __first != __last; ++__first, ++__mask) + { + if (*__mask) + { + __assigner(__first, __result); + ++__result; + } + } +} + +template +void +__brick_copy_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, + bool* __restrict __mask, _Assigner __assigner, /*vector=*/std::true_type) noexcept +{ +#if defined(_PSTL_MONOTONIC_PRESENT) + __unseq_backend::__simd_copy_by_mask(__first, __last - __first, __result, __mask, __assigner); +#else + __internal::__brick_copy_by_mask(__first, __last, __result, __mask, __assigner, std::false_type()); +#endif +} + +template +void +__brick_partition_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator1 __out_true, + _OutputIterator2 __out_false, bool* __mask, 
/*vector=*/std::false_type) noexcept +{ + for (; __first != __last; ++__first, ++__mask) + { + if (*__mask) + { + *__out_true = *__first; + ++__out_true; + } + else + { + *__out_false = *__first; + ++__out_false; + } + } +} + +template +void +__brick_partition_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, + _RandomAccessIterator2 __out_true, _RandomAccessIterator3 __out_false, bool* __mask, + /*vector=*/std::true_type) noexcept +{ +#if defined(_PSTL_MONOTONIC_PRESENT) + __unseq_backend::__simd_partition_by_mask(__first, __last - __first, __out_true, __out_false, __mask); +#else + __internal::__brick_partition_by_mask(__first, __last, __out_true, __out_false, __mask, std::false_type()); +#endif +} + +template +_OutputIterator +__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, + _UnaryPredicate __pred) noexcept +{ + return __internal::__brick_copy_if(__first, __last, __result, __pred, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator2 +__pattern_copy_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _UnaryPredicate __pred) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + const _DifferenceType __n = __last - __first; + if (_DifferenceType(1) < __n) + { + __par_backend::__buffer __mask_buf(__n); + return __internal::__except_handler( + [&__exec, __n, __first, __result, __pred, &__mask_buf]() + { + bool* __mask = __mask_buf.get(); + _DifferenceType __m{}; + __par_backend::__parallel_strict_scan( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + [=](_DifferenceType __i, _DifferenceType __len) { // Reduce + return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + 
__len), + __mask + __i, __pred, _IsVector{}) + .first; + }, + std::plus<_DifferenceType>(), // Combine + [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan + __internal::__brick_copy_by_mask( + __first + __i, __first + (__i + __len), __result + __initial, __mask + __i, + [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{}); + }, + [&__m](_DifferenceType __total) { __m = __total; }); + return __result + __m; + }); + } + // trivial sequence - use serial algorithm + return __internal::__brick_copy_if(__first, __last, __result, __pred, _IsVector{}); +} + +//------------------------------------------------------------------------ +// count +//------------------------------------------------------------------------ +template +typename std::iterator_traits<_RandomAccessIterator>::difference_type +__brick_count(_RandomAccessIterator __first, _RandomAccessIterator __last, _Predicate __pred, + /* is_vector = */ std::true_type) noexcept +{ + return __unseq_backend::__simd_count(__first, __last - __first, __pred); +} + +template +typename std::iterator_traits<_ForwardIterator>::difference_type +__brick_count(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, + /* is_vector = */ std::false_type) noexcept +{ + return std::count_if(__first, __last, __pred); +} + +template +typename std::iterator_traits<_ForwardIterator>::difference_type +__pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) noexcept +{ + return __internal::__brick_count(__first, __last, __pred, typename _Tag::__is_vector{}); +} + +template +typename std::iterator_traits<_RandomAccessIterator>::difference_type +__pattern_count(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Predicate __pred) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + typedef typename 
std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType; + return __internal::__except_handler( + [&]() + { + return __par_backend::__parallel_reduce( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), + [__pred](_RandomAccessIterator __begin, _RandomAccessIterator __end, _SizeType __value) -> _SizeType + { return __value + __internal::__brick_count(__begin, __end, __pred, _IsVector{}); }, + std::plus<_SizeType>()); + }); +} + +//------------------------------------------------------------------------ +// unique +//------------------------------------------------------------------------ + +template +_RandomAccessIterator +__brick_unique(_RandomAccessIterator __first, _RandomAccessIterator __last, _BinaryPredicate __pred, + /*is_vector=*/std::false_type) noexcept +{ + return std::unique(__first, __last, __pred); +} + +template +_RandomAccessIterator +__brick_unique(_RandomAccessIterator __first, _RandomAccessIterator __last, _BinaryPredicate __pred, + /*is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::unique(__first, __last, __pred); +} + +template +_ForwardIterator +__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _BinaryPredicate __pred) noexcept +{ + return __internal::__brick_unique(__first, __last, __pred, typename _Tag::__is_vector{}); +} + +// That function is shared between two algorithms - remove_if (__pattern_remove_if) and unique (pattern unique). But a mask calculation is different. +// So, a caller passes _CalcMask brick into remove_elements. 
+template +_ForwardIterator +__remove_elements(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _CalcMask __calc_mask) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; + typedef typename std::iterator_traits<_ForwardIterator>::value_type _Tp; + _DifferenceType __n = __last - __first; + __par_backend::__buffer __mask_buf(__n); + // 1. find a first iterator that should be removed + return __internal::__except_handler([&]() { + bool* __mask = __mask_buf.get(); + _DifferenceType __min = __par_backend::__parallel_reduce( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, + [__first, __mask, &__calc_mask](_DifferenceType __i, _DifferenceType __j, + _DifferenceType __local_min) -> _DifferenceType + { + // Create mask + __calc_mask(__mask + __i, __mask + __j, __first + __i); + + // if minimum was found in a previous range we shouldn't do anymore + if (__local_min < __i) + { + return __local_min; + } + // find first iterator that should be removed + bool* __result = __internal::__brick_find_if( + __mask + __i, __mask + __j, [](bool __val) { return !__val; }, _IsVector{}); + if (__result - __mask == __j) + { + return __local_min; + } + return std::min(__local_min, _DifferenceType(__result - __mask)); + }, + [](_DifferenceType __local_min1, _DifferenceType __local_min2) -> _DifferenceType + { return std::min(__local_min1, __local_min2); }); + + // No elements to remove - exit + if (__min == __n) + { + return __last; + } + __n -= __min; + __first += __min; + + __par_backend::__buffer<_Tp> __buf(__n); + _Tp* __result = __buf.get(); + __mask += __min; + _DifferenceType __m{}; + // 2. 
Elements that doesn't satisfy pred are moved to result + __par_backend::__parallel_strict_scan( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + [__mask](_DifferenceType __i, _DifferenceType __len) + { + return __internal::__brick_count( + __mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, _IsVector{}); + }, + std::plus<_DifferenceType>(), + [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) + { + __internal::__brick_copy_by_mask( + __first + __i, __first + __i + __len, __result + __initial, __mask + __i, + [](_ForwardIterator __x, _Tp* __z) + { + __internal::__invoke_if_else( + std::is_trivial<_Tp>(), [&]() { *__z = std::move(*__x); }, + [&]() { ::new (std::addressof(*__z)) _Tp(std::move(*__x)); }); + }, + _IsVector{}); + }, + [&__m](_DifferenceType __total) { __m = __total; }); + + // 3. Elements from result are moved to [first, last) + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, + [__result, __first](_Tp* __i, _Tp* __j) + { + __invoke_if_else( + std::is_trivial<_Tp>(), [&]() { __brick_move(__i, __j, __first + (__i - __result), _IsVector{}); }, + [&]() { __brick_move_destroy()(__i, __j, __first + (__i - __result), _IsVector{}); }); + }); + return __first + __m; + }); +} + +template +_RandomAccessIterator +__pattern_unique(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _BinaryPredicate __pred) noexcept +{ + typedef typename std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; + + if (__first == __last) + { + return __last; + } + if (__first + 1 == __last || __first + 2 == __last) + { + // Trivial sequence - use serial algorithm + return __internal::__brick_unique(__first, __last, __pred, _IsVector{}); + } + return __internal::__remove_elements( + __tag, std::forward<_ExecutionPolicy>(__exec), ++__first, __last, + 
[&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) + { + __internal::__brick_walk3( + __b, __e, __it - 1, __it, + [&__pred](bool& __x, _ReferenceType __y, _ReferenceType __z) { __x = !__pred(__y, __z); }, _IsVector{}); + }); +} + +//------------------------------------------------------------------------ +// unique_copy +//------------------------------------------------------------------------ + +template +OutputIterator +__brick_unique_copy(_ForwardIterator __first, _ForwardIterator __last, OutputIterator __result, _BinaryPredicate __pred, + /*vector=*/std::false_type) noexcept +{ + return std::unique_copy(__first, __last, __result, __pred); +} + +template +_RandomAccessIterator2 +__brick_unique_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, + _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept +{ +#if defined(_PSTL_MONOTONIC_PRESENT) + return __unseq_backend::__simd_unique_copy(__first, __last - __first, __result, __pred); +#else + return std::unique_copy(__first, __last, __result, __pred); +#endif +} + +template +_OutputIterator +__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __result, _BinaryPredicate __pred) noexcept +{ + return __internal::__brick_unique_copy(__first, __last, __result, __pred, typename _Tag::__is_vector{}); +} + +template +_DifferenceType +__brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask, + _BinaryPredicate __pred, /*vector=*/std::false_type) noexcept +{ + _DifferenceType __count = 0; + for (; __first != __last; ++__first, ++__mask) + { + *__mask = !__pred(*__first, *(__first - 1)); + __count += *__mask; + } + return __count; +} + +template +_DifferenceType +__brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask, + _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept +{ + return 
__unseq_backend::__simd_calc_mask_2(__first, __last - __first, __mask, __pred); +} + +template +_RandomAccessIterator2 +__pattern_unique_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _BinaryPredicate __pred) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + const _DifferenceType __n = __last - __first; + if (_DifferenceType(2) < __n) + { + __par_backend::__buffer __mask_buf(__n); + if (_DifferenceType(2) < __n) + { + return __internal::__except_handler( + [&__exec, __n, __first, __result, __pred, &__mask_buf]() + { + bool* __mask = __mask_buf.get(); + _DifferenceType __m{}; + __par_backend::__parallel_strict_scan( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce + _DifferenceType __extra = 0; + if (__i == 0) + { + // Special boundary case + __mask[__i] = true; + if (--__len == 0) + return 1; + ++__i; + ++__extra; + } + return __internal::__brick_calc_mask_2<_DifferenceType>( + __first + __i, __first + (__i + __len), __mask + __i, __pred, _IsVector{}) + + __extra; + }, + std::plus<_DifferenceType>(), // Combine + [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan + // Phase 2 is same as for __pattern_copy_if + __internal::__brick_copy_by_mask( + __first + __i, __first + (__i + __len), __result + __initial, __mask + __i, + [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, + _IsVector{}); + }, + [&__m](_DifferenceType __total) { __m = __total; }); + return __result + __m; + }); + } + } + // trivial sequence - use serial algorithm + return __internal::__brick_unique_copy(__first, __last, __result, __pred, _IsVector{}); +} + 
+//------------------------------------------------------------------------ +// reverse +//------------------------------------------------------------------------ +template +void +__brick_reverse(_BidirectionalIterator __first, _BidirectionalIterator __last, /*__is_vector=*/std::false_type) noexcept +{ + std::reverse(__first, __last); +} + +template +void +__brick_reverse(_RandomAccessIterator __first, _RandomAccessIterator __last, /*__is_vector=*/std::true_type) noexcept +{ + typedef typename std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; + + const auto __n = (__last - __first) / 2; + __unseq_backend::__simd_walk_2(__first, __n, std::reverse_iterator<_RandomAccessIterator>(__last), + [](_ReferenceType __x, _ReferenceType __y) { + using std::swap; + swap(__x, __y); + }); +} + +// this brick is called in parallel version, so we can use iterator arithmetic +template +void +__brick_reverse(_BidirectionalIterator __first, _BidirectionalIterator __last, _BidirectionalIterator __d_last, + /*is_vector=*/std::false_type) noexcept +{ + for (--__d_last; __first != __last; ++__first, --__d_last) + { + using std::iter_swap; + iter_swap(__first, __d_last); + } +} + +// this brick is called in parallel version, so we can use iterator arithmetic +template +void +__brick_reverse(_RandomAccessIterator __first, _RandomAccessIterator __last, _RandomAccessIterator __d_last, + /*is_vector=*/std::true_type) noexcept +{ + typedef typename std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; + + __unseq_backend::__simd_walk_2(__first, __last - __first, std::reverse_iterator<_RandomAccessIterator>(__d_last), + [](_ReferenceType __x, _ReferenceType __y) { + using std::swap; + swap(__x, __y); + }); +} + +template +void +__pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last) noexcept +{ + __internal::__brick_reverse(__first, __last, typename _Tag::__is_vector{}); +} + +template +void 
+__pattern_reverse(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, + [__first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) + { __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), _IsVector{}); }); +} + +//------------------------------------------------------------------------ +// reverse_copy +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_reverse_copy(_BidirectionalIterator __first, _BidirectionalIterator __last, _OutputIterator __d_first, + /*is_vector=*/std::false_type) noexcept +{ + return std::reverse_copy(__first, __last, __d_first); +} + +template +_RandomAccessIterator2 +__brick_reverse_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, + /*is_vector=*/std::true_type) noexcept +{ + typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; + typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; + + return __unseq_backend::__simd_walk_2(std::reverse_iterator<_RandomAccessIterator1>(__last), __last - __first, + __d_first, [](_ReferenceType1 __x, _ReferenceType2 __y) { __y = __x; }); +} + +template +_OutputIterator +__pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, + _OutputIterator __d_first) noexcept +{ + return __internal::__brick_reverse_copy(__first, __last, __d_first, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator2 +__pattern_reverse_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + 
_RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + auto __len = __last - __first; + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __len, __d_first](_RandomAccessIterator1 __inner_first, _RandomAccessIterator1 __inner_last) + { + __internal::__brick_reverse_copy(__inner_first, __inner_last, + __d_first + (__len - (__inner_last - __first)), _IsVector{}); + }); + return __d_first + __len; +} + +//------------------------------------------------------------------------ +// rotate +//------------------------------------------------------------------------ +template +_ForwardIterator +__brick_rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, + /*is_vector=*/std::false_type) noexcept +{ +#if defined(_PSTL_CPP11_STD_ROTATE_BROKEN) + std::rotate(__first, __middle, __last); + return std::next(__first, std::distance(__middle, __last)); +#else + return std::rotate(__first, __middle, __last); +#endif +} + +template +_RandomAccessIterator +__brick_rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, + /*is_vector=*/std::true_type) noexcept +{ + auto __n = __last - __first; + auto __m = __middle - __first; + const _RandomAccessIterator __ret = __first + (__last - __middle); + + bool __is_left = (__m <= __n / 2); + if (!__is_left) + __m = __n - __m; + + while (__n > 1 && __m > 0) + { + using std::iter_swap; + const auto __m_2 = __m * 2; + if (__is_left) + { + for (; __last - __first >= __m_2; __first += __m) + { + __unseq_backend::__simd_assign(__first, __m, __first + __m, + iter_swap<_RandomAccessIterator, _RandomAccessIterator>); + } + } + else + { + for (; __last - __first >= __m_2; __last -= __m) + { + __unseq_backend::__simd_assign(__last - __m, __m, __last - __m_2, + iter_swap<_RandomAccessIterator, _RandomAccessIterator>); + } + } + 
__is_left = !__is_left; + __m = __n % __m; + __n = __last - __first; + } + + return __ret; +} + +template +_ForwardIterator +__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, + _ForwardIterator __last) noexcept +{ + return __internal::__brick_rotate(__first, __middle, __last, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_rotate(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _Tp; + auto __n = __last - __first; + auto __m = __middle - __first; + if (__m <= __n / 2) + { + __par_backend::__buffer<_Tp> __buf(__n - __m); + return __internal::__except_handler( + [&__exec, __n, __m, __first, __middle, __last, &__buf]() + { + _Tp* __result = __buf.get(); + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __middle, __last, + [__middle, __result](_RandomAccessIterator __b, _RandomAccessIterator __e) + { __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __middle), _IsVector{}); }); + + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __middle, + [__last, __middle](_RandomAccessIterator __b, _RandomAccessIterator __e) + { __internal::__brick_move(__b, __e, __b + (__last - __middle), _IsVector{}); }); + + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __result, __result + (__n - __m), + [__first, __result](_Tp* __b, _Tp* __e) + { __brick_move_destroy()(__b, __e, __first + (__b - __result), _IsVector{}); }); + + return __first + (__last - __middle); + }); + } + else + { + __par_backend::__buffer<_Tp> __buf(__m); + return __internal::__except_handler( + [&__exec, __n, __m, __first, __middle, __last, 
&__buf]() + { + _Tp* __result = __buf.get(); + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __middle, + [__first, __result](_RandomAccessIterator __b, _RandomAccessIterator __e) + { __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __first), _IsVector{}); }); + + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __middle, __last, + [__first, __middle](_RandomAccessIterator __b, _RandomAccessIterator __e) + { __internal::__brick_move(__b, __e, __first + (__b - __middle), _IsVector{}); }); + + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, + [__n, __m, __first, __result](_Tp* __b, _Tp* __e) + { __brick_move_destroy()(__b, __e, __first + ((__n - __m) + (__b - __result)), _IsVector{}); }); + + return __first + (__last - __middle); + }); + } +} + +//------------------------------------------------------------------------ +// rotate_copy +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_rotate_copy(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, + _OutputIterator __result, /*__is_vector=*/std::false_type) noexcept +{ + return std::rotate_copy(__first, __middle, __last, __result); +} + +template +_RandomAccessIterator2 +__brick_rotate_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __middle, _RandomAccessIterator1 __last, + _RandomAccessIterator2 __result, /*__is_vector=*/std::true_type) noexcept +{ + _RandomAccessIterator2 __res = __internal::__brick_copy(__middle, __last, __result, std::true_type()); + return __internal::__brick_copy(__first, __middle, __res, std::true_type()); +} + +template +_OutputIterator +__pattern_rotate_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, + _ForwardIterator __last, _OutputIterator __result) noexcept +{ + return 
__internal::__brick_rotate_copy(__first, __middle, __last, __result, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator2 +__pattern_rotate_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __middle, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __last, __middle, __result](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) + { + if (__b > __middle) + { + __internal::__brick_copy(__b, __e, __result + (__b - __middle), _IsVector{}); + } + else + { + _RandomAccessIterator2 __new_result = __result + ((__last - __middle) + (__b - __first)); + if (__e < __middle) + { + __internal::__brick_copy(__b, __e, __new_result, _IsVector{}); + } + else + { + __internal::__brick_copy(__b, __middle, __new_result, _IsVector{}); + __internal::__brick_copy(__middle, __e, __result, _IsVector{}); + } + } + }); + return __result + (__last - __first); +} + +//------------------------------------------------------------------------ +// is_partitioned +//------------------------------------------------------------------------ + +template +bool +__brick_is_partitioned(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, + /*is_vector=*/std::false_type) noexcept +{ + return std::is_partitioned(__first, __last, __pred); +} + +template +bool +__brick_is_partitioned(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred, + /*is_vector=*/std::true_type) noexcept +{ + typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType; + if (__first == __last) + { + return true; + } + else + { + _RandomAccessIterator __result = __unseq_backend::__simd_first( + __first, _SizeType(0), __last - __first, + 
[&__pred](_RandomAccessIterator __it, _SizeType __i) { return !__pred(__it[__i]); }); + if (__result == __last) + { + return true; + } + else + { + ++__result; + return !__unseq_backend::__simd_or(__result, __last - __result, __pred); + } + } +} + +template +bool +__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept +{ + return __internal::__brick_is_partitioned(__first, __last, __pred, typename _Tag::__is_vector{}); +} + +template +bool +__pattern_is_partitioned(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) +{ + if (__first == __last) + { + return true; + } + else + { + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler([&]() { + // State of current range: + // broken - current range is not partitioned by pred + // all_true - all elements in current range satisfy pred + // all_false - all elements in current range don't satisfy pred + // true_false - elements satisfy pred are placed before elements that don't satisfy pred + enum _ReduceType + { + __not_init = -1, + __broken, + __all_true, + __all_false, + __true_false + }; + _ReduceType __init = __not_init; + + // Array with states that we'll have when state from the left branch is merged with state from the right branch. 
+ // State is calculated by formula: new_state = table[left_state * 4 + right_state] + _ReduceType __table[] = {__broken, __broken, __broken, __broken, __broken, __all_true, + __true_false, __true_false, __broken, __broken, __all_false, __broken, + __broken, __broken, __true_false, __broken}; + + __init = __par_backend::__parallel_reduce( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + [&__pred, &__table](_RandomAccessIterator __i, _RandomAccessIterator __j, + _ReduceType __value) -> _ReduceType + { + if (__value == __broken) + { + return __broken; + } + _ReduceType __res = __not_init; + // if first element satisfy pred + if (__pred(*__i)) + { + // find first element that don't satisfy pred + _RandomAccessIterator __x = + __internal::__brick_find_if(__i + 1, __j, std::not_fn(__pred), _IsVector{}); + if (__x != __j) + { + // find first element after "x" that satisfy pred + _RandomAccessIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, _IsVector{}); + // if it was found then range isn't partitioned by pred + if (__y != __j) + { + return __broken; + } + else + { + __res = __true_false; + } + } + else + { + __res = __all_true; + } + } + else + { // if first element doesn't satisfy pred + // then we should find the first element that satisfy pred. + // If we found it then range isn't partitioned by pred + if (__internal::__brick_find_if(__i + 1, __j, __pred, _IsVector{}) != __j) + { + return __broken; + } + else + { + __res = __all_false; + } + } + // if we have value from left range then we should calculate the result + return (__value == -1) ? 
__res : __table[__value * 4 + __res]; + }, + + [&__table](_ReduceType __val1, _ReduceType __val2) -> _ReduceType + { + if (__val1 == __broken || __val2 == __broken) + { + return __broken; + } + // calculate the result for new big range + return __table[__val1 * 4 + __val2]; + }); + return __init != __broken; + }); + } +} + +//------------------------------------------------------------------------ +// partition +//------------------------------------------------------------------------ + +template +_ForwardIterator +__brick_partition(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, + /*is_vector=*/std::false_type) noexcept +{ + return std::partition(__first, __last, __pred); +} + +template +_RandomAccessIterator +__brick_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred, + /*is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::partition(__first, __last, __pred); +} + +template +_ForwardIterator +__pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept +{ + return __internal::__brick_partition(__first, __last, __pred, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_partition(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + // partitioned range: elements before pivot satisfy pred (true part), + // elements after pivot don't satisfy pred (false part) + struct _PartitionRange + { + _RandomAccessIterator __begin; + _RandomAccessIterator __pivot; + _RandomAccessIterator __end; + }; + + return __internal::__except_handler([&]() { + _PartitionRange __init{__last, __last, __last}; + + // lambda for merging two partitioned ranges to one partitioned 
range + auto __reductor = [&__exec](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange + { + auto __size1 = __val1.__end - __val1.__pivot; + auto __size2 = __val2.__pivot - __val2.__begin; + auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); + + // if all elements in left range satisfy pred then we can move new pivot to pivot of right range + if (__val1.__end == __val1.__pivot) + { + return {__new_begin, __val2.__pivot, __val2.__end}; + } + // if true part of right range greater than false part of left range + // then we should swap the false part of left range and last part of true part of right range + else if (__size2 > __size1) + { + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, + [__val1, __val2, __size1](_RandomAccessIterator __i, _RandomAccessIterator __j) { + __internal::__brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot), + _IsVector{}); + }); + return {__new_begin, __val2.__pivot - __size1, __val2.__end}; + } + // else we should swap the first part of false part of left range and true part of right range + else + { + __par_backend::__parallel_for( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, + [__val1, __val2](_RandomAccessIterator __i, _RandomAccessIterator __j) { + __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), _IsVector{}); + }); + return {__new_begin, __val1.__pivot + __size2, __val2.__end}; + } + }; + + _PartitionRange __result = __par_backend::__parallel_reduce( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + [__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, + _PartitionRange __value) -> _PartitionRange + { + //1. serial partition + _RandomAccessIterator __pivot = __internal::__brick_partition(__i, __j, __pred, _IsVector{}); + + // 2. 
merging of two ranges (left and right respectively) + return __reductor(__value, {__i, __pivot, __j}); + }, + __reductor); + return __result.__pivot; + }); +} + +//------------------------------------------------------------------------ +// stable_partition +//------------------------------------------------------------------------ + +template +_BidirectionalIterator +__brick_stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last, _UnaryPredicate __pred, + /*__is_vector=*/std::false_type) noexcept +{ + return std::stable_partition(__first, __last, __pred); +} + +template +_RandomAccessIterator +__brick_stable_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred, + /*__is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::stable_partition(__first, __last, __pred); +} + +template +_BidirectionalIterator +__pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, + _UnaryPredicate __pred) noexcept +{ + return __internal::__brick_stable_partition(__first, __last, __pred, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_stable_partition(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) noexcept +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + // partitioned range: elements before pivot satisfy pred (true part), + // elements after pivot don't satisfy pred (false part) + struct _PartitionRange + { + _RandomAccessIterator __begin; + _RandomAccessIterator __pivot; + _RandomAccessIterator __end; + }; + + return __internal::__except_handler([&]() { + _PartitionRange __init{__last, __last, __last}; + + // lambda for merging two partitioned ranges to one partitioned range + auto __reductor = [](_PartitionRange __val1, 
_PartitionRange __val2) -> _PartitionRange + { + auto __size1 = __val1.__end - __val1.__pivot; + auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); + + // if all elements in left range satisfy pred then we can move new pivot to pivot of right range + if (__val1.__end == __val1.__pivot) + { + return {__new_begin, __val2.__pivot, __val2.__end}; + } + // if true part of right range greater than false part of left range + // then we should swap the false part of left range and last part of true part of right range + else + { + __internal::__brick_rotate(__val1.__pivot, __val2.__begin, __val2.__pivot, _IsVector{}); + return {__new_begin, __val2.__pivot - __size1, __val2.__end}; + } + }; + + _PartitionRange __result = __par_backend::__parallel_reduce( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + [&__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, + _PartitionRange __value) -> _PartitionRange + { + //1. serial stable_partition + _RandomAccessIterator __pivot = __internal::__brick_stable_partition(__i, __j, __pred, _IsVector{}); + + // 2. 
merging of two ranges (left and right respectively) + return __reductor(__value, {__i, __pivot, __j}); + }, + __reductor); + return __result.__pivot; + }); +} + +//------------------------------------------------------------------------ +// partition_copy +//------------------------------------------------------------------------ + +template +std::pair<_OutputIterator1, _OutputIterator2> +__brick_partition_copy(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator1 __out_true, + _OutputIterator2 __out_false, _UnaryPredicate __pred, /*is_vector=*/std::false_type) noexcept +{ + return std::partition_copy(__first, __last, __out_true, __out_false, __pred); +} + +template +std::pair<_RandomAccessIterator2, _RandomAccessIterator3> +__brick_partition_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __out_true, + _RandomAccessIterator3 __out_false, _UnaryPredicate __pred, + /*is_vector=*/std::true_type) noexcept +{ +#if defined(_PSTL_MONOTONIC_PRESENT) + return __unseq_backend::__simd_partition_copy(__first, __last - __first, __out_true, __out_false, __pred); +#else + return std::partition_copy(__first, __last, __out_true, __out_false, __pred); +#endif +} + +template +std::pair<_OutputIterator1, _OutputIterator2> +__pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator1 __out_true, _OutputIterator2 __out_false, _UnaryPredicate __pred) noexcept +{ + return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, + typename _Tag::__is_vector{}); +} + +template +std::pair<_RandomAccessIterator2, _RandomAccessIterator3> +__pattern_partition_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __out_true, + _RandomAccessIterator3 __out_false, _UnaryPredicate __pred) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + 
+ typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + typedef std::pair<_DifferenceType, _DifferenceType> _ReturnType; + const _DifferenceType __n = __last - __first; + if (_DifferenceType(1) < __n) + { + __par_backend::__buffer __mask_buf(__n); + return __internal::__except_handler( + [&__exec, __n, __first, __out_true, __out_false, __pred, &__mask_buf]() + { + bool* __mask = __mask_buf.get(); + _ReturnType __m{}; + __par_backend::__parallel_strict_scan( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, + std::make_pair(_DifferenceType(0), _DifferenceType(0)), + [=](_DifferenceType __i, _DifferenceType __len) { // Reduce + return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), + __mask + __i, __pred, _IsVector{}); + }, + [](const _ReturnType& __x, const _ReturnType& __y) -> _ReturnType + { return std::make_pair(__x.first + __y.first, __x.second + __y.second); }, // Combine + [=](_DifferenceType __i, _DifferenceType __len, _ReturnType __initial) { // Scan + __internal::__brick_partition_by_mask( + __first + __i, __first + (__i + __len), __out_true + __initial.first, + __out_false + __initial.second, __mask + __i, _IsVector{}); + }, + [&__m](_ReturnType __total) { __m = __total; }); + return std::make_pair(__out_true + __m.first, __out_false + __m.second); + }); + } + // trivial sequence - use serial algorithm + return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, _IsVector{}); +} + +//------------------------------------------------------------------------ +// sort +//------------------------------------------------------------------------ + +template +void +__pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + _IsMoveConstructible) noexcept +{ + std::sort(__first, __last, __comp); +} + +template +void +__pattern_sort(__parallel_tag<_IsVector> __tag, 
_ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp, /*is_move_constructible=*/std::true_type) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + __internal::__except_handler( + [&]() + { + __par_backend::__parallel_stable_sort( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) + { std::sort(__first, __last, __comp); }); + }); +} + +//------------------------------------------------------------------------ +// stable_sort +//------------------------------------------------------------------------ + +template +void +__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept +{ + std::stable_sort(__first, __last, __comp); +} + +template +void +__pattern_stable_sort(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + __internal::__except_handler( + [&]() + { + __par_backend::__parallel_stable_sort( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) + { std::stable_sort(__first, __last, __comp); }); + }); +} + +//------------------------------------------------------------------------ +// partial_sort +//------------------------------------------------------------------------ + +template +void +__pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __middle, + _RandomAccessIterator __last, _Compare __comp) noexcept +{ + std::partial_sort(__first, __middle, __last, __comp); +} + +template +void +__pattern_partial_sort(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, 
_RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + const auto __n = __middle - __first; + if (__n == 0) + return; + + __internal::__except_handler( + [&]() + { + __par_backend::__parallel_stable_sort( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) + { + if (__n < __end - __begin) + std::partial_sort(__begin, __begin + __n, __end, __comp); + else + std::sort(__begin, __end, __comp); + }, + __n); + }); +} + +//------------------------------------------------------------------------ +// partial_sort_copy +//------------------------------------------------------------------------ + +template +_RandomAccessIterator +__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp) noexcept +{ + return std::partial_sort_copy(__first, __last, __d_first, __d_last, __comp); +} + +template +_RandomAccessIterator2 +__pattern_partial_sort_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, + _RandomAccessIterator2 __d_last, _Compare __comp) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + if (__last == __first || __d_last == __d_first) + { + return __d_first; + } + auto __n1 = __last - __first; + auto __n2 = __d_last - __d_first; + return __internal::__except_handler([&]() { + if (__n2 >= __n1) + { + __par_backend::__parallel_stable_sort( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, + [__first, __d_first](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, _Compare __comp) + { + _RandomAccessIterator1 __i1 = __first + 
(__i - __d_first); + _RandomAccessIterator1 __j1 = __first + (__j - __d_first); + + // 1. Copy elements from input to output +#if !defined(_PSTL_ICC_18_OMP_SIMD_BROKEN) + __internal::__brick_copy(__i1, __j1, __i, _IsVector{}); +#else + std::copy(__i1, __j1, __i); +#endif + // 2. Sort elements in output sequence + std::sort(__i, __j, __comp); + }, + __n1); + return __d_first + __n1; + } + else + { + typedef typename std::iterator_traits<_RandomAccessIterator1>::value_type _T1; + typedef typename std::iterator_traits<_RandomAccessIterator2>::value_type _T2; + __par_backend::__buffer<_T1> __buf(__n1); + _T1* __r = __buf.get(); + + __par_backend::__parallel_stable_sort( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, + [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) + { + _RandomAccessIterator1 __it = __first + (__i - __r); + + // 1. Copy elements from input to raw memory + for (_T1* __k = __i; __k != __j; ++__k, ++__it) + { + ::new (__k) _T2(*__it); + } + + // 2. Sort elements in temporary __buffer + if (__n2 < __j - __i) + std::partial_sort(__i, __i + __n2, __j, __comp); + else + std::sort(__i, __j, __comp); + }, + __n2); + + // 3. 
Move elements from temporary __buffer to output + __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, + [__r, __d_first](_T1* __i, _T1* __j) + { __brick_move_destroy()(__i, __j, __d_first + (__i - __r), _IsVector{}); }); + __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __r + __n2, + __r + __n1, + [](_T1* __i, _T1* __j) { __brick_destroy(__i, __j, _IsVector{}); }); + + return __d_first + __n2; + } + }); +} + +//------------------------------------------------------------------------ +// adjacent_find +//------------------------------------------------------------------------ +template +_RandomAccessIterator +__brick_adjacent_find(_RandomAccessIterator __first, _RandomAccessIterator __last, _BinaryPredicate __pred, + /* IsVector = */ std::true_type, bool __or_semantic) noexcept +{ + return __unseq_backend::__simd_adjacent_find(__first, __last, __pred, __or_semantic); +} + +template +_ForwardIterator +__brick_adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred, + /* IsVector = */ std::false_type, bool) noexcept +{ + return std::adjacent_find(__first, __last, __pred); +} + +template +_ForwardIterator +__pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _BinaryPredicate __pred, bool __or_semantic) noexcept +{ + return __internal::__brick_adjacent_find(__first, __last, __pred, typename _Tag::__is_vector{}, __or_semantic); +} + +template +_RandomAccessIterator +__pattern_adjacent_find(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _BinaryPredicate __pred, bool __or_semantic) +{ + if (__last - __first < 2) + return __last; + + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + return __par_backend::__parallel_reduce( + __backend_tag{}, 
std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, + [__last, __pred, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, + _RandomAccessIterator __value) -> _RandomAccessIterator + { + // TODO: investigate performance benefits from the use of shared variable for the result, + // checking (compare_and_swap idiom) its __value at __first. + if (__or_semantic && __value < __last) + { //found + __par_backend::__cancel_execution(); + return __value; + } + + if (__value > __begin) + { + // modify __end to check the predicate on the boundary __values; + // TODO: to use a custom range with boundaries overlapping + // TODO: investigate what if we remove "if" below and run algorithm on range [__first, __last-1) + // then check the pair [__last-1, __last) + if (__end != __last) + ++__end; + + //correct the global result iterator if the "brick" returns a local "__last" + const _RandomAccessIterator __res = + __internal::__brick_adjacent_find(__begin, __end, __pred, _IsVector{}, __or_semantic); + if (__res < __end) + __value = __res; + } + return __value; + }, + [](_RandomAccessIterator __x, _RandomAccessIterator __y) -> _RandomAccessIterator + { return __x < __y ? 
__x : __y; } //reduce a __value + ); + }); +} + +//------------------------------------------------------------------------ +// nth_element +//------------------------------------------------------------------------ + +template +void +__pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __nth, + _RandomAccessIterator __last, _Compare __comp) noexcept +{ + std::nth_element(__first, __nth, __last, __comp); +} + +template +void +__pattern_nth_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) noexcept +{ + if (__first == __last || __nth == __last) + { + return; + } + + using std::iter_swap; + typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _Tp; + _RandomAccessIterator __x; + do + { + __x = __internal::__pattern_partition(__tag, std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, + [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }); + --__x; + if (__x != __first) + { + iter_swap(__first, __x); + } + // if x > nth then our new range for partition is [first, x) + if (__x - __nth > 0) + { + __last = __x; + } + // if x < nth then our new range for partition is [x, last) + else if (__x - __nth < 0) + { + // if *x == *nth then we can start new partition with x+1 + if (!__comp(*__nth, *__x) && !__comp(*__x, *__nth)) + { + ++__x; + } + else + { + iter_swap(__nth, __x); + } + __first = __x; + } + } while (__x != __nth); +} + +//------------------------------------------------------------------------ +// fill, fill_n +//------------------------------------------------------------------------ +template +void +__brick_fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value, + /* __is_vector = */ std::true_type) noexcept +{ + __unseq_backend::__simd_fill_n(__first, __last - __first, __value); +} + +template +void 
+__brick_fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, + /* __is_vector = */ std::false_type) noexcept +{ + std::fill(__first, __last, __value); +} + +template +void +__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) noexcept +{ + __internal::__brick_fill(__first, __last, __value, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_fill(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, const _Tp& __value) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&__exec, __first, __last, &__value]() + { + __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value](_RandomAccessIterator __begin, _RandomAccessIterator __end) + { __internal::__brick_fill(__begin, __end, __value, _IsVector{}); }); + return __last; + }); +} + +template +_RandomAccessIterator +__brick_fill_n(_RandomAccessIterator __first, _Size __count, const _Tp& __value, + /* __is_vector = */ std::true_type) noexcept +{ + return __unseq_backend::__simd_fill_n(__first, __count, __value); +} + +template +_OutputIterator +__brick_fill_n(_OutputIterator __first, _Size __count, const _Tp& __value, /* __is_vector = */ std::false_type) noexcept +{ + return std::fill_n(__first, __count, __value); +} + +template +_OutputIterator +__pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, const _Tp& __value) noexcept +{ + return __internal::__brick_fill_n(__first, __count, __value, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_fill_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _Size __count, const _Tp& __value) +{ + return __internal::__pattern_fill(__tag, std::forward<_ExecutionPolicy>(__exec), __first, 
__first + __count, + __value); +} + +//------------------------------------------------------------------------ +// generate, generate_n +//------------------------------------------------------------------------ +template +void +__brick_generate(_RandomAccessIterator __first, _RandomAccessIterator __last, _Generator __g, + /* is_vector = */ std::true_type) noexcept +{ + __unseq_backend::__simd_generate_n(__first, __last - __first, __g); +} + +template +void +__brick_generate(_ForwardIterator __first, _ForwardIterator __last, _Generator __g, + /* is_vector = */ std::false_type) noexcept +{ + std::generate(__first, __last, __g); +} + +template +void +__pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Generator __g) noexcept +{ + __internal::__brick_generate(__first, __last, __g, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_generate(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Generator __g) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__g](_RandomAccessIterator __begin, _RandomAccessIterator __end) + { __internal::__brick_generate(__begin, __end, __g, _IsVector{}); }); + return __last; + }); +} + +template +_RandomAccessIterator +__brick_generate_n(_RandomAccessIterator __first, Size __count, _Generator __g, + /* is_vector = */ std::true_type) noexcept +{ + return __unseq_backend::__simd_generate_n(__first, __count, __g); +} + +template +OutputIterator +__brick_generate_n(OutputIterator __first, Size __count, _Generator __g, /* is_vector = */ std::false_type) noexcept +{ + return std::generate_n(__first, __count, __g); +} + +template +_OutputIterator +__pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size 
__count, _Generator __g) noexcept +{ + return __internal::__brick_generate_n(__first, __count, __g, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_generate_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _Size __count, _Generator __g) +{ + static_assert(__are_random_access_iterators<_RandomAccessIterator>::value, + "Pattern-brick error. Should be a random access iterator."); + return __internal::__pattern_generate(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, + __g); +} + +//------------------------------------------------------------------------ +// remove +//------------------------------------------------------------------------ + +template +_ForwardIterator +__brick_remove_if(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, + /* __is_vector = */ std::false_type) noexcept +{ + return std::remove_if(__first, __last, __pred); +} + +template +_RandomAccessIterator +__brick_remove_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred, + /* __is_vector = */ std::true_type) noexcept +{ +#if defined(_PSTL_MONOTONIC_PRESENT) + return __unseq_backend::__simd_remove_if(__first, __last - __first, __pred); +#else + return std::remove_if(__first, __last, __pred); +#endif +} + +template +_ForwardIterator +__pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept +{ + return __internal::__brick_remove_if(__first, __last, __pred, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) noexcept +{ + typedef typename std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; + + if (__first == __last || __first + 1 == __last) + { + // Trivial sequence - 
use serial algorithm + return __internal::__brick_remove_if(__first, __last, __pred, _IsVector{}); + } + + return __internal::__remove_elements( + __tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) + { + __internal::__brick_walk2( + __b, __e, __it, [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); }, _IsVector{}); + }); +} + +//------------------------------------------------------------------------ +// merge +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_merge(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __d_first, _Compare __comp, + /* __is_vector = */ std::false_type) noexcept +{ + return std::merge(__first1, __last1, __first2, __last2, __d_first, __comp); +} + +template +_RandomAccessIterator3 +__brick_merge(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _RandomAccessIterator3 __d_first, _Compare __comp, + /* __is_vector = */ std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::merge(__first1, __last1, __first2, __last2, __d_first, __comp); +} + +template +_OutputIterator +__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __d_first, + _Compare __comp) noexcept +{ + return __internal::__brick_merge(__first1, __last1, __first2, __last2, __d_first, __comp, + typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator3 +__pattern_merge(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 
__d_first, _Compare __comp) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + __par_backend::__parallel_merge( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, + __comp, + [](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, + _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, _Compare __comp) + { return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, _IsVector{}); }); + return __d_first + (__last1 - __first1) + (__last2 - __first2); +} + +//------------------------------------------------------------------------ +// inplace_merge +//------------------------------------------------------------------------ +template +void +__brick_inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, + _Compare __comp, /* __is_vector = */ std::false_type) noexcept +{ + std::inplace_merge(__first, __middle, __last, __comp); +} + +template +void +__brick_inplace_merge(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, + _Compare __comp, /* __is_vector = */ std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial") + std::inplace_merge(__first, __middle, __last, __comp); +} + +template +void +__pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __middle, + _BidirectionalIterator __last, _Compare __comp) noexcept +{ + __internal::__brick_inplace_merge(__first, __middle, __last, __comp, typename _Tag::__is_vector{}); +} + +template +void +__pattern_inplace_merge(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + if (__first == __last || __first == __middle || 
__middle == __last) + { + return; + } + typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _Tp; + auto __n = __last - __first; + __par_backend::__buffer<_Tp> __buf(__n); + _Tp* __r = __buf.get(); + __internal::__except_handler( + [&]() + { + auto __move_values = [](_RandomAccessIterator __x, _Tp* __z) + { + __internal::__invoke_if_else( + std::is_trivial<_Tp>(), [&]() { *__z = std::move(*__x); }, + [&]() { ::new (std::addressof(*__z)) _Tp(std::move(*__x)); }); + }; + + auto __move_sequences = [](_RandomAccessIterator __first1, _RandomAccessIterator __last1, _Tp* __first2) + { return __internal::__brick_uninitialized_move(__first1, __last1, __first2, _IsVector()); }; + + __par_backend::__parallel_merge( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, + __comp, + [__n, __move_values, __move_sequences](_RandomAccessIterator __f1, _RandomAccessIterator __l1, + _RandomAccessIterator __f2, _RandomAccessIterator __l2, + _Tp* __f3, _Compare __comp) + { + (__utils::__serial_move_merge(__n))(__f1, __l1, __f2, __l2, __f3, __comp, __move_values, + __move_values, __move_sequences, __move_sequences); + return __f3 + (__l1 - __f1) + (__l2 - __f2); + }); + __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, + [__r, __first](_Tp* __i, _Tp* __j) + { __brick_move_destroy()(__i, __j, __first + (__i - __r), _IsVector{}); }); + }); +} + +//------------------------------------------------------------------------ +// includes +//------------------------------------------------------------------------ + +template +bool +__pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) noexcept +{ + return std::includes(__first1, __last1, __first2, __last2, __comp); +} + +template +bool +__pattern_includes(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& 
__exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Compare __comp) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + if (__first2 >= __last2) + return true; + + if (__first1 >= __last1 || __comp(*__first2, *__first1) || __comp(*(__last1 - 1), *(__last2 - 1))) + return false; + + __first1 = std::lower_bound(__first1, __last1, *__first2, __comp); + if (__first1 == __last1) + return false; + + if (__last2 - __first2 == 1) + return !__comp(*__first1, *__first2) && !__comp(*__first2, *__first1); + + return __internal::__except_handler( + [&]() + { + return !__internal::__parallel_or( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) + { + _PSTL_ASSERT(__j > __i); + //_PSTL_ASSERT(__j - __i > 1); + + //1. moving boundaries to "consume" subsequence of equal elements + auto __is_equal = [&__comp](_RandomAccessIterator2 __a, _RandomAccessIterator2 __b) -> bool + { return !__comp(*__a, *__b) && !__comp(*__b, *__a); }; + + //1.1 left bound, case "aaa[aaaxyz...]" - searching "x" + if (__i > __first2 && __is_equal(__i, __i - 1)) + { + //whole subrange continues to content equal elements - return "no op" + if (__is_equal(__i, __j - 1)) + return false; + + __i = std::upper_bound(__i, __last2, *__i, __comp); + } + + //1.2 right bound, case "[...aaa]aaaxyz" - searching "x" + if (__j < __last2 && __is_equal(__j - 1, __j)) + __j = std::upper_bound(__j, __last2, *__j, __comp); + + //2. 
testing is __a subsequence of the second range included into the first range + auto __b = std::lower_bound(__first1, __last1, *__i, __comp); + + _PSTL_ASSERT(!__comp(*(__last1 - 1), *__b)); + _PSTL_ASSERT(!__comp(*(__j - 1), *__i)); + return !std::includes(__b, __last1, __i, __j, __comp); + }); + }); +} + +constexpr auto __set_algo_cut_off = 1000; + +template +_OutputIterator +__parallel_set_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp, _SizeFunction __size_func, _SetOP __set_op) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; + typedef typename std::iterator_traits<_OutputIterator>::value_type _Tp; + + struct _SetRange + { + _DifferenceType __pos, __len, __buf_pos; + bool + empty() const + { + return __len == 0; + } + }; + + const _DifferenceType __n1 = __last1 - __first1; + const _DifferenceType __n2 = __last2 - __first2; + + __par_backend::__buffer<_Tp> __buf(__size_func(__n1, __n2)); + + return __internal::__except_handler( + [&__exec, __n1, __first1, __last1, __first2, __last2, __result, __comp, __size_func, __set_op, &__buf]() + { + auto __buffer = __buf.get(); + _DifferenceType __m{}; + auto __scan = [=](_DifferenceType, _DifferenceType, const _SetRange& __s) { // Scan + if (!__s.empty()) + __brick_move_destroy()(__buffer + __s.__buf_pos, __buffer + (__s.__buf_pos + __s.__len), + __result + __s.__pos, _IsVector{}); + }; + __par_backend::__parallel_strict_scan( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, + [=](_DifferenceType __i, _DifferenceType __len) { // Reduce + //[__b; __e) - a subrange of the first sequence, to reduce + _ForwardIterator1 __b = __first1 + __i, __e = __first1 + (__i + __len); + + //try searching for the 
first element which not equal to *__b + if (__b != __first1) + __b = std::upper_bound(__b, __last1, *__b, __comp); + + //try searching for the first element which not equal to *__e + if (__e != __last1) + __e = std::upper_bound(__e, __last1, *__e, __comp); + + //check is [__b; __e) empty + if (__e - __b < 1) + { + _ForwardIterator2 __bb = __last2; + if (__b != __last1) + __bb = std::lower_bound(__first2, __last2, *__b, __comp); + + const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2)); + return _SetRange{0, 0, __buf_pos}; + } + + //try searching for "corresponding" subrange [__bb; __ee) in the second sequence + _ForwardIterator2 __bb = __first2; + if (__b != __first1) + __bb = std::lower_bound(__first2, __last2, *__b, __comp); + + _ForwardIterator2 __ee = __last2; + if (__e != __last1) + __ee = std::lower_bound(__bb, __last2, *__e, __comp); + + const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2)); + auto __buffer_b = __buffer + __buf_pos; + auto __res = __set_op(__b, __e, __bb, __ee, __buffer_b, __comp); + + return _SetRange{0, __res - __buffer_b, __buf_pos}; + }, + [](const _SetRange& __a, const _SetRange& __b) { // Combine + if (__b.__buf_pos > __a.__buf_pos || ((__b.__buf_pos == __a.__buf_pos) && !__b.empty())) + return _SetRange{__a.__pos + __a.__len + __b.__pos, __b.__len, __b.__buf_pos}; + return _SetRange{__b.__pos + __b.__len + __a.__pos, __a.__len, __a.__buf_pos}; + }, + __scan, // Scan + [&__m, &__scan](const _SetRange& __total) { // Apex + //final scan + __scan(0, 0, __total); + __m = __total.__pos + __total.__len; + }); + return __result + __m; + }); +} + +//a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference' +template +_OutputIterator +__parallel_set_union_op(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp, 
_SetUnionOp __set_union_op) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; + + const auto __n1 = __last1 - __first1; + const auto __n2 = __last2 - __first2; + + auto copy_range1 = [](_ForwardIterator1 __begin, _ForwardIterator1 __end, _OutputIterator __res) + { return __internal::__brick_copy(__begin, __end, __res, typename _Tag::__is_vector{}); }; + auto copy_range2 = [](_ForwardIterator2 __begin, _ForwardIterator2 __end, _OutputIterator __res) + { return __internal::__brick_copy(__begin, __end, __res, typename _Tag::__is_vector{}); }; + + // {1} {}: parallel copying just first sequence + if (__n2 == 0) + return __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, copy_range1); + + // {} {2}: parallel copying justmake second sequence + if (__n1 == 0) + return __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result, copy_range2); + + // testing whether the sequences are intersected + _ForwardIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp); + + if (__left_bound_seq_1 == __last1) + { + //{1} < {2}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 + __par_backend::__parallel_invoke( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), + [=] + { + __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, copy_range1); + }, + [=] + { + __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result + __n1, copy_range2); + }); + return __result + __n1 + __n2; + } + + // testing whether the sequences are intersected + _ForwardIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, *__first1, __comp); + + if (__left_bound_seq_2 == __last2) + { + //{2} < {1}: seq2 is 
wholly greater than seq1, so, do parallel copying seq1 and seq2 + __par_backend::__parallel_invoke( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), + [=] + { + __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result, copy_range2); + }, + [=] + { + __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result + __n2, copy_range1); + }); + return __result + __n1 + __n2; + } + + const auto __m1 = __left_bound_seq_1 - __first1; + if (__m1 > __set_algo_cut_off) + { + auto __res_or = __result; + __result += __m1; //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) + __par_backend::__parallel_invoke( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), + //do parallel copying of [first1; left_bound_seq_1) + [=] + { + __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, + __left_bound_seq_1, __res_or, copy_range1); + }, + [=, &__result] + { + __result = __internal::__parallel_set_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op); + }); + return __result; + } + + const auto __m2 = __left_bound_seq_2 - __first2; + _PSTL_ASSERT(__m1 == 0 || __m2 == 0); + if (__m2 > __set_algo_cut_off) + { + auto __res_or = __result; + __result += __m2; //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) + __par_backend::__parallel_invoke( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), + //do parallel copying of [first2; left_bound_seq_2) + [=] + { + __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first2, + __left_bound_seq_2, __res_or, copy_range2); + }, + [=, &__result] + { + __result = __internal::__parallel_set_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, 
__left_bound_seq_2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op); + }); + return __result; + } + + return __internal::__parallel_set_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); +} + +//------------------------------------------------------------------------ +// set_union +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_set_union(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, + /*__is_vector=*/std::false_type) noexcept +{ + return std::set_union(__first1, __last1, __first2, __last2, __result, __comp); +} + +template +struct __BrickCopyConstruct +{ + template + _OutputIterator + operator()(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result) + { + return __brick_uninitialized_copy(__first, __last, __result, _IsVector()); + } +}; + +template +_OutputIterator +__brick_set_union(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _OutputIterator __result, _Compare __comp, + /*__is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::set_union(__first1, __last1, __first2, __last2, __result, __comp); +} + +template +_OutputIterator +__pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept +{ + return __internal::__brick_set_union(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); +} + +template +_OutputIterator 
+__pattern_set_union(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _OutputIterator __result, _Compare __comp) +{ + + const auto __n1 = __last1 - __first1; + const auto __n2 = __last2 - __first2; + + // use serial algorithm + if (__n1 + __n2 <= __set_algo_cut_off) + return std::set_union(__first1, __last1, __first2, __last2, __result, __comp); + + typedef typename std::iterator_traits<_OutputIterator>::value_type _Tp; + return __parallel_set_union_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) + { + return __pstl::__utils::__set_union_construct(__first1, __last1, __first2, __last2, __result, __comp, + __BrickCopyConstruct<_IsVector>()); + }); +} + +//------------------------------------------------------------------------ +// set_intersection +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_set_intersection(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, + /*__is_vector=*/std::false_type) noexcept +{ + return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp); +} + +template +_RandomAccessIterator3 +__brick_set_intersection(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __result, _Compare __comp, + /*__is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::set_intersection(__first1, __last1, __first2, __last2, 
__result, __comp); +} + +template +_OutputIterator +__pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept +{ + return __internal::__brick_set_intersection(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator3 +__pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) +{ + typedef typename std::iterator_traits<_RandomAccessIterator3>::value_type _Tp; + typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + + const auto __n1 = __last1 - __first1; + const auto __n2 = __last2 - __first2; + + // intersection is empty + if (__n1 == 0 || __n2 == 0) + return __result; + + // testing whether the sequences are intersected + _RandomAccessIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp); + //{1} < {2}: seq 2 is wholly greater than seq 1, so, the intersection is empty + if (__left_bound_seq_1 == __last1) + return __result; + + // testing whether the sequences are intersected + _RandomAccessIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, *__first1, __comp); + //{2} < {1}: seq 1 is wholly greater than seq 2, so, the intersection is empty + if (__left_bound_seq_2 == __last2) + return __result; + + const auto __m1 = __last1 - __left_bound_seq_1 + __n2; + if (__m1 > __set_algo_cut_off) + { + //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) + return __internal::__parallel_set_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, + __comp, [](_DifferenceType 
__n, _DifferenceType __m) { return std::min(__n, __m); }, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { + return __pstl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, __result, + __comp); + }); + } + + const auto __m2 = __last2 - __left_bound_seq_2 + __n1; + if (__m2 > __set_algo_cut_off) + { + //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) + __result = __internal::__parallel_set_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); }, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { + return __pstl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, __result, + __comp); + }); + return __result; + } + + // [left_bound_seq_1; last1) and [left_bound_seq_2; last2) - use serial algorithm + return std::set_intersection(__left_bound_seq_1, __last1, __left_bound_seq_2, __last2, __result, __comp); +} + +//------------------------------------------------------------------------ +// set_difference +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_set_difference(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, + /*__is_vector=*/std::false_type) noexcept +{ + return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); +} + +template +_RandomAccessIterator3 +__brick_set_difference(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, 
_RandomAccessIterator3 __result, _Compare __comp, + /*__is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); +} + +template +_OutputIterator +__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept +{ + return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator3 +__pattern_set_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) +{ + typedef typename std::iterator_traits<_RandomAccessIterator3>::value_type _Tp; + typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + + const auto __n1 = __last1 - __first1; + const auto __n2 = __last2 - __first2; + + // {} \ {2}: the difference is empty + if (__n1 == 0) + return __result; + + // {1} \ {}: parallel copying just first sequence + if (__n2 == 0) + return __internal::__pattern_walk2_brick( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, + [](_RandomAccessIterator1 __begin, _RandomAccessIterator1 __end, _RandomAccessIterator3 __res) + { return __internal::__brick_copy(__begin, __end, __res, _IsVector{}); }); + + // testing whether the sequences are intersected + _RandomAccessIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp); + //{1} < {2}: seq 2 is wholly greater than seq 1, so, parallel copying just first sequence + if (__left_bound_seq_1 == __last1) + return __internal::__pattern_walk2_brick( + 
__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, + [](_RandomAccessIterator1 __begin, _RandomAccessIterator1 __end, _RandomAccessIterator3 __res) + { return __internal::__brick_copy(__begin, __end, __res, _IsVector{}); }); + + // testing whether the sequences are intersected + _RandomAccessIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, *__first1, __comp); + //{2} < {1}: seq 1 is wholly greater than seq 2, so, parallel copying just first sequence + if (__left_bound_seq_2 == __last2) + return __internal::__pattern_walk2_brick( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, + [](_RandomAccessIterator1 __begin, _RandomAccessIterator1 __end, _RandomAccessIterator3 __res) + { return __internal::__brick_copy(__begin, __end, __res, _IsVector{}); }); + + if (__n1 + __n2 > __set_algo_cut_off) + return __parallel_set_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType) { return __n; }, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) + { + return __pstl::__utils::__set_difference_construct(__first1, __last1, __first2, __last2, __result, + __comp, __BrickCopyConstruct<_IsVector>()); + }); + + // use serial algorithm + return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); +} + +//------------------------------------------------------------------------ +// set_symmetric_difference +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_set_symmetric_difference(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, + /*__is_vector=*/std::false_type) noexcept +{ + return 
std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); +} + +template +_RandomAccessIterator3 +__brick_set_symmetric_difference(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __result, _Compare __comp, + /*__is_vector=*/std::true_type) noexcept +{ + _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); + return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); +} + +template +_OutputIterator +__pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept +{ + return __internal::__brick_set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator3 +__pattern_set_symmetric_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, + _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __result, _Compare __comp) +{ + + const auto __n1 = __last1 - __first1; + const auto __n2 = __last2 - __first2; + + // use serial algorithm + if (__n1 + __n2 <= __set_algo_cut_off) + return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); + + typedef typename std::iterator_traits<_RandomAccessIterator3>::value_type _Tp; + return __internal::__parallel_set_union_op( + __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) + { + return 
__pstl::__utils::__set_symmetric_difference_construct(__first1, __last1, __first2, __last2, __result, + __comp, __BrickCopyConstruct<_IsVector>()); + }); +} + +//------------------------------------------------------------------------ +// is_heap_until +//------------------------------------------------------------------------ + +template +_RandomAccessIterator +__brick_is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + /* __is_vector = */ std::false_type) noexcept +{ + return std::is_heap_until(__first, __last, __comp); +} + +template +_RandomAccessIterator +__brick_is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + /* __is_vector = */ std::true_type) noexcept +{ + if (__last - __first < 2) + return __last; + typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType; + return __unseq_backend::__simd_first( + __first, _SizeType(0), __last - __first, + [&__comp](_RandomAccessIterator __it, _SizeType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); }); +} + +template +_RandomAccessIterator +__pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept +{ + return __internal::__brick_is_heap_until(__first, __last, __comp, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__is_heap_until_local(_RandomAccessIterator __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp, + /* __is_vector = */ std::false_type) noexcept +{ + _DifferenceType __i = __begin; + for (; __i < __end; ++__i) + { + if (__comp(__first[(__i - 1) / 2], __first[__i])) + { + break; + } + } + return __first + __i; +} + +template +_RandomAccessIterator +__is_heap_until_local(_RandomAccessIterator __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp, + /* __is_vector = */ std::true_type) noexcept +{ + return __unseq_backend::__simd_first( + __first, 
__begin, __end, + [&__comp](_RandomAccessIterator __it, _DifferenceType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); }); +} + +template +_RandomAccessIterator +__pattern_is_heap_until(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) noexcept +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + if (__last - __first < 2) + return __last; + + return __internal::__except_handler( + [&]() + { + return __parallel_find( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, __comp, + _IsVector{}); + }, + std::less::difference_type>(), /*is_first=*/true); + }); +} + +//------------------------------------------------------------------------ +// min_element +//------------------------------------------------------------------------ + +template +_ForwardIterator +__brick_min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp, + /* __is_vector = */ std::false_type) noexcept +{ + return std::min_element(__first, __last, __comp); +} + +template +_RandomAccessIterator +__brick_min_element(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + /* __is_vector = */ std::true_type) noexcept +{ +#if defined(_PSTL_UDR_PRESENT) + return __unseq_backend::__simd_min_element(__first, __last - __first, __comp); +#else + return std::min_element(__first, __last, __comp); +#endif +} + +template +_ForwardIterator +__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Compare __comp) noexcept +{ + return __internal::__brick_min_element(__first, __last, __comp, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator +__pattern_min_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& 
__exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) +{ + if (__first == __last) + return __last; + + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + return __par_backend::__parallel_reduce( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, __first, + [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, + _RandomAccessIterator __init) -> _RandomAccessIterator + { + const _RandomAccessIterator subresult = + __internal::__brick_min_element(__begin, __end, __comp, _IsVector{}); + return __internal::__cmp_iterators_by_values(__init, subresult, __comp); + }, + [=](_RandomAccessIterator __it1, _RandomAccessIterator __it2) -> _RandomAccessIterator + { return __internal::__cmp_iterators_by_values(__it1, __it2, __comp); }); + }); +} + +//------------------------------------------------------------------------ +// minmax_element +//------------------------------------------------------------------------ + +template +std::pair<_ForwardIterator, _ForwardIterator> +__brick_minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp, + /* __is_vector = */ std::false_type) noexcept +{ + return std::minmax_element(__first, __last, __comp); +} + +template +std::pair<_RandomAccessIterator, _RandomAccessIterator> +__brick_minmax_element(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + /* __is_vector = */ std::true_type) noexcept +{ +#if defined(_PSTL_UDR_PRESENT) + return __unseq_backend::__simd_minmax_element(__first, __last - __first, __comp); +#else + return std::minmax_element(__first, __last, __comp); +#endif +} + +template +std::pair<_ForwardIterator, _ForwardIterator> +__pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Compare __comp) noexcept +{ + return __internal::__brick_minmax_element(__first, __last, __comp, typename 
_Tag::__is_vector{}); +} + +template +std::pair<_RandomAccessIterator, _RandomAccessIterator> +__pattern_minmax_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) +{ + if (__first == __last) + return std::make_pair(__first, __first); + + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler([&]() { + typedef std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; + + return __par_backend::__parallel_reduce( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, + std::make_pair(__first, __first), + [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> _Result + { + const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, _IsVector{}); + return std::make_pair( + __internal::__cmp_iterators_by_values(__subresult.first, __init.first, __comp), + __internal::__cmp_iterators_by_values(__init.second, __subresult.second, std::not_fn(__comp))); + }, + [=](_Result __p1, _Result __p2) -> _Result + { + return std::make_pair( + __internal::__cmp_iterators_by_values(__p1.first, __p2.first, __comp), + __internal::__cmp_iterators_by_values(__p2.second, __p1.second, std::not_fn(__comp))); + }); + }); +} + +//------------------------------------------------------------------------ +// mismatch +//------------------------------------------------------------------------ +template +std::pair<_ForwardIterator1, _ForwardIterator2> +__mismatch_serial(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _BinaryPredicate __pred) +{ +#if defined(_PSTL_CPP14_2RANGE_MISMATCH_EQUAL_PRESENT) + return std::mismatch(__first1, __last1, __first2, __last2, __pred); +#else + for (; __first1 != __last1 && __first2 != __last2 && __pred(*__first1, *__first2); ++__first1, ++__first2) + { + } + return 
std::make_pair(__first1, __first2); +#endif +} + +template +std::pair<_ForwardIterator1, _ForwardIterator2> +__brick_mismatch(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _Predicate __pred, /* __is_vector = */ std::false_type) noexcept +{ + return __mismatch_serial(__first1, __last1, __first2, __last2, __pred); +} + +template +std::pair<_RandomAccessIterator1, _RandomAccessIterator2> +__brick_mismatch(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _Predicate __pred, /* __is_vector = */ std::true_type) noexcept +{ + auto __n = std::min(__last1 - __first1, __last2 - __first2); + return __unseq_backend::__simd_first(__first1, __n, __first2, std::not_fn(__pred)); +} + +template +std::pair<_ForwardIterator1, _ForwardIterator2> +__pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Predicate __pred) noexcept +{ + return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, typename _Tag::__is_vector{}); +} + +template +std::pair<_RandomAccessIterator1, _RandomAccessIterator2> +__pattern_mismatch(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Predicate __pred) noexcept +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler([&]() { + auto __n = std::min(__last1 - __first1, __last2 - __first2); + auto __result = __internal::__parallel_find( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) + { + return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j 
- __first1), + __pred, _IsVector{}) + .first; + }, + std::less::difference_type>(), /*is_first=*/true); + return std::make_pair(__result, __first2 + (__result - __first1)); + }); +} + +//------------------------------------------------------------------------ +// lexicographical_compare +//------------------------------------------------------------------------ + +template +bool +__brick_lexicographical_compare(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _Compare __comp, + /* __is_vector = */ std::false_type) noexcept +{ + return std::lexicographical_compare(__first1, __last1, __first2, __last2, __comp); +} + +template +bool +__brick_lexicographical_compare(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Compare __comp, + /* __is_vector = */ std::true_type) noexcept +{ + if (__first2 == __last2) + { // if second sequence is empty + return false; + } + else if (__first1 == __last1) + { // if first sequence is empty + return true; + } + else + { + typedef typename std::iterator_traits<_RandomAccessIterator1>::reference ref_type1; + typedef typename std::iterator_traits<_RandomAccessIterator2>::reference ref_type2; + --__last1; + --__last2; + auto __n = std::min(__last1 - __first1, __last2 - __first2); + std::pair<_RandomAccessIterator1, _RandomAccessIterator2> __result = __unseq_backend::__simd_first( + __first1, __n, __first2, [__comp](const ref_type1 __x, const ref_type2 __y) mutable { + return __comp(__x, __y) || __comp(__y, __x); + }); + + if (__result.first == __last1 && __result.second != __last2) + { // if first sequence shorter than second + return !__comp(*__result.second, *__result.first); + } + else + { // if second sequence shorter than first or both have the same number of elements + return __comp(*__result.first, *__result.second); + } + } +} + +template +bool +__pattern_lexicographical_compare(_Tag, 
_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) noexcept +{ + return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, + typename _Tag::__is_vector{}); +} + +template +bool +__pattern_lexicographical_compare(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, + _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Compare __comp) noexcept +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + if (__first2 == __last2) + { // if second sequence is empty + return false; + } + else if (__first1 == __last1) + { // if first sequence is empty + return true; + } + else + { + typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _RefType1; + typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _RefType2; + --__last1; + --__last2; + auto __n = std::min(__last1 - __first1, __last2 - __first2); + auto __result = __internal::__parallel_find( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, &__comp](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) + { + return __internal::__brick_mismatch( + __i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), + [&__comp](const _RefType1 __x, const _RefType2 __y) + { return !__comp(__x, __y) && !__comp(__y, __x); }, + _IsVector{}) + .first; + }, + std::less::difference_type>(), /*is_first=*/true); + + if (__result == __last1 && __first2 + (__result - __first1) != __last2) + { // if first sequence shorter than second + return !__comp(*(__first2 + (__result - __first1)), *__result); + } + else + { // if second sequence shorter than first or both have the same number of elements + return __comp(*__result, *(__first2 + (__result - __first1))); + } + } +} + +} // namespace __internal +} // 
namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_ALGORITHM_IMPL_H */ diff --git a/Components/Include/pstl/internal/execution_defs.h b/Components/Include/pstl/internal/execution_defs.h new file mode 100644 index 0000000..d7c4126 --- /dev/null +++ b/Components/Include/pstl/internal/execution_defs.h @@ -0,0 +1,100 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_EXECUTION_POLICY_DEFS_H +#define _PSTL_EXECUTION_POLICY_DEFS_H + +#include + +#include "pstl_config.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace execution +{ +inline namespace v1 +{ + +// 2.4, Sequential execution policy +class sequenced_policy +{ +}; + +// 2.5, Parallel execution policy +class parallel_policy +{ +}; + +// 2.6, Parallel+Vector execution policy +class parallel_unsequenced_policy +{ +}; + +class unsequenced_policy +{ +}; + +// 2.8, Execution policy objects +constexpr sequenced_policy seq{}; +constexpr parallel_policy par{}; +constexpr parallel_unsequenced_policy par_unseq{}; +constexpr unsequenced_policy unseq{}; + +// 2.3, Execution policy type trait +template +struct is_execution_policy : std::false_type +{ +}; + +template <> +struct is_execution_policy<__pstl::execution::sequenced_policy> : std::true_type +{ +}; +template <> +struct is_execution_policy<__pstl::execution::parallel_policy> : std::true_type +{ +}; +template <> +struct is_execution_policy<__pstl::execution::parallel_unsequenced_policy> : std::true_type +{ +}; +template <> +struct is_execution_policy<__pstl::execution::unsequenced_policy> : std::true_type +{ +}; + +#if defined(_PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT) +template +constexpr bool 
is_execution_policy_v = __pstl::execution::is_execution_policy::value; +#endif + +} // namespace v1 +} // namespace execution + +namespace __internal +{ +template +using __enable_if_execution_policy = + typename std::enable_if<__pstl::execution::is_execution_policy::type>::value, + T>::type; + +template +struct __serial_tag; +template +struct __parallel_tag; + +} // namespace __internal + +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_EXECUTION_POLICY_DEFS_H */ diff --git a/Components/Include/pstl/internal/execution_impl.h b/Components/Include/pstl/internal/execution_impl.h new file mode 100644 index 0000000..5dc622b --- /dev/null +++ b/Components/Include/pstl/internal/execution_impl.h @@ -0,0 +1,105 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_EXECUTION_IMPL_H +#define _PSTL_EXECUTION_IMPL_H + +#include +#include + +#include "pstl_config.h" +#include "execution_defs.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __internal +{ + +template +using __are_iterators_of = std::conjunction< + std::is_base_of<_IteratorTag, typename std::iterator_traits>::iterator_category>...>; + +template +using __are_random_access_iterators = __are_iterators_of; + +struct __serial_backend_tag +{ +}; +struct __tbb_backend_tag +{ +}; +struct __openmp_backend_tag +{ +}; + +#if defined(_PSTL_PAR_BACKEND_TBB) +using __par_backend_tag = __tbb_backend_tag; +#elif defined(_PSTL_PAR_BACKEND_OPENMP) +using __par_backend_tag = __openmp_backend_tag; +#elif defined(_PSTL_PAR_BACKEND_SERIAL) +using __par_backend_tag = __serial_backend_tag; +#else +# error "A parallel backend must be specified"; +#endif 
+ +template +struct __serial_tag +{ + using __is_vector = _IsVector; +}; + +template +struct __parallel_tag +{ + using __is_vector = _IsVector; + // backend tag can be change depending on + // TBB availability in the environment + using __backend_tag = __par_backend_tag; +}; + +template +using __tag_type = typename std::conditional<__internal::__are_random_access_iterators<_IteratorTypes...>::value, + __parallel_tag<_IsVector>, __serial_tag<_IsVector>>::type; + +template +__serial_tag +__select_backend(__pstl::execution::sequenced_policy, _IteratorTypes&&...) +{ + return {}; +} + +template +__serial_tag<__internal::__are_random_access_iterators<_IteratorTypes...>> +__select_backend(__pstl::execution::unsequenced_policy, _IteratorTypes&&...) +{ + return {}; +} + +template +__tag_type +__select_backend(__pstl::execution::parallel_policy, _IteratorTypes&&...) +{ + return {}; +} + +template +__tag_type<__internal::__are_random_access_iterators<_IteratorTypes...>, _IteratorTypes...> +__select_backend(__pstl::execution::parallel_unsequenced_policy, _IteratorTypes&&...) +{ + return {}; +} + +} // namespace __internal +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_EXECUTION_IMPL_H */ diff --git a/Components/Include/pstl/internal/glue_algorithm_defs.h b/Components/Include/pstl/internal/glue_algorithm_defs.h new file mode 100644 index 0000000..28a7f92 --- /dev/null +++ b/Components/Include/pstl/internal/glue_algorithm_defs.h @@ -0,0 +1,558 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_GLUE_ALGORITHM_DEFS_H +#define _PSTL_GLUE_ALGORITHM_DEFS_H + +#include +#include + +#include "execution_defs.h" +#include "pstl_config.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace std +{ + +// [alg.any_of] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred); + +// [alg.all_of] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +all_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred); + +// [alg.none_of] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +none_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred); + +// [alg.foreach] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +for_each(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +for_each_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f); + +// [alg.find] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +find_if_not(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, 
const _Tp& __value); + +// [alg.find.end] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1> +find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, + _ForwardIterator2 __s_last, _BinaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1> +find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, + _ForwardIterator2 __s_last); + +// [alg.find_first_of] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1> +find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1> +find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last); + +// [alg.adjacent_find] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred); + +// [alg.count] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, + typename iterator_traits<_ForwardIterator>::difference_type> +count(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, + typename iterator_traits<_ForwardIterator>::difference_type> +count_if(_ExecutionPolicy&& 
__exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred); + +// [alg.search] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1> +search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, + _ForwardIterator2 __s_last, _BinaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1> +search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, + _ForwardIterator2 __s_last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count, + const _Tp& __value, _BinaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count, + const _Tp& __value); + +// [alg.copy] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +copy_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Size __n, _ForwardIterator2 __result); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, + _Predicate __pred); + +// [alg.swap] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +swap_ranges(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2); 
+ +// [alg.transform] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, + _UnaryOperation __op); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator __result, _BinaryOperation __op); + +// [alg.replace] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, + const _Tp& __new_value); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +replace(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __old_value, + const _Tp& __new_value); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +replace_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _UnaryPredicate __pred, const _Tp& __new_value); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +replace_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, + const _Tp& __old_value, const _Tp& __new_value); + +// [alg.fill] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, const _Tp& __value); + +// [alg.generate] +template 
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +generate_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, _Generator __g); + +// [alg.remove] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +remove_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _Predicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +remove_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, + const _Tp& __value); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +remove_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +remove(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value); + +// [alg.unique] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, + _BinaryPredicate __pred); + +template 
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result); + +// [alg.reverse] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, + _ForwardIterator __d_first); + +// [alg.rotate] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +rotate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __middle, _ForwardIterator1 __last, + _ForwardIterator2 __result); + +// [alg.partitions] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +is_partitioned(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +partition(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _BidirectionalIterator> +stable_partition(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, + _UnaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>> +partition_copy(_ExecutionPolicy&& __exec, _ForwardIterator 
__first, _ForwardIterator __last, + _ForwardIterator1 __out_true, _ForwardIterator2 __out_false, _UnaryPredicate __pred); + +// [alg.sort] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last); + +// [stable.sort] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last); + +// [mismatch] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>> +mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _BinaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>> +mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _BinaryPredicate __pred); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>> +mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>> +mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 
__first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2); + +// [alg.equal] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _BinaryPredicate __p); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _BinaryPredicate __p); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2); + +// [alg.move] +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +move(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first); + +// [partial.sort] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, + _RandomAccessIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, + _RandomAccessIterator __last); + +// [partial.sort.copy] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator> +partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _RandomAccessIterator __d_first, 
_RandomAccessIterator __d_last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator> +partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _RandomAccessIterator __d_first, _RandomAccessIterator __d_last); + +// [is.sorted] +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +// [alg.nth.element] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth, + _RandomAccessIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth, + _RandomAccessIterator __last); + +// [alg.merge] +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _ForwardIterator __d_first, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> 
+merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _ForwardIterator __d_first); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle, + _BidirectionalIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle, + _BidirectionalIterator __last); + +// [includes] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2); + +// [set.union] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _ForwardIterator __result); + +// [set.intersection] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, 
_ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result); + +// [set.difference] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result); + +// [set.symmetric.difference] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, + _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result); + +// [is.heap] +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator> +is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator> +is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator 
__first, _RandomAccessIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last); + +// [alg.min.max] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +max_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +max_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator, _ForwardIterator>> +minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator, _ForwardIterator>> +minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +// [alg.lex.comparison] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp); + 
+template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2); + +} // namespace std + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_GLUE_ALGORITHM_DEFS_H */ diff --git a/Components/Include/pstl/internal/glue_algorithm_impl.h b/Components/Include/pstl/internal/glue_algorithm_impl.h new file mode 100644 index 0000000..fb0c19d --- /dev/null +++ b/Components/Include/pstl/internal/glue_algorithm_impl.h @@ -0,0 +1,1108 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_GLUE_ALGORITHM_IMPL_H +#define _PSTL_GLUE_ALGORITHM_IMPL_H + +#include + +#include "pstl_config.h" + +#include "execution_defs.h" +#include "utils.h" +#include "algorithm_fwd.h" +#include "numeric_fwd.h" /* count and count_if use __pattern_transform_reduce */ + +#include "execution_impl.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace std +{ + +// [alg.any_of] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + return __pstl::__internal::__pattern_any_of(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + __pred); +} + +// [alg.all_of] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +all_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred) +{ + return 
!std::any_of(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::not_fn(__pred)); +} + +// [alg.none_of] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> +none_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) +{ + return !std::any_of(std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); +} + +// [alg.foreach] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +for_each(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + __pstl::__internal::__pattern_walk1(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __f); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +for_each_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + return __pstl::__internal::__pattern_walk1_n(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, + __f); +} + +// [alg.find] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + return __pstl::__internal::__pattern_find_if(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +find_if_not(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) +{ + return std::find_if(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::not_fn(__pred)); +} + +template 
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) +{ + return std::find_if(std::forward<_ExecutionPolicy>(__exec), __first, __last, + __pstl::__internal::__equal_value<_Tp>(__value)); +} + +// [alg.find.end] +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1> +find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, + _ForwardIterator2 __s_last, _BinaryPredicate __pred) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __s_first); + + return __pstl::__internal::__pattern_find_end(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, + __last, __s_first, __s_last, __pred); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1> +find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, + _ForwardIterator2 __s_last) +{ + return std::find_end(std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, + std::equal_to<>()); +} + +// [alg.find_first_of] +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1> +find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __s_first); + + return __pstl::__internal::__pattern_find_first_of(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, + __last, __s_first, __s_last, __pred); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1> +find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, 
+ _ForwardIterator2 __s_first, _ForwardIterator2 __s_last) +{ + return std::find_first_of(std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, + std::equal_to<>()); +} + +// [alg.adjacent_find] +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType; + return __pstl::__internal::__pattern_adjacent_find(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, + __last, std::equal_to<_ValueType>(), /*first_semantic*/ false); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + return __pstl::__internal::__pattern_adjacent_find(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred, /*first_semantic*/ false); +} + +// [alg.count] + +// Implementation note: count and count_if call the pattern directly instead of calling std::transform_reduce +// so that we do not have to include . 
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy,
+                                                 typename iterator_traits<_ForwardIterator>::difference_type>
+count(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType;
+    return __pstl::__internal::__pattern_count(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                               [&__value](const _ValueType& __x) { return __value == __x; });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy,
+                                                 typename iterator_traits<_ForwardIterator>::difference_type>
+count_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_count(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                               __pred);
+}
+
+// [alg.search] -- predicate overloads obtain a dispatch tag from __select_backend and call the __pattern_* routine;
+// overloads without a predicate delegate to the predicate overload with an equality functor.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+       _ForwardIterator2 __s_last, _BinaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __s_first);
+
+    return __pstl::__internal::__pattern_search(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                __s_first, __s_last, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+       _ForwardIterator2 __s_last)
+{
+    return std::search(std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, std::equal_to<>());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count,
+         const _Tp& __value, _BinaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_search_n(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                  __last, __count, __value, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count,
+         const _Tp& __value)
+{
+    return std::search_n(std::forward<_ExecutionPolicy>(__exec), __first, __last, __count, __value,
+                         std::equal_to<typename iterator_traits<_ForwardIterator>::value_type>());
+}
+
+// [alg.copy] -- copy/copy_n hand a __brick_copy* lambda to __pattern_walk2_brick*; the lambda passes __is_vector{},
+// a type taken from the dispatch tag, as the last brick argument.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    using __is_vector = typename decltype(__dispatch_tag)::__is_vector;
+
+    return __pstl::__internal::__pattern_walk2_brick(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+        [](_ForwardIterator1 __begin, _ForwardIterator1 __end, _ForwardIterator2 __res)
+        { return __pstl::__internal::__brick_copy(__begin, __end, __res, __is_vector{}); });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _Size, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+copy_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Size __n, _ForwardIterator2 __result)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    using __is_vector = typename decltype(__dispatch_tag)::__is_vector;
+
+    return __pstl::__internal::__pattern_walk2_brick_n(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result,
+        [](_ForwardIterator1 __begin, _Size __sz, _ForwardIterator2 __res)
+        { return __pstl::__internal::__brick_copy_n(__begin, __sz, __res, __is_vector{}); });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+        _Predicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    return __pstl::__internal::__pattern_copy_if(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                 __last, __result, __pred);
+}
+
+// [alg.swap] -- swap_ranges walks both ranges pairwise; `using std::swap` keeps ADL-found swap overloads usable.
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+swap_ranges(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+            _ForwardIterator2 __first2)
+{
+    typedef typename iterator_traits<_ForwardIterator1>::reference _ReferenceType1;
+    typedef typename iterator_traits<_ForwardIterator2>::reference _ReferenceType2;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+
+    return __pstl::__internal::__pattern_walk2(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                               __last1, __first2,
+                                               [](_ReferenceType1 __x, _ReferenceType2 __y)
+                                               {
+                                                   using std::swap;
+                                                   swap(__x, __y);
+                                               });
+}
+
+// [alg.transform] -- the operation is captured by value in a mutable lambda, so a non-const call operator works.
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _UnaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+          _UnaryOperation __op)
+{
+    typedef typename iterator_traits<_ForwardIterator1>::reference _InputType;
+    typedef typename iterator_traits<_ForwardIterator2>::reference _OutputType;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    return __pstl::__internal::__pattern_walk2(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                               __result,
+                                               [__op](_InputType __x, _OutputType __y) mutable { __y = __op(__x); });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+          _ForwardIterator __result, _BinaryOperation __op)
+{
+    typedef typename iterator_traits<_ForwardIterator1>::reference _Input1Type;
+    typedef typename iterator_traits<_ForwardIterator2>::reference _Input2Type;
+    typedef typename iterator_traits<_ForwardIterator>::reference _OutputType;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2, __result);
+
+    return __pstl::__internal::__pattern_walk3(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result,
+        [__op](_Input1Type x, _Input2Type y, _OutputType z) mutable { z = __op(x, y); });
+}
+
+// [alg.replace] -- replace/replace_copy are expressed through the *_if forms with an __equal_value<_Tp> predicate.
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
+           const _Tp& __new_value)
+{
+    typedef typename iterator_traits<_ForwardIterator>::reference _ElementType;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_walk1(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                        [&__pred, &__new_value](_ElementType __elem)
+                                        {
+                                            if (__pred(__elem))
+                                            {
+                                                __elem = __new_value;
+                                            }
+                                        });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+replace(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __old_value,
+        const _Tp& __new_value)
+{
+    std::replace_if(std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                    __pstl::__internal::__equal_value<_Tp>(__old_value), __new_value);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _UnaryPredicate, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+replace_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                _ForwardIterator2 __result, _UnaryPredicate __pred, const _Tp& __new_value)
+{
+    typedef typename iterator_traits<_ForwardIterator1>::reference _InputType;
+    typedef typename iterator_traits<_ForwardIterator2>::reference _OutputType;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    return __pstl::__internal::__pattern_walk2(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+        [__pred, &__new_value](_InputType __x, _OutputType __y) mutable { __y = __pred(__x) ? __new_value : __x; });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+replace_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+             const _Tp& __old_value, const _Tp& __new_value)
+{
+    return std::replace_copy_if(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+                                __pstl::__internal::__equal_value<_Tp>(__old_value), __new_value);
+}
+
+// [alg.fill] -- fill_n returns __first unchanged, without selecting a backend, when __count <= 0.
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_fill(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                       __value);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, const _Tp& __value)
+{
+    if (__count <= 0)
+        return __first;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_fill_n(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                __count, __value);
+}
+
+// [alg.generate] -- generate_n returns __first unchanged, without selecting a backend, when __count <= 0.
+template <class _ExecutionPolicy, class _ForwardIterator, class _Generator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_generate(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                           __g);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Generator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+generate_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, _Generator __g)
+{
+    if (__count <= 0)
+        return __first;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_generate_n(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                    __count, __g);
+}
+
+// [alg.remove] -- the *_copy forms are copy_if with a negated predicate; remove is remove_if with __equal_value.
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+remove_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _Predicate __pred)
+{
+    return std::copy_if(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, std::not_fn(__pred));
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+remove_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+            const _Tp& __value)
+{
+    return std::copy_if(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+                        __pstl::__internal::__not_equal_value<_Tp>(__value));
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+remove_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_remove_if(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                   __last, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+remove(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
+{
+    return std::remove_if(std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                          __pstl::__internal::__equal_value<_Tp>(__value));
+}
+
+// [alg.unique] -- overloads without a predicate delegate to the predicate overload with std::equal_to<>.
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_unique(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    return std::unique(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::equal_to<>());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+            _BinaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    return __pstl::__internal::__pattern_unique_copy(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                     __last, __result, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result)
+{
+    return std::unique_copy(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, std::equal_to<>());
+}
+
+// [alg.reverse]
+
+template <class _ExecutionPolicy, class _BidirectionalIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_reverse(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last);
+}
+
+template <class _ExecutionPolicy, class _BidirectionalIterator, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last,
+             _ForwardIterator __d_first)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __d_first);
+
+    return __pstl::__internal::__pattern_reverse_copy(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                      __last, __d_first);
+}
+
+// [alg.rotate]
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+rotate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_rotate(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                __middle, __last);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __middle, _ForwardIterator1 __last,
+            _ForwardIterator2 __result)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    return __pstl::__internal::__pattern_rotate_copy(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                     __middle, __last, __result);
+}
+
+// [alg.partitions]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_partitioned(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_is_partitioned(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                        __last, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+partition(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_partition(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                   __last, __pred);
+}
+
+template <class _ExecutionPolicy, class _BidirectionalIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _BidirectionalIterator>
+stable_partition(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last,
+                 _UnaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_stable_partition(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec),
+                                                          __first, __last, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _ForwardIterator1, class _ForwardIterator2,
+          class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+partition_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
+               _ForwardIterator1 __out_true, _ForwardIterator2 __out_false, _UnaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __out_true, __out_false);
+
+    return __pstl::__internal::__pattern_partition_copy(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                        __last, __out_true, __out_false, __pred);
+}
+
+// [alg.sort] -- the comparator overload also passes std::is_move_constructible<value_type>::type() to the pattern.
+
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    typedef typename iterator_traits<_RandomAccessIterator>::value_type _InputType;
+    return __pstl::__internal::__pattern_sort(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                              __comp, typename std::is_move_constructible<_InputType>::type());
+}
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last)
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _InputType;
+    std::sort(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>());
+}
+
+// [stable.sort]
+
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_stable_sort(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                     __last, __comp);
+}
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last)
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _InputType;
+    std::stable_sort(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>());
+}
+
+// [mismatch] -- the three-iterator overloads compute the second range's end as __first2 + distance(first range).
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2, _BinaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+
+    return __pstl::__internal::__pattern_mismatch(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                                  __last1, __first2, __last2, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _BinaryPredicate __pred)
+{
+    return std::mismatch(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2,
+                         std::next(__first2, std::distance(__first1, __last1)), __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2)
+{
+    return std::mismatch(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2,
+                         std::equal_to<>());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2)
+{
+    // TODO: get rid of "distance"
+    return std::mismatch(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2,
+                         std::next(__first2, std::distance(__first1, __last1)));
+}
+
+// [alg.equal] -- overloads without a predicate delegate to the predicate overload with std::equal_to<>.
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _BinaryPredicate __p)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+
+    return __pstl::__internal::__pattern_equal(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                               __last1, __first2, __p);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2)
+{
+    return std::equal(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, std::equal_to<>());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2, _BinaryPredicate __p)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+
+    return __pstl::__internal::__pattern_equal(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                               __last1, __first2, __last2, __p);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2)
+{
+    return equal(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, std::equal_to<>());
+}
+
+// [alg.move] -- same shape as copy, but the brick lambda calls __brick_move.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+move(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __d_first);
+
+    using __is_vector = typename decltype(__dispatch_tag)::__is_vector;
+
+    return __pstl::__internal::__pattern_walk2_brick(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first,
+        [](_ForwardIterator1 __begin, _ForwardIterator1 __end, _ForwardIterator2 __res)
+        { return __pstl::__internal::__brick_move(__begin, __end, __res, __is_vector{}); });
+}
+
+// [partial.sort]
+
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle,
+             _RandomAccessIterator __last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_partial_sort(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                               __middle, __last, __comp);
+}
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle,
+             _RandomAccessIterator __last)
+{
+    typedef typename iterator_traits<_RandomAccessIterator>::value_type _InputType;
+    std::partial_sort(std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, std::less<_InputType>());
+}
+
+// [partial.sort.copy]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
+                  _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __d_first);
+
+    return __pstl::__internal::__pattern_partial_sort_copy(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec),
+                                                           __first, __last, __d_first, __d_last, __comp);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
+                  _RandomAccessIterator __d_first, _RandomAccessIterator __d_last)
+{
+    return std::partial_sort_copy(std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __d_last,
+                                  std::less<>());
+}
+
+// [is.sorted] -- both forms run __pattern_adjacent_find with the comparator wrapped in __reorder_pred.
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+    const _ForwardIterator __res =
+        __pstl::__internal::__pattern_adjacent_find(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                    __last, __pstl::__internal::__reorder_pred<_Compare>(__comp),
+                                                    /*first_semantic*/ false);
+    return __res == __last ? __last : std::next(__res);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    typedef typename std::iterator_traits<_ForwardIterator>::value_type _InputType;
+    return is_sorted_until(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_adjacent_find(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                       __last, __pstl::__internal::__reorder_pred<_Compare>(__comp),
+                                                       /*or_semantic*/ true) == __last;
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    typedef typename std::iterator_traits<_ForwardIterator>::value_type _InputType;
+    return std::is_sorted(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>());
+}
+
+// [alg.merge] -- overloads without a comparator delegate to the comparator overload with std::less.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2, _ForwardIterator __d_first, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2, __d_first);
+
+    return __pstl::__internal::__pattern_merge(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                               __last1, __first2, __last2, __d_first, __comp);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2, _ForwardIterator __d_first)
+{
+    return std::merge(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first,
+                      std::less<>());
+}
+
+template <class _ExecutionPolicy, class _BidirectionalIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle,
+              _BidirectionalIterator __last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_inplace_merge(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                __middle, __last, __comp);
+}
+
+template <class _ExecutionPolicy, class _BidirectionalIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle,
+              _BidirectionalIterator __last)
+{
+    typedef typename std::iterator_traits<_BidirectionalIterator>::value_type _InputType;
+    std::inplace_merge(std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, std::less<_InputType>());
+}
+
+// [includes]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+
+    return __pstl::__internal::__pattern_includes(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                                  __last1, __first2, __last2, __comp);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2)
+{
+    return std::includes(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, std::less<>());
+}
+
+// [set.union]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+          _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2, __result);
+
+    return __pstl::__internal::__pattern_set_union(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                                   __last1, __first2, __last2, __result, __comp);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+          _ForwardIterator2 __last2, _ForwardIterator __result)
+{
+    return std::set_union(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result,
+                          std::less<>());
+}
+
+// [set.intersection]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                 _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2, __result);
+
+    return __pstl::__internal::__pattern_set_intersection(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec),
+                                                          __first1, __last1, __first2, __last2, __result, __comp);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                 _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result)
+{
+    return std::set_intersection(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result,
+                                 std::less<>());
+}
+
+// [set.difference]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+               _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2, __result);
+
+    return __pstl::__internal::__pattern_set_difference(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec),
+                                                        __first1, __last1, __first2, __last2, __result, __comp);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+               _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result)
+{
+    return std::set_difference(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result,
+                               std::less<>());
+}
+
+// [set.symmetric.difference]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                         _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result,
+                         _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2, __result);
+
+    return __pstl::__internal::__pattern_set_symmetric_difference(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                         _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result)
+{
+    return std::set_symmetric_difference(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2,
+                                         __result, std::less<>());
+}
+
+// [is.heap] -- is_heap is defined as is_heap_until(...) == __last.
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_is_heap_until(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                       __last, __comp);
+}
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last)
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _InputType;
+    return std::is_heap_until(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>());
+}
+
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+{
+    return std::is_heap_until(std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp) == __last;
+}
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last)
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _InputType;
+    return std::is_heap(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>());
+}
+
+// [alg.min.max] -- max_element is min_element called with the comparator wrapped in __reorder_pred.
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_min_element(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                     __last, __comp);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    typedef typename std::iterator_traits<_ForwardIterator>::value_type _InputType;
+    return std::min_element(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+max_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
+{
+    return min_element(std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                       __pstl::__internal::__reorder_pred<_Compare>(__comp));
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+max_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    typedef typename std::iterator_traits<_ForwardIterator>::value_type _InputType;
+    return std::min_element(std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                            __pstl::__internal::__reorder_pred<std::less<_InputType>>(std::less<_InputType>()));
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator, _ForwardIterator>>
+minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_minmax_element(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                        __last, __comp);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator, _ForwardIterator>>
+minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType;
+    return std::minmax_element(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_ValueType>());
+}
+
+// [alg.nth.element]
+
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth,
+            _RandomAccessIterator __last, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_nth_element(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __nth,
+                                              __last, __comp);
+}
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth,
+            _RandomAccessIterator __last)
+{
+    typedef typename iterator_traits<_RandomAccessIterator>::value_type _InputType;
+    std::nth_element(std::forward<_ExecutionPolicy>(__exec), __first, __nth, __last, std::less<_InputType>());
+}
+
+// [alg.lex.comparison]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                        _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+
+    return __pstl::__internal::__pattern_lexicographical_compare(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec),
+                                                                 __first1, __last1, __first2, __last2, __comp);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                        _ForwardIterator2 __first2, _ForwardIterator2 __last2)
+{
+    return std::lexicographical_compare(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2,
+                                        std::less<>());
+}
+
+} // namespace std
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_GLUE_ALGORITHM_IMPL_H */
diff --git a/Components/Include/pstl/internal/glue_execution_defs.h b/Components/Include/pstl/internal/glue_execution_defs.h
new file mode 100644
index 0000000..df9a477
--- /dev/null
+++ b/Components/Include/pstl/internal/glue_execution_defs.h
@@ -0,0 +1,55 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_GLUE_EXECUTION_DEFS_H +#define _PSTL_GLUE_EXECUTION_DEFS_H + +#include + +#include "execution_defs.h" +#include "pstl_config.h" + +namespace std +{ +// Type trait +using __pstl::execution::is_execution_policy; +#if defined(_PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT) +# if defined(__INTEL_COMPILER) +template +constexpr bool is_execution_policy_v = is_execution_policy::value; +# else +using __pstl::execution::is_execution_policy_v; +# endif +#endif + +namespace execution +{ +// Standard C++ policy classes +using __pstl::execution::parallel_policy; +using __pstl::execution::parallel_unsequenced_policy; +using __pstl::execution::sequenced_policy; + +// Standard predefined policy instances +using __pstl::execution::par; +using __pstl::execution::par_unseq; +using __pstl::execution::seq; + +// Implementation-defined names +// Unsequenced policy is not yet standard, but for consistency +// we include it into namespace std::execution as well +using __pstl::execution::unseq; +using __pstl::execution::unsequenced_policy; +} // namespace execution +} // namespace std + +#include "algorithm_impl.h" +#include "numeric_impl.h" +#include "parallel_backend.h" + +#endif /* _PSTL_GLUE_EXECUTION_DEFS_H */ diff --git a/Components/Include/pstl/internal/glue_memory_defs.h b/Components/Include/pstl/internal/glue_memory_defs.h new file mode 100644 index 0000000..ae52333 --- /dev/null +++ b/Components/Include/pstl/internal/glue_memory_defs.h @@ -0,0 +1,85 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_GLUE_MEMORY_DEFS_H +#define _PSTL_GLUE_MEMORY_DEFS_H + +#include "execution_defs.h" +#include "pstl_config.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace std +{ + +// [uninitialized.copy] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIterator __last, _ForwardIterator __result); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __n, _ForwardIterator __result); + +// [uninitialized.move] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIterator __last, _ForwardIterator __result); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __n, _ForwardIterator __result); + +// [uninitialized.fill] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, const _Tp& __value); + +// [specialized.destroy] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> 
+destroy_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n); + +// [uninitialized.construct.default] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +uninitialized_default_construct(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_default_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n); + +// [uninitialized.construct.value] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n); + +} // namespace std + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_GLUE_MEMORY_DEFS_H */ diff --git a/Components/Include/pstl/internal/glue_memory_impl.h b/Components/Include/pstl/internal/glue_memory_impl.h new file mode 100644 index 0000000..39c595d --- /dev/null +++ b/Components/Include/pstl/internal/glue_memory_impl.h @@ -0,0 +1,352 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_GLUE_MEMORY_IMPL_H +#define _PSTL_GLUE_MEMORY_IMPL_H + +#include "pstl_config.h" + +#include "execution_defs.h" +#include "utils.h" +#include "algorithm_fwd.h" + +#include "execution_impl.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace std +{ + +// [uninitialized.copy] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIterator __last, _ForwardIterator __result) +{ + typedef typename iterator_traits<_InputIterator>::value_type _ValueType1; + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType2; + typedef typename iterator_traits<_InputIterator>::reference _ReferenceType1; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType2; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result); + + using __is_vector = typename decltype(__dispatch_tag)::__is_vector; + + return __pstl::__internal::__invoke_if_else( + std::integral_constant < bool, std::is_trivial<_ValueType1>::value&& std::is_trivial<_ValueType2>::value > (), + [&]() + { + return __pstl::__internal::__pattern_walk2_brick( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + [](_InputIterator __begin, _InputIterator __end, _ForwardIterator __res) + { return __pstl::__internal::__brick_copy(__begin, __end, __res, __is_vector{}); }); + }, + [&]() + { + return __pstl::__internal::__pattern_walk2(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, + __last, __result, + [](_ReferenceType1 __val1, _ReferenceType2 __val2) + { ::new (std::addressof(__val2)) _ValueType2(__val1); }); + }); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> 
+uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __n, _ForwardIterator __result) +{ + typedef typename iterator_traits<_InputIterator>::value_type _ValueType1; + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType2; + typedef typename iterator_traits<_InputIterator>::reference _ReferenceType1; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType2; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result); + + using __is_vector = typename decltype(__dispatch_tag)::__is_vector; + + return __pstl::__internal::__invoke_if_else( + std::integral_constant < bool, std::is_trivial<_ValueType1>::value&& std::is_trivial<_ValueType2>::value > (), + [&]() + { + return __pstl::__internal::__pattern_walk2_brick_n( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + [](_InputIterator __begin, _Size __sz, _ForwardIterator __res) + { return __pstl::__internal::__brick_copy_n(__begin, __sz, __res, __is_vector{}); }); + }, + [&]() + { + return __pstl::__internal::__pattern_walk2_n(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), + __first, __n, __result, + [](_ReferenceType1 __val1, _ReferenceType2 __val2) + { ::new (std::addressof(__val2)) _ValueType2(__val1); }); + }); +} + +// [uninitialized.move] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIterator __last, _ForwardIterator __result) +{ + typedef typename iterator_traits<_InputIterator>::value_type _ValueType1; + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType2; + typedef typename iterator_traits<_InputIterator>::reference _ReferenceType1; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType2; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result); + + using __is_vector = 
typename decltype(__dispatch_tag)::__is_vector; + + return __pstl::__internal::__invoke_if_else( + std::integral_constant < bool, std::is_trivial<_ValueType1>::value&& std::is_trivial<_ValueType2>::value > (), + [&]() + { + return __pstl::__internal::__pattern_walk2_brick( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + [](_InputIterator __begin, _InputIterator __end, _ForwardIterator __res) + { return __pstl::__internal::__brick_copy(__begin, __end, __res, __is_vector{}); }); + }, + [&]() + { + return __pstl::__internal::__pattern_walk2( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + [](_ReferenceType1 __val1, _ReferenceType2 __val2) + { ::new (std::addressof(__val2)) _ValueType2(std::move(__val1)); }); + }); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __n, _ForwardIterator __result) +{ + typedef typename iterator_traits<_InputIterator>::value_type _ValueType1; + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType2; + typedef typename iterator_traits<_InputIterator>::reference _ReferenceType1; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType2; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result); + + using __is_vector = typename decltype(__dispatch_tag)::__is_vector; + + return __pstl::__internal::__invoke_if_else( + std::integral_constant < bool, std::is_trivial<_ValueType1>::value&& std::is_trivial<_ValueType2>::value > (), + [&]() + { + return __pstl::__internal::__pattern_walk2_brick_n( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + [](_InputIterator __begin, _Size __sz, _ForwardIterator __res) + { return __pstl::__internal::__brick_copy_n(__begin, __sz, __res, __is_vector{}); }); + }, + [&]() + { + return 
__pstl::__internal::__pattern_walk2_n( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + [](_ReferenceType1 __val1, _ReferenceType2 __val2) + { ::new (std::addressof(__val2)) _ValueType2(std::move(__val1)); }); + }); +} + +// [uninitialized.fill] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) +{ + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + using __is_vector = typename decltype(__dispatch_tag)::__is_vector; + + __pstl::__internal::__invoke_if_else( + std::is_arithmetic<_ValueType>(), + [&]() + { + __pstl::__internal::__pattern_walk_brick( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value](_ForwardIterator __begin, _ForwardIterator __end) + { __pstl::__internal::__brick_fill(__begin, __end, _ValueType(__value), __is_vector{}); }); + }, + [&]() + { + __pstl::__internal::__pattern_walk1(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value](_ReferenceType __val) + { ::new (std::addressof(__val)) _ValueType(__value); }); + }); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, const _Tp& __value) +{ + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + using __is_vector = typename decltype(__dispatch_tag)::__is_vector; + + return __pstl::__internal::__invoke_if_else( + std::is_arithmetic<_ValueType>(), + 
[&]() + { + return __pstl::__internal::__pattern_walk_brick_n( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, + [&__value](_ForwardIterator __begin, _Size __count) + { return __pstl::__internal::__brick_fill_n(__begin, __count, _ValueType(__value), __is_vector{}); }); + }, + [&]() + { + return __pstl::__internal::__pattern_walk1_n( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, + [&__value](_ReferenceType __val) { ::new (std::addressof(__val)) _ValueType(__value); }); + }); +} + +// [specialized.destroy] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last) +{ + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + __pstl::__internal::__invoke_if_not(std::is_trivially_destructible<_ValueType>(), + [&]() + { + __pstl::__internal::__pattern_walk1( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [](_ReferenceType __val) { __val.~_ValueType(); }); + }); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +destroy_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n) +{ + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + return __pstl::__internal::__invoke_if_else( + std::is_trivially_destructible<_ValueType>(), [&]() { return std::next(__first, __n); }, + [&]() + { + return __pstl::__internal::__pattern_walk1_n(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), + __first, __n, + [](_ReferenceType __val) { __val.~_ValueType(); }); + }); +} + 
+// [uninitialized.construct.default] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +uninitialized_default_construct(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last) +{ + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + __pstl::__internal::__invoke_if_not(std::is_trivial<_ValueType>(), + [&]() + { + __pstl::__internal::__pattern_walk1( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [](_ReferenceType __val) { ::new (std::addressof(__val)) _ValueType; }); + }); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_default_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n) +{ + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + return __pstl::__internal::__invoke_if_else( + std::is_trivial<_ValueType>(), [&]() { return std::next(__first, __n); }, + [&]() + { + return __pstl::__internal::__pattern_walk1_n( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, + [](_ReferenceType __val) { ::new (std::addressof(__val)) _ValueType; }); + }); +} + +// [uninitialized.construct.value] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void> +uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last) +{ + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, 
__first); + + using __is_vector = typename decltype(__dispatch_tag)::__is_vector; + + __pstl::__internal::__invoke_if_else( + std::is_trivial<_ValueType>(), + [&]() + { + __pstl::__internal::__pattern_walk_brick( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [](_ForwardIterator __begin, _ForwardIterator __end) + { __pstl::__internal::__brick_fill(__begin, __end, _ValueType(), __is_vector{}); }); + }, + [&]() + { + __pstl::__internal::__pattern_walk1(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [](_ReferenceType __val) + { ::new (std::addressof(__val)) _ValueType(); }); + }); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> +uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n) +{ + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType; + typedef typename iterator_traits<_ForwardIterator>::reference _ReferenceType; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + + using __is_vector = typename decltype(__dispatch_tag)::__is_vector; + + return __pstl::__internal::__invoke_if_else( + std::is_trivial<_ValueType>(), + [&]() + { + return __pstl::__internal::__pattern_walk_brick_n( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, + [](_ForwardIterator __begin, _Size __count) + { return __pstl::__internal::__brick_fill_n(__begin, __count, _ValueType(), __is_vector{}); }); + }, + [&]() + { + return __pstl::__internal::__pattern_walk1_n( + __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, + [](_ReferenceType __val) { ::new (std::addressof(__val)) _ValueType(); }); + }); +} + +} // namespace std + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_GLUE_MEMORY_IMPL_H */ diff --git a/Components/Include/pstl/internal/glue_numeric_defs.h b/Components/Include/pstl/internal/glue_numeric_defs.h new file mode 100644 index 0000000..86cd38b --- 
/dev/null +++ b/Components/Include/pstl/internal/glue_numeric_defs.h @@ -0,0 +1,124 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_GLUE_NUMERIC_DEFS_H +#define _PSTL_GLUE_NUMERIC_DEFS_H + +#include + +#include "execution_defs.h" +#include "pstl_config.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace std +{ +// [reduce] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> +reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, + _BinaryOperation __binary_op); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> +reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, + typename iterator_traits<_ForwardIterator>::value_type> +reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> +transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Tp __init); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> +transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, + _BinaryOperation2 __binary_op2); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> +transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, 
_ForwardIterator __last, _Tp __init, + _BinaryOperation __binary_op, _UnaryOperation __unary_op); + +// [exclusive.scan] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _Tp __init); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op); + +// [inclusive.scan] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _BinaryOperation __binary_op); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _BinaryOperation __binary_op, _Tp __init); + +// [transform.exclusive.scan] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +transform_exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op); + +// [transform.inclusive.scan] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 
__result, _BinaryOperation __binary_op, _UnaryOperation __unary_op, + _Tp __init); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _BinaryOperation __binary_op, _UnaryOperation __unary_op); + +// [adjacent.difference] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __d_first, _BinaryOperation op); + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __d_first); + +} // namespace std + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_GLUE_NUMERIC_DEFS_H */ diff --git a/Components/Include/pstl/internal/glue_numeric_impl.h b/Components/Include/pstl/internal/glue_numeric_impl.h new file mode 100644 index 0000000..ad268b5 --- /dev/null +++ b/Components/Include/pstl/internal/glue_numeric_impl.h @@ -0,0 +1,232 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_GLUE_NUMERIC_IMPL_H +#define _PSTL_GLUE_NUMERIC_IMPL_H + +#include + +#include "pstl_config.h" + +#include "utils.h" +#include "numeric_fwd.h" +#include "execution_impl.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace std +{ + +// [reduce] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> +reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, + _BinaryOperation __binary_op) +{ + return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, + __pstl::__internal::__no_op()); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> +reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init) +{ + return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, std::plus<_Tp>(), + __pstl::__internal::__no_op()); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, + typename iterator_traits<_ForwardIterator>::value_type> +reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last) +{ + typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType; + return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, _ValueType{}, + std::plus<_ValueType>(), __pstl::__internal::__no_op()); +} + +// [transform.reduce] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> +transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Tp __init) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2); + + typedef typename iterator_traits<_ForwardIterator1>::value_type _InputType; + return 
__pstl::__internal::__pattern_transform_reduce(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __init, std::plus<_InputType>(), + std::multiplies<_InputType>()); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> +transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2); + return __pstl::__internal::__pattern_transform_reduce(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __init, __binary_op1, + __binary_op2); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> +transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, + _BinaryOperation __binary_op, _UnaryOperation __unary_op) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first); + return __pstl::__internal::__pattern_transform_reduce(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), + __first, __last, __init, __binary_op, __unary_op); +} + +// [exclusive.scan] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _Tp __init) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result); + + using namespace __pstl; + return __internal::__pattern_transform_scan(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result, __pstl::__internal::__no_op(), __init, std::plus<_Tp>(), + /*inclusive=*/std::false_type()); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> 
+exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result); + + using namespace __pstl; + return __internal::__pattern_transform_scan(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result, __pstl::__internal::__no_op(), __init, __binary_op, + /*inclusive=*/std::false_type()); +} + +// [inclusive.scan] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result) +{ + typedef typename iterator_traits<_ForwardIterator1>::value_type _InputType; + return transform_inclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + std::plus<_InputType>(), __pstl::__internal::__no_op()); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _BinaryOperation __binary_op) +{ + return transform_inclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __binary_op, + __pstl::__internal::__no_op()); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _BinaryOperation __binary_op, _Tp __init) +{ + return transform_inclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __binary_op, + __pstl::__internal::__no_op(), __init); +} + +// [transform.exclusive.scan] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> 
+transform_exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result); + + return __pstl::__internal::__pattern_transform_scan(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, + __last, __result, __unary_op, __init, __binary_op, + /*inclusive=*/std::false_type()); +} + +// [transform.inclusive.scan] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _BinaryOperation __binary_op, _UnaryOperation __unary_op, + _Tp __init) +{ + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result); + + return __pstl::__internal::__pattern_transform_scan(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, + __last, __result, __unary_op, __init, __binary_op, + /*inclusive=*/std::true_type()); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __result, _BinaryOperation __binary_op, _UnaryOperation __unary_op) +{ + if (__first != __last) + { + auto __tmp = __unary_op(*__first); + *__result = __tmp; + return transform_inclusive_scan(std::forward<_ExecutionPolicy>(__exec), ++__first, __last, ++__result, + __binary_op, __unary_op, __tmp); + } + else + { + return __result; + } +} + +// [adjacent.difference] + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __d_first, _BinaryOperation 
__op) +{ + + if (__first == __last) + return __d_first; + + auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __d_first); + + return __pstl::__internal::__pattern_adjacent_difference(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), + __first, __last, __d_first, __op); +} + +template +__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2> +adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __d_first) +{ + typedef typename iterator_traits<_ForwardIterator1>::value_type _ValueType; + return adjacent_difference(std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, + std::minus<_ValueType>()); +} + +} // namespace std + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_GLUE_NUMERIC_IMPL_H_ */ diff --git a/Components/Include/pstl/internal/memory_impl.h b/Components/Include/pstl/internal/memory_impl.h new file mode 100644 index 0000000..942a30e --- /dev/null +++ b/Components/Include/pstl/internal/memory_impl.h @@ -0,0 +1,112 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_MEMORY_IMPL_H +#define _PSTL_MEMORY_IMPL_H + +#include + +#include "pstl_config.h" +#include "unseq_backend_simd.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __internal +{ + +//------------------------------------------------------------------------ +// uninitialized_move +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_uninitialized_move(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, + /*vector=*/std::false_type) noexcept +{ + using _ValueType = typename std::iterator_traits<_OutputIterator>::value_type; + for (; __first != __last; ++__first, ++__result) + { + ::new (std::addressof(*__result)) _ValueType(std::move(*__first)); + } + return __result; +} + +template +_OutputIterator +__brick_uninitialized_move(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result, + /*vector=*/std::true_type) noexcept +{ + using __ValueType = typename std::iterator_traits<_OutputIterator>::value_type; + using _ReferenceType1 = typename std::iterator_traits<_RandomAccessIterator>::reference; + using _ReferenceType2 = typename std::iterator_traits<_OutputIterator>::reference; + + return __unseq_backend::__simd_walk_2( + __first, __last - __first, __result, + [](_ReferenceType1 __x, _ReferenceType2 __y) { ::new (std::addressof(__y)) __ValueType(std::move(__x)); }); +} + +template +void +__brick_destroy(_Iterator __first, _Iterator __last, /*vector*/ std::false_type) noexcept +{ + using _ValueType = typename std::iterator_traits<_Iterator>::value_type; + + for (; __first != __last; ++__first) + __first->~_ValueType(); +} + +template +void +__brick_destroy(_RandomAccessIterator __first, _RandomAccessIterator __last, /*vector*/ std::true_type) noexcept +{ + using _ValueType = typename 
std::iterator_traits<_RandomAccessIterator>::value_type; + using _ReferenceType = typename std::iterator_traits<_RandomAccessIterator>::reference; + + __unseq_backend::__simd_walk_1(__first, __last - __first, [](_ReferenceType __x) { __x.~_ValueType(); }); +} + +//------------------------------------------------------------------------ +// uninitialized copy +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_uninitialized_copy(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, + /*vector=*/std::false_type) noexcept +{ + using _ValueType = typename std::iterator_traits<_OutputIterator>::value_type; + for (; __first != __last; ++__first, ++__result) + { + ::new (std::addressof(*__result)) _ValueType(*__first); + } + return __result; +} + +template +_OutputIterator +__brick_uninitialized_copy(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result, + /*vector=*/std::true_type) noexcept +{ + using __ValueType = typename std::iterator_traits<_OutputIterator>::value_type; + using _ReferenceType1 = typename std::iterator_traits<_RandomAccessIterator>::reference; + using _ReferenceType2 = typename std::iterator_traits<_OutputIterator>::reference; + + return __unseq_backend::__simd_walk_2( + __first, __last - __first, __result, + [](_ReferenceType1 __x, _ReferenceType2 __y) { ::new (std::addressof(__y)) __ValueType(__x); }); +} + +} // namespace __internal +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_MEMORY_IMPL_H */ diff --git a/Components/Include/pstl/internal/numeric_fwd.h b/Components/Include/pstl/internal/numeric_fwd.h new file mode 100644 index 0000000..7f7845b --- /dev/null +++ b/Components/Include/pstl/internal/numeric_fwd.h @@ -0,0 +1,139 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_NUMERIC_FWD_H +#define _PSTL_NUMERIC_FWD_H + +#include +#include + +#include "pstl_config.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __internal +{ + +//------------------------------------------------------------------------ +// transform_reduce (version with two binary functions, according to draft N4659) +//------------------------------------------------------------------------ + +template +_Tp __brick_transform_reduce(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _Tp, + _BinaryOperation1, _BinaryOperation2, + /*__is_vector=*/std::true_type) noexcept; + +template +_Tp __brick_transform_reduce(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, _BinaryOperation1, + _BinaryOperation2, + /*__is_vector=*/std::false_type) noexcept; + +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, + _BinaryOperation1, _BinaryOperation2) noexcept; + +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _Tp, _BinaryOperation1, _BinaryOperation2); + +//------------------------------------------------------------------------ +// transform_reduce (version with unary and binary functions) +//------------------------------------------------------------------------ + +template +_Tp __brick_transform_reduce(_RandomAccessIterator, _RandomAccessIterator, _Tp, _BinaryOperation, _UnaryOperation, + /*is_vector=*/std::true_type) noexcept; + +template +_Tp __brick_transform_reduce(_ForwardIterator, _ForwardIterator, _Tp, _BinaryOperation, _UnaryOperation, + /*is_vector=*/std::false_type) noexcept; + +template +_Tp 
+__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Tp, _BinaryOperation, + _UnaryOperation) noexcept; + +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Tp, _BinaryOperation, _UnaryOperation); + +//------------------------------------------------------------------------ +// transform_exclusive_scan +// +// walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...) +//------------------------------------------------------------------------ + +template +std::pair<_OutputIterator, _Tp> __brick_transform_scan(_ForwardIterator, _ForwardIterator, _OutputIterator, + _UnaryOperation, _Tp, _BinaryOperation, + /*Inclusive*/ std::false_type) noexcept; + +template +std::pair<_OutputIterator, _Tp> __brick_transform_scan(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, + _UnaryOperation, _Tp, _BinaryOperation, + /*Inclusive*/ std::true_type) noexcept; + +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryOperation, + _Tp, _BinaryOperation, _Inclusive) noexcept; + +template +typename std::enable_if::value, _OutputIterator>::type +__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&&, _RandomAccessIterator, + _RandomAccessIterator, _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive); + +template +typename std::enable_if::value, _OutputIterator>::type +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive); + +//------------------------------------------------------------------------ +// adjacent_difference +//------------------------------------------------------------------------ + +template +_OutputIterator 
__brick_adjacent_difference(_ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryOperation, + /*is_vector*/ std::false_type) noexcept; + +template +_OutputIterator __brick_adjacent_difference(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, + _BinaryOperation, + /*is_vector*/ std::true_type) noexcept; + +template +_OutputIterator +__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _BinaryOperation) noexcept; + +template +_OutputIterator +__pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, + _RandomAccessIterator, _OutputIterator, _BinaryOperation); + +} // namespace __internal +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_NUMERIC_FWD_H */ diff --git a/Components/Include/pstl/internal/numeric_impl.h b/Components/Include/pstl/internal/numeric_impl.h new file mode 100644 index 0000000..a0387ae --- /dev/null +++ b/Components/Include/pstl/internal/numeric_impl.h @@ -0,0 +1,383 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_NUMERIC_IMPL_H +#define _PSTL_NUMERIC_IMPL_H + +#include +#include +#include + +#include "parallel_backend.h" +#include "pstl_config.h" +#include "execution_impl.h" +#include "unseq_backend_simd.h" +#include "algorithm_fwd.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __internal +{ + +//------------------------------------------------------------------------ +// transform_reduce (version with two binary functions, according to draft N4659) +//------------------------------------------------------------------------ + +template +_Tp +__brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, + _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, + /*is_vector=*/std::false_type) noexcept +{ + return std::inner_product(__first1, __last1, __first2, __init, __binary_op1, __binary_op2); +} + +template +_Tp +__brick_transform_reduce(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, + _BinaryOperation2 __binary_op2, + /*is_vector=*/std::true_type) noexcept +{ + typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; + return __unseq_backend::__simd_transform_reduce( + __last1 - __first1, __init, __binary_op1, + [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); }); +} + +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, + _BinaryOperation2 __binary_op2) noexcept +{ + return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, + typename _Tag::__is_vector{}); +} + 
+template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, + _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + return __par_backend::__parallel_transform_reduce( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable + { return __binary_op2(*__i, *(__first2 + (__i - __first1))); }, + __init, + __binary_op1, // Combine + [__first1, __first2, __binary_op1, __binary_op2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, + _Tp __init) -> _Tp + { + return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, + __binary_op1, __binary_op2, _IsVector{}); + }); + }); +} + +//------------------------------------------------------------------------ +// transform_reduce (version with unary and binary functions) +//------------------------------------------------------------------------ + +template +_Tp +__brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept +{ + return std::transform_reduce(__first, __last, __init, __binary_op, __unary_op); +} + +template +_Tp +__brick_transform_reduce(_RandomAccessIterator __first, _RandomAccessIterator __last, _Tp __init, + _BinaryOperation __binary_op, _UnaryOperation __unary_op, + /*is_vector=*/std::true_type) noexcept +{ + typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; + return __unseq_backend::__simd_transform_reduce( + __last - __first, __init, __binary_op, + [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); }); +} + 
+template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, + _BinaryOperation __binary_op, _UnaryOperation __unary_op) noexcept +{ + return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, + typename _Tag::__is_vector{}); +} + +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + return __internal::__except_handler( + [&]() + { + return __par_backend::__parallel_transform_reduce( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__unary_op](_RandomAccessIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, + [__unary_op, __binary_op](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) { + return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, _IsVector{}); + }); + }); +} + +//------------------------------------------------------------------------ +// transform_exclusive_scan +// +// walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...) 
+//------------------------------------------------------------------------ + +// Exclusive form +template +std::pair<_OutputIterator, _Tp> +__brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, + _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, + /*Inclusive*/ std::false_type, /*is_vector=*/std::false_type) noexcept +{ + for (; __first != __last; ++__first, ++__result) + { + *__result = __init; + _PSTL_PRAGMA_FORCEINLINE + __init = __binary_op(__init, __unary_op(*__first)); + } + return std::make_pair(__result, __init); +} + +// Inclusive form +template +std::pair<_OutputIterator, _Tp> +__brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result, + _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, + /*Inclusive*/ std::true_type, /*is_vector=*/std::false_type) noexcept +{ + for (; __first != __last; ++__first, ++__result) + { + _PSTL_PRAGMA_FORCEINLINE + __init = __binary_op(__init, __unary_op(*__first)); + *__result = __init; + } + return std::make_pair(__result, __init); +} + +// type is arithmetic and binary operation is a user defined operation. +template +using is_arithmetic_udop = std::integral_constant::value && + !std::is_same<_BinaryOperation, std::plus<_Tp>>::value>; + +// [restriction] - T shall be DefaultConstructible. +// [violation] - default ctor of T shall set the identity value for binary_op. 
+template +typename std::enable_if::value, std::pair<_OutputIterator, _Tp>>::type +__brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result, + _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, + /*is_vector=*/std::true_type) noexcept +{ +#if defined(_PSTL_UDS_PRESENT) + return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op, + _Inclusive()); +#else + // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value + return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), + /*is_vector=*/std::false_type()); +#endif +} + +template +typename std::enable_if::value, std::pair<_OutputIterator, _Tp>>::type +__brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result, + _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive, + /*is_vector=*/std::true_type) noexcept +{ + return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), + /*is_vector=*/std::false_type()); +} + +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, + _Inclusive) noexcept +{ + return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), + typename _Tag::__is_vector{}) + .first; +} + +template +typename std::enable_if::value, _OutputIterator>::type +__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation __binary_op, _Inclusive) 
+{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; + + return __internal::__except_handler( + [&]() + { + __par_backend::__parallel_transform_scan( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __last - __first, + [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, + __binary_op, + [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) + { + // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan. + return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op, + __unary_op, + /*__is_vector*/ std::false_type()); + }, + [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __j, _Tp __init) + { + return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, + __init, __binary_op, _Inclusive(), _IsVector{}) + .second; + }); + return __result + (__last - __first); + }); +} + +template +typename std::enable_if::value, _OutputIterator>::type +__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation __binary_op, _Inclusive) +{ + using __backend_tag = typename decltype(__tag)::__backend_tag; + + typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; + _DifferenceType __n = __last - __first; + + if (__n <= 0) + { + return __result; + } + return __internal::__except_handler( + [&]() + { + __par_backend::__parallel_strict_scan( + __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, __init, + [__first, __unary_op, __binary_op, __result](_DifferenceType 
__i, _DifferenceType __len) + { + return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, + __unary_op, _Tp{}, __binary_op, _Inclusive(), _IsVector{}) + .second; + }, + __binary_op, + [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial) + { + return *(std::transform(__result + __i, __result + __i + __len, __result + __i, + [&__initial, &__binary_op](const _Tp& __x) + { + _PSTL_PRAGMA_FORCEINLINE + return __binary_op(__initial, __x); + }) - + 1); + }, + [](_Tp) {}); + return __result + (__last - __first); + }); +} + +//------------------------------------------------------------------------ +// adjacent_difference +//------------------------------------------------------------------------ + +template +_OutputIterator +__brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first, + _BinaryOperation __op, /*is_vector*/ std::false_type) noexcept +{ + return std::adjacent_difference(__first, __last, __d_first, __op); +} + +template +_RandomAccessIterator2 +__brick_adjacent_difference(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, + _RandomAccessIterator2 __d_first, BinaryOperation __op, + /*is_vector=*/std::true_type) noexcept +{ + _PSTL_ASSERT(__first != __last); + + typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; + typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; + + auto __n = __last - __first; + *__d_first = *__first; + return __unseq_backend::__simd_walk_3( + __first + 1, __n - 1, __first, __d_first + 1, + [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); }); +} + +template +_OutputIterator +__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __d_first, _BinaryOperation __op) noexcept +{ + return 
__internal::__brick_adjacent_difference(__first, __last, __d_first, __op, typename _Tag::__is_vector{}); +} + +template +_RandomAccessIterator2 +__pattern_adjacent_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, + _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, + _RandomAccessIterator2 __d_first, _BinaryOperation __op) +{ + _PSTL_ASSERT(__first != __last); + typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; + typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; + + using __backend_tag = typename decltype(__tag)::__backend_tag; + + *__d_first = *__first; + __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, + [&__op, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) + { + _RandomAccessIterator2 __d_b = __d_first + (__b - __first); + __internal::__brick_walk3( + __b, __e, __b + 1, __d_b + 1, + [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) + { __z = __op(__y, __x); }, + _IsVector{}); + }); + return __d_first + (__last - __first); +} + +} // namespace __internal +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_NUMERIC_IMPL_H */ diff --git a/Components/Include/pstl/internal/omp/parallel_for.h b/Components/Include/pstl/internal/omp/parallel_for.h new file mode 100644 index 0000000..0f841e5 --- /dev/null +++ b/Components/Include/pstl/internal/omp/parallel_for.h @@ -0,0 +1,64 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_PARALLEL_FOR_H +#define _PSTL_INTERNAL_OMP_PARALLEL_FOR_H + +#include + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +void +__parallel_for_body(_Index __first, _Index __last, _Fp __f) +{ + // initial partition of the iteration space into chunks + auto __policy = __omp_backend::__chunk_partitioner(__first, __last); + + // To avoid over-subscription we use taskloop for the nested parallelism + _PSTL_PRAGMA(omp taskloop untied mergeable) + for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk) + { + __pstl::__omp_backend::__process_chunk(__policy, __first, __chunk, __f); + } +} + +//------------------------------------------------------------------------ +// Notation: +// Evaluation of brick f[i,j) for each subrange [i,j) of [first, last) +//------------------------------------------------------------------------ + +template +void +__parallel_for(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +{ + if (omp_in_parallel()) + { + // we don't create a nested parallel region in an existing parallel + // region: just create tasks + __pstl::__omp_backend::__parallel_for_body(__first, __last, __f); + } + else + { + // in any case (nested or non-nested) one parallel region is created and + // only one thread creates a set of tasks + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) { __pstl::__omp_backend::__parallel_for_body(__first, __last, __f); } + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // _PSTL_INTERNAL_OMP_PARALLEL_FOR_H diff --git a/Components/Include/pstl/internal/omp/parallel_for_each.h b/Components/Include/pstl/internal/omp/parallel_for_each.h new file mode 100644 index 0000000..b9bfb05 --- /dev/null +++ b/Components/Include/pstl/internal/omp/parallel_for_each.h @@ -0,0 +1,59 @@ +// -*- C++ -*- +// 
-*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H +#define _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +void +__parallel_for_each_body(_ForwardIterator __first, _ForwardIterator __last, _Fp __f) +{ + using DifferenceType = typename std::iterator_traits<_ForwardIterator>::difference_type; + // TODO: Think of an approach to remove the std::distance call + auto __size = std::distance(__first, __last); + + _PSTL_PRAGMA(omp taskloop untied mergeable) + for (DifferenceType __index = 0; __index < __size; ++__index) + { + // TODO: Think of an approach to remove the increment here each time. 
+ auto __iter = std::next(__first, __index); + __f(*__iter); + } +} + +template +void +__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Fp __f) +{ + if (omp_in_parallel()) + { + // we don't create a nested parallel region in an existing parallel + // region: just create tasks + __pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f); + } + else + { + // in any case (nested or non-nested) one parallel region is created and + // only one thread creates a set of tasks + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) { __pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f); } + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H diff --git a/Components/Include/pstl/internal/omp/parallel_invoke.h b/Components/Include/pstl/internal/omp/parallel_invoke.h new file mode 100644 index 0000000..045ccbe --- /dev/null +++ b/Components/Include/pstl/internal/omp/parallel_invoke.h @@ -0,0 +1,50 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H +#define _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +void +__parallel_invoke_body(_F1&& __f1, _F2&& __f2) +{ + _PSTL_PRAGMA(omp taskgroup) + { + _PSTL_PRAGMA(omp task untied mergeable) { std::forward<_F1>(__f1)(); } + _PSTL_PRAGMA(omp task untied mergeable) { std::forward<_F2>(__f2)(); } + } +} + +template +void +__parallel_invoke(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +{ + if (omp_in_parallel()) + { + __pstl::__omp_backend::__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); + } + else + { + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + __pstl::__omp_backend::__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H diff --git a/Components/Include/pstl/internal/omp/parallel_merge.h b/Components/Include/pstl/internal/omp/parallel_merge.h new file mode 100644 index 0000000..e6f82c5 --- /dev/null +++ b/Components/Include/pstl/internal/omp/parallel_merge.h @@ -0,0 +1,98 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H +#define _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +void +__parallel_merge_body(std::size_t __size_x, std::size_t __size_y, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) +{ + + if (__size_x + __size_y <= __omp_backend::__default_chunk_size) + { + __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp); + return; + } + + _RandomAccessIterator1 __xm; + _RandomAccessIterator2 __ym; + + if (__size_x < __size_y) + { + __ym = __ys + (__size_y / 2); + __xm = std::upper_bound(__xs, __xe, *__ym, __comp); + } + else + { + __xm = __xs + (__size_x / 2); + __ym = std::lower_bound(__ys, __ye, *__xm, __comp); + } + + auto __zm = __zs + (__xm - __xs) + (__ym - __ys); + + _PSTL_PRAGMA(omp task untied mergeable default(none) + firstprivate(__xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge)) + __pstl::__omp_backend::__parallel_merge_body(__xm - __xs, __ym - __ys, __xs, __xm, __ys, __ym, __zs, __comp, + __leaf_merge); + + _PSTL_PRAGMA(omp task untied mergeable default(none) + firstprivate(__xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge)) + __pstl::__omp_backend::__parallel_merge_body(__xe - __xm, __ye - __ym, __xm, __xe, __ym, __ye, __zm, __comp, + __leaf_merge); + + _PSTL_PRAGMA(omp taskwait) +} + +template +void +__parallel_merge(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) + +{ + std::size_t __size_x = __xe - __xs; + std::size_t __size_y = __ye - __ys; + + /* + * Run the merge in parallel by chunking it up. 
Use the smaller range (if any) as the iteration range, and the + * larger range as the search range. + */ + + if (omp_in_parallel()) + { + __pstl::__omp_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, + __leaf_merge); + } + else + { + _PSTL_PRAGMA(omp parallel) + { + _PSTL_PRAGMA(omp single nowait) + __pstl::__omp_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, + __leaf_merge); + } + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H diff --git a/Components/Include/pstl/internal/omp/parallel_reduce.h b/Components/Include/pstl/internal/omp/parallel_reduce.h new file mode 100644 index 0000000..841d48f --- /dev/null +++ b/Components/Include/pstl/internal/omp/parallel_reduce.h @@ -0,0 +1,73 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H +#define _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +_Value +__parallel_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity, + _RealBody __real_body, _Reduction __reduce) +{ + if (__should_run_serial(__first, __last)) + { + return __real_body(__first, __last, __identity); + } + + auto __middle = __first + ((__last - __first) / 2); + _Value __v1(__identity), __v2(__identity); + __parallel_invoke_body( + [&]() { __v1 = __parallel_reduce_body(__first, __middle, __identity, __real_body, __reduce); }, + [&]() { __v2 = __parallel_reduce_body(__middle, __last, __identity, __real_body, __reduce); }); + + return __reduce(__v1, __v2); +} + +//------------------------------------------------------------------------ +// Notation: +// r(i,j,init) returns reduction of init with reduction over [i,j) +// c(x,y) combines values x and y that were the result of r +//------------------------------------------------------------------------ + +template +_Value +__parallel_reduce(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Value __identity, _RealBody __real_body, _Reduction __reduction) +{ + // We don't create a nested parallel region in an existing parallel region: + // just create tasks. + if (omp_in_parallel()) + { + return __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction); + } + + // In any case (nested or non-nested) one parallel region is created and only + // one thread creates a set of tasks. 
+ _Value __res = __identity; + + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + { + __res = __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction); + } + + return __res; +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H diff --git a/Components/Include/pstl/internal/omp/parallel_scan.h b/Components/Include/pstl/internal/omp/parallel_scan.h new file mode 100644 index 0000000..f3eb967 --- /dev/null +++ b/Components/Include/pstl/internal/omp/parallel_scan.h @@ -0,0 +1,136 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_PARALLEL_SCAN_H +#define _PSTL_INTERNAL_OMP_PARALLEL_SCAN_H + +#include "parallel_invoke.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +_Index +__split(_Index __m) +{ + _Index __k = 1; + while (2 * __k < __m) + __k *= 2; + return __k; +} + +template +void +__upsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Rp __reduce, _Cp __combine) +{ + if (__m == 1) + __r[0] = __reduce(__i * __tilesize, __lastsize); + else + { + _Index __k = __split(__m); + __omp_backend::__parallel_invoke_body( + [=] { __omp_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); }, + [=] { + __omp_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine); + }); + if (__m == 2 * __k) + __r[__m - 1] = __combine(__r[__k - 1], __r[__m - 1]); + } +} + +template +void +__downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Tp __initial, _Cp __combine, + 
_Sp __scan) +{ + if (__m == 1) + __scan(__i * __tilesize, __lastsize, __initial); + else + { + const _Index __k = __split(__m); + __omp_backend::__parallel_invoke_body( + [=] { __omp_backend::__downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan); }, + // Assumes that __combine never throws. + // TODO: Consider adding a requirement for user functors to be constant. + [=, &__combine] + { + __omp_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, + __combine(__initial, __r[__k - 1]), __combine, __scan); + }); + } +} + +template +void +__parallel_strict_scan_body(_Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) +{ + _Index __p = omp_get_num_threads(); + const _Index __slack = 4; + _Index __tilesize = (__n - 1) / (__slack * __p) + 1; + _Index __m = (__n - 1) / __tilesize; + __buffer<_Tp> __buf(__m + 1); + _Tp* __r = __buf.get(); + + __omp_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, __combine); + + std::size_t __k = __m + 1; + _Tp __t = __r[__k - 1]; + while ((__k &= __k - 1)) + { + __t = __combine(__r[__k - 1], __t); + } + + __apex(__combine(__initial, __t)); + __omp_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial, + __combine, __scan); +} + +template +void +__parallel_strict_scan(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) +{ + if (__n <= __default_chunk_size) + { + _Tp __sum = __initial; + if (__n) + { + __sum = __combine(__sum, __reduce(_Index(0), __n)); + } + __apex(__sum); + if (__n) + { + __scan(_Index(0), __n, __initial); + } + return; + } + + if (omp_in_parallel()) + { + __pstl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine, + __scan, __apex); + } + else + { + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + { + 
__pstl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine, + __scan, __apex); + } + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // _PSTL_INTERNAL_OMP_PARALLEL_SCAN_H diff --git a/Components/Include/pstl/internal/omp/parallel_stable_partial_sort.h b/Components/Include/pstl/internal/omp/parallel_stable_partial_sort.h new file mode 100644 index 0000000..06cd55b --- /dev/null +++ b/Components/Include/pstl/internal/omp/parallel_stable_partial_sort.h @@ -0,0 +1,33 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H +#define _PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +void +__parallel_stable_partial_sort(__pstl::__internal::__openmp_backend_tag, _RandomAccessIterator __xs, + _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, + std::size_t /* __nsort */) +{ + // TODO: "Parallel partial sort needs to be implemented."); + __leaf_sort(__xs, __xe, __comp); +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // _PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H diff --git a/Components/Include/pstl/internal/omp/parallel_stable_sort.h b/Components/Include/pstl/internal/omp/parallel_stable_sort.h new file mode 100644 index 0000000..e4d0676 --- /dev/null +++ b/Components/Include/pstl/internal/omp/parallel_stable_sort.h @@ -0,0 +1,160 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH 
LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H +#define _PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H + +#include "util.h" +#include "parallel_merge.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +namespace __sort_details +{ +struct __move_value +{ + template + void + operator()(_Iterator __x, _OutputIterator __z) const + { + *__z = std::move(*__x); + } +}; + +template +_OutputIterator +__parallel_move_range(_RandomAccessIterator __first1, _RandomAccessIterator __last1, _OutputIterator __d_first) +{ + std::size_t __size = __last1 - __first1; + + // Perform serial moving of small chunks + + if (__size <= __default_chunk_size) + { + return std::move(__first1, __last1, __d_first); + } + + // Perform parallel moving of larger chunks + auto __policy = __pstl::__omp_backend::__chunk_partitioner(__first1, __last1); + + _PSTL_PRAGMA(omp taskloop) + for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk) + { + __pstl::__omp_backend::__process_chunk(__policy, __first1, __chunk, + [&](auto __chunk_first, auto __chunk_last) + { + auto __chunk_offset = __chunk_first - __first1; + auto __output_it = __d_first + __chunk_offset; + std::move(__chunk_first, __chunk_last, __output_it); + }); + } + + return __d_first + __size; +} + +struct __move_range +{ + template + _OutputIterator + operator()(_RandomAccessIterator __first1, _RandomAccessIterator __last1, _OutputIterator __d_first) const + { + return __pstl::__omp_backend::__sort_details::__parallel_move_range(__first1, __last1, __d_first); + } +}; +} // namespace __sort_details + +template +void +__parallel_stable_sort_body(_RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, + _LeafSort __leaf_sort) +{ + using _ValueType = typename 
std::iterator_traits<_RandomAccessIterator>::value_type; + using _VecType = typename std::vector<_ValueType>; + using _OutputIterator = typename _VecType::iterator; + using _MoveValue = typename __omp_backend::__sort_details::__move_value; + using _MoveRange = __omp_backend::__sort_details::__move_range; + + if (__should_run_serial(__xs, __xe)) + { + __leaf_sort(__xs, __xe, __comp); + } + else + { + std::size_t __size = __xe - __xs; + auto __mid = __xs + (__size / 2); + __pstl::__omp_backend::__parallel_invoke_body( + [&]() { __parallel_stable_sort_body(__xs, __mid, __comp, __leaf_sort); }, + [&]() { __parallel_stable_sort_body(__mid, __xe, __comp, __leaf_sort); }); + + // Perform a parallel merge of the sorted ranges into __output_data. + _VecType __output_data(__size); + _MoveValue __move_value; + _MoveRange __move_range; + __utils::__serial_move_merge __merge(__size); + __pstl::__omp_backend::__parallel_merge_body( + __mid - __xs, __xe - __mid, __xs, __mid, __mid, __xe, __output_data.begin(), __comp, + [&__merge, &__move_value, &__move_range](_RandomAccessIterator __as, _RandomAccessIterator __ae, + _RandomAccessIterator __bs, _RandomAccessIterator __be, + _OutputIterator __cs, _Compare __comp) + { __merge(__as, __ae, __bs, __be, __cs, __comp, __move_value, __move_value, __move_range, __move_range); }); + + // Move the values from __output_data back in the original source range. 
+ __pstl::__omp_backend::__sort_details::__parallel_move_range(__output_data.begin(), __output_data.end(), __xs); + } +} + +template +void +__parallel_stable_sort(__pstl::__internal::__openmp_backend_tag __tag, _ExecutionPolicy&& /*__exec*/, + _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, + std::size_t __nsort = 0) +{ + auto __count = static_cast(__xe - __xs); + if (__count <= __default_chunk_size || __nsort < __count) + { + __leaf_sort(__xs, __xe, __comp); + return; + } + + // TODO: the partial sort implementation should + // be shared with the other backends. + + if (omp_in_parallel()) + { + if (__count <= __nsort) + { + __pstl::__omp_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort); + } + else + { + __pstl::__omp_backend::__parallel_stable_partial_sort(__tag, __xs, __xe, __comp, __leaf_sort, __nsort); + } + } + else + { + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + if (__count <= __nsort) + { + __pstl::__omp_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort); + } + else + { + __pstl::__omp_backend::__parallel_stable_partial_sort(__tag, __xs, __xe, __comp, __leaf_sort, __nsort); + } + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // _PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H diff --git a/Components/Include/pstl/internal/omp/parallel_transform_reduce.h b/Components/Include/pstl/internal/omp/parallel_transform_reduce.h new file mode 100644 index 0000000..1d4cc0a --- /dev/null +++ b/Components/Include/pstl/internal/omp/parallel_transform_reduce.h @@ -0,0 +1,113 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H +#define _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +//------------------------------------------------------------------------ +// parallel_transform_reduce +// +// Notation: +// r(i,j,init) returns reduction of init with reduction over [i,j) +// u(i) returns f(i,i+1,identity) for a hypothetical left identity element +// of r c(x,y) combines values x and y that were the result of r or u +//------------------------------------------------------------------------ + +template +auto +__transform_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryOp __unary_op, _Value __init, + _Combiner __combiner, _Reduction __reduction) +{ + const std::size_t __num_threads = omp_get_num_threads(); + const std::size_t __size = __last - __first; + + // Initial partition of the iteration space into chunks. If the range is too small, + // this will result in a nonsense policy, so we check on the size as well below. + auto __policy = __omp_backend::__chunk_partitioner(__first + __num_threads, __last); + + if (__size <= __num_threads || __policy.__n_chunks < 2) + { + return __reduction(__first, __last, __init); + } + + // Here, we cannot use OpenMP UDR because we must store the init value in + // the combiner and it will be used several times. Although there should be + // the only one; we manually generate the identity elements for each thread. 
+ std::vector<_Value> __accums; + __accums.reserve(__num_threads); + + // initialize accumulators for all threads + for (std::size_t __i = 0; __i < __num_threads; ++__i) + { + __accums.emplace_back(__unary_op(__first + __i)); + } + + // main loop + _PSTL_PRAGMA(omp taskloop shared(__accums)) + for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk) + { + __pstl::__omp_backend::__process_chunk(__policy, __first + __num_threads, __chunk, + [&](auto __chunk_first, auto __chunk_last) + { + auto __thread_num = omp_get_thread_num(); + __accums[__thread_num] = + __reduction(__chunk_first, __chunk_last, __accums[__thread_num]); + }); + } + + // combine by accumulators + for (std::size_t __i = 0; __i < __num_threads; ++__i) + { + __init = __combiner(__init, __accums[__i]); + } + + return __init; +} + +template +_Value +__parallel_transform_reduce(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryOp __unary_op, _Value __init, _Combiner __combiner, + _Reduction __reduction) +{ + _Value __result = __init; + if (omp_in_parallel()) + { + // We don't create a nested parallel region in an existing parallel + // region: just create tasks + __result = __pstl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner, + __reduction); + } + else + { + // Create a parallel region, and a single thread will create tasks + // for the region. 
+ _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + { + __result = __pstl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner, + __reduction); + } + } + + return __result; +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H diff --git a/Components/Include/pstl/internal/omp/parallel_transform_scan.h b/Components/Include/pstl/internal/omp/parallel_transform_scan.h new file mode 100644 index 0000000..f836289 --- /dev/null +++ b/Components/Include/pstl/internal/omp/parallel_transform_scan.h @@ -0,0 +1,32 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H +#define _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +_Tp +__parallel_transform_scan(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _Index __n, _Up /* __u */, + _Tp __init, _Cp /* __combine */, _Rp /* __brick_reduce */, _Sp __scan) +{ + // TODO: parallelize this function. 
+ return __scan(_Index(0), __n, __init); +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H diff --git a/Components/Include/pstl/internal/omp/util.h b/Components/Include/pstl/internal/omp/util.h new file mode 100644 index 0000000..c88d980 --- /dev/null +++ b/Components/Include/pstl/internal/omp/util.h @@ -0,0 +1,173 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_OMP_UTIL_H +#define _PSTL_INTERNAL_OMP_UTIL_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../parallel_backend_utils.h" +#include "../unseq_backend_simd.h" +#include "../utils.h" + +// Portability "#pragma" definition +#ifdef _MSC_VER +# define _PSTL_PRAGMA(x) __pragma(x) +#else +# define _PSTL_PRAGMA(x) _Pragma(# x) +#endif + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __omp_backend +{ + +//------------------------------------------------------------------------ +// use to cancel execution +//------------------------------------------------------------------------ +inline void +__cancel_execution() +{ + // TODO: Figure out how to make cancelation work. 
+} + +//------------------------------------------------------------------------ +// raw buffer +//------------------------------------------------------------------------ + +template +class __buffer +{ + std::allocator<_Tp> __allocator_; + _Tp* __ptr_; + const std::size_t __buf_size_; + __buffer(const __buffer&) = delete; + void + operator=(const __buffer&) = delete; + + public: + __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {} + + operator bool() const { return __ptr_ != nullptr; } + + _Tp* + get() const + { + return __ptr_; + } + ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); } +}; + +// Preliminary size of each chunk: requires further discussion +inline constexpr std::size_t __default_chunk_size = 2048; + +// Convenience function to determine when we should run serial. +template ::value, bool> = true> +constexpr auto +__should_run_serial(_Iterator __first, _Iterator __last) -> bool +{ + using _difference_type = typename std::iterator_traits<_Iterator>::difference_type; + auto __size = std::distance(__first, __last); + return __size <= static_cast<_difference_type>(__default_chunk_size); +} + +template ::value, bool> = true> +constexpr auto +__should_run_serial(_Index __first, _Index __last) -> bool +{ + using _difference_type = _Index; + auto __size = __last - __first; + return __size <= static_cast<_difference_type>(__default_chunk_size); +} + +struct __chunk_metrics +{ + std::size_t __n_chunks; + std::size_t __chunk_size; + std::size_t __first_chunk_size; +}; + +// The iteration space partitioner according to __requested_chunk_size +template +auto +__chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last, + _Size __requested_chunk_size = __default_chunk_size) -> __chunk_metrics +{ + /* + * This algorithm improves distribution of elements in chunks by avoiding + * small tail chunks. 
The leftover elements that do not fit neatly into + * the chunk size are redistributed to early chunks. This improves + * utilization of the processor's prefetch and reduces the number of + * tasks needed by 1. + */ + + const _Size __n = __last - __first; + _Size __n_chunks = 0; + _Size __chunk_size = 0; + _Size __first_chunk_size = 0; + if (__n < __requested_chunk_size) + { + __chunk_size = __n; + __first_chunk_size = __n; + __n_chunks = 1; + return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size}; + } + + __n_chunks = (__n / __requested_chunk_size) + 1; + __chunk_size = __n / __n_chunks; + __first_chunk_size = __chunk_size; + const _Size __n_leftover_items = __n - (__n_chunks * __chunk_size); + + if (__n_leftover_items == __chunk_size) + { + __n_chunks += 1; + return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size}; + } + else if (__n_leftover_items == 0) + { + __first_chunk_size = __chunk_size; + return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size}; + } + + const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks; + const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks); + + __chunk_size += __n_extra_items_per_chunk; + __first_chunk_size = __chunk_size + __n_final_leftover_items; + + return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size}; +} + +template +void +__process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chunk_index, _Func __f) +{ + auto __this_chunk_size = __chunk_index == 0 ? __metrics.__first_chunk_size : __metrics.__chunk_size; + auto __index = __chunk_index == 0 ? 
0 + : (__chunk_index * __metrics.__chunk_size) + + (__metrics.__first_chunk_size - __metrics.__chunk_size); + auto __first = __base + __index; + auto __last = __first + __this_chunk_size; + __f(__first, __last); +} + +} // namespace __omp_backend +} // namespace __pstl + +#endif // _PSTL_INTERNAL_OMP_UTIL_H diff --git a/Components/Include/pstl/internal/parallel_backend.h b/Components/Include/pstl/internal/parallel_backend.h new file mode 100644 index 0000000..4da871b --- /dev/null +++ b/Components/Include/pstl/internal/parallel_backend.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_PARALLEL_BACKEND_H +#define _PSTL_PARALLEL_BACKEND_H + +#include "pstl_config.h" + +#if defined(_PSTL_PAR_BACKEND_SERIAL) +# include "parallel_backend_serial.h" +namespace __pstl +{ +namespace __par_backend = __serial_backend; +} +#elif defined(_PSTL_PAR_BACKEND_TBB) +# include "parallel_backend_tbb.h" +namespace __pstl +{ +namespace __par_backend = __tbb_backend; +} +#elif defined(_PSTL_PAR_BACKEND_OPENMP) +# include "parallel_backend_omp.h" +namespace __pstl +{ +namespace __par_backend = __omp_backend; +} +#else +_PSTL_PRAGMA_MESSAGE("Parallel backend was not specified"); +#endif + +#endif /* _PSTL_PARALLEL_BACKEND_H */ diff --git a/Components/Include/pstl/internal/parallel_backend_omp.h b/Components/Include/pstl/internal/parallel_backend_omp.h new file mode 100644 index 0000000..7398cfe --- /dev/null +++ b/Components/Include/pstl/internal/parallel_backend_omp.h @@ -0,0 +1,58 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: 
Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_PARALLEL_BACKEND_OMP_H +#define _PSTL_PARALLEL_BACKEND_OMP_H + +//------------------------------------------------------------------------ +// parallel_invoke +//------------------------------------------------------------------------ + +#include "./omp/parallel_invoke.h" + +//------------------------------------------------------------------------ +// parallel_for +//------------------------------------------------------------------------ + +#include "./omp/parallel_for.h" + +//------------------------------------------------------------------------ +// parallel_for_each +//------------------------------------------------------------------------ + +#include "./omp/parallel_for_each.h" + +//------------------------------------------------------------------------ +// parallel_reduce +//------------------------------------------------------------------------ + +#include "./omp/parallel_reduce.h" +#include "./omp/parallel_transform_reduce.h" + +//------------------------------------------------------------------------ +// parallel_scan +//------------------------------------------------------------------------ + +#include "./omp/parallel_scan.h" +#include "./omp/parallel_transform_scan.h" + +//------------------------------------------------------------------------ +// parallel_stable_sort +//------------------------------------------------------------------------ + +#include "./omp/parallel_stable_partial_sort.h" +#include "./omp/parallel_stable_sort.h" + +//------------------------------------------------------------------------ +// parallel_merge +//------------------------------------------------------------------------ +#include "./omp/parallel_merge.h" + +#endif 
//_PSTL_PARALLEL_BACKEND_OMP_H diff --git a/Components/Include/pstl/internal/parallel_backend_serial.h b/Components/Include/pstl/internal/parallel_backend_serial.h new file mode 100644 index 0000000..ad2c5fc --- /dev/null +++ b/Components/Include/pstl/internal/parallel_backend_serial.h @@ -0,0 +1,137 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_PARALLEL_BACKEND_SERIAL_H +#define _PSTL_PARALLEL_BACKEND_SERIAL_H + +#include +#include +#include +#include +#include + +#include "pstl_config.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __serial_backend +{ + +template +class __buffer +{ + std::allocator<_Tp> __allocator_; + _Tp* __ptr_; + const std::size_t __buf_size_; + __buffer(const __buffer&) = delete; + void + operator=(const __buffer&) = delete; + + public: + __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {} + + operator bool() const { return __ptr_ != nullptr; } + _Tp* + get() const + { + return __ptr_; + } + ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); } +}; + +inline void +__cancel_execution() +{ +} + +template +void +__parallel_for(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +{ + __f(__first, __last); +} + +template +_Value +__parallel_reduce(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + const _Value& __identity, const _RealBody& __real_body, const _Reduction&) +{ + if (__first == __last) + { + return __identity; + } + else + { + return __real_body(__first, __last, __identity); + } +} + +template +_Tp 
+__parallel_transform_reduce(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) +{ + return __reduce(__first, __last, __init); +} + +template +void +__parallel_strict_scan(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) +{ + _Tp __sum = __initial; + if (__n) + __sum = __combine(__sum, __reduce(_Index(0), __n)); + __apex(__sum); + if (__n) + __scan(_Index(0), __n, __initial); +} + +template +_Tp +__parallel_transform_scan(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _UnaryOp, + _Tp __init, _BinaryOp, _Reduce, _Scan __scan) +{ + return __scan(_Index(0), __n, __init); +} + +template +void +__parallel_stable_sort(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort, std::size_t = 0) +{ + __leaf_sort(__first, __last, __comp); +} + +template +void +__parallel_merge(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __outit, _Compare __comp, _LeafMerge __leaf_merge) +{ + __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp); +} + +template +void +__parallel_invoke(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +{ + std::forward<_F1>(__f1)(); + std::forward<_F2>(__f2)(); +} + +} // namespace __serial_backend +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_PARALLEL_BACKEND_SERIAL_H */ diff --git a/Components/Include/pstl/internal/parallel_backend_tbb.h b/Components/Include/pstl/internal/parallel_backend_tbb.h new file mode 100644 index 0000000..e336f69 --- /dev/null +++ 
b/Components/Include/pstl/internal/parallel_backend_tbb.h @@ -0,0 +1,1296 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_PARALLEL_BACKEND_TBB_H +#define _PSTL_PARALLEL_BACKEND_TBB_H + +#include +#include + +#include "pstl_config.h" +#include "parallel_backend_utils.h" + +// Bring in minimal required subset of Intel TBB +#include +#include +#include +#include +#include +#include +#include +#include + +#if TBB_INTERFACE_VERSION < 10000 +# error Intel(R) Threading Building Blocks 2018 is required; older versions are not supported. +#endif + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __tbb_backend +{ + +//! Raw memory buffer with automatic freeing and no exceptions. +/** Some of our algorithms need to start with raw memory buffer, +not an initialize array, because initialization/destruction +would make the span be at least O(N). */ +// tbb::allocator can improve performance in some cases. +template +class __buffer +{ + tbb::tbb_allocator<_Tp> _M_allocator; + _Tp* _M_ptr; + const std::size_t _M_buf_size; + __buffer(const __buffer&) = delete; + void + operator=(const __buffer&) = delete; + + public: + //! Try to obtain buffer of given size to store objects of _Tp type + __buffer(std::size_t n) : _M_allocator(), _M_ptr(_M_allocator.allocate(n)), _M_buf_size(n) {} + //! True if buffer was successfully obtained, zero otherwise. + operator bool() const { return _M_ptr != NULL; } + //! Return pointer to buffer, or NULL if buffer could not be obtained. + _Tp* + get() const + { + return _M_ptr; + } + //! 
Destroy buffer + ~__buffer() { _M_allocator.deallocate(_M_ptr, _M_buf_size); } +}; + +// Wrapper for tbb::task +inline void +__cancel_execution() +{ +#if TBB_INTERFACE_VERSION <= 12000 + tbb::task::self().group()->cancel_group_execution(); +#else + tbb::task::current_context()->cancel_group_execution(); +#endif +} + +//------------------------------------------------------------------------ +// parallel_for +//------------------------------------------------------------------------ + +template +class __parallel_for_body +{ + public: + __parallel_for_body(const _RealBody& __body) : _M_body(__body) {} + __parallel_for_body(const __parallel_for_body& __body) : _M_body(__body._M_body) {} + void + operator()(const tbb::blocked_range<_Index>& __range) const + { + _M_body(__range.begin(), __range.end()); + } + + private: + _RealBody _M_body; +}; + +//! Evaluation of brick f[i,j) for each subrange [i,j) of [first,last) +// wrapper over tbb::parallel_for +template +void +__parallel_for(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +{ + tbb::this_task_arena::isolate([=]() { + tbb::parallel_for(tbb::blocked_range<_Index>(__first, __last), __parallel_for_body<_Index, _Fp>(__f)); + }); +} + +//! 
Evaluation of brick f[i,j) for each subrange [i,j) of [first,last) +// wrapper over tbb::parallel_reduce +template +_Value +__parallel_reduce(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + const _Value& __identity, const _RealBody& __real_body, const _Reduction& __reduction) +{ + return tbb::this_task_arena::isolate([__first, __last, &__identity, &__real_body, &__reduction]() -> _Value { + return tbb::parallel_reduce( + tbb::blocked_range<_Index>(__first, __last), __identity, + [__real_body](const tbb::blocked_range<_Index>& __r, const _Value& __value) -> _Value { + return __real_body(__r.begin(), __r.end(), __value); + }, + __reduction); + }); +} + +//------------------------------------------------------------------------ +// parallel_transform_reduce +// +// Notation: +// r(i,j,init) returns reduction of init with reduction over [i,j) +// u(i) returns f(i,i+1,identity) for a hypothetical left identity element of r +// c(x,y) combines values x and y that were the result of r or u +//------------------------------------------------------------------------ + +template +struct __par_trans_red_body +{ + alignas(_Tp) char _M_sum_storage[sizeof(_Tp)]; // Holds generalized non-commutative sum when has_sum==true + _Rp _M_brick_reduce; // Most likely to have non-empty layout + _Up _M_u; + _Cp _M_combine; + bool _M_has_sum; // Put last to minimize size of class + _Tp& + sum() + { + __TBB_ASSERT(_M_has_sum, "sum expected"); + return *(_Tp*)_M_sum_storage; + } + __par_trans_red_body(_Up __u, _Tp __init, _Cp __c, _Rp __r) + : _M_brick_reduce(__r), _M_u(__u), _M_combine(__c), _M_has_sum(true) + { + new (_M_sum_storage) _Tp(__init); + } + + __par_trans_red_body(__par_trans_red_body& __left, tbb::split) + : _M_brick_reduce(__left._M_brick_reduce), _M_u(__left._M_u), _M_combine(__left._M_combine), _M_has_sum(false) + { + } + + ~__par_trans_red_body() + { + // 17.6.5.12 tells us to not worry about catching exceptions from destructors. 
+ if (_M_has_sum) + sum().~_Tp(); + } + + void + join(__par_trans_red_body& __rhs) + { + sum() = _M_combine(sum(), __rhs.sum()); + } + + void + operator()(const tbb::blocked_range<_Index>& __range) + { + _Index __i = __range.begin(); + _Index __j = __range.end(); + if (!_M_has_sum) + { + __TBB_ASSERT(__range.size() > 1, "there should be at least 2 elements"); + new (&_M_sum_storage) + _Tp(_M_combine(_M_u(__i), _M_u(__i + 1))); // The condition i+1 < j is provided by the grain size of 3 + _M_has_sum = true; + std::advance(__i, 2); + if (__i == __j) + return; + } + sum() = _M_brick_reduce(__i, __j, sum()); + } +}; + +template +_Tp +__parallel_transform_reduce(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce) +{ + __tbb_backend::__par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce); + // The grain size of 3 is used in order to provide mininum 2 elements for each body + tbb::this_task_arena::isolate( + [__first, __last, &__body]() { tbb::parallel_reduce(tbb::blocked_range<_Index>(__first, __last, 3), __body); }); + return __body.sum(); +} + +//------------------------------------------------------------------------ +// parallel_scan +//------------------------------------------------------------------------ + +template +class __trans_scan_body +{ + alignas(_Tp) char _M_sum_storage[sizeof(_Tp)]; // Holds generalized non-commutative sum when has_sum==true + _Rp _M_brick_reduce; // Most likely to have non-empty layout + _Up _M_u; + _Cp _M_combine; + _Sp _M_scan; + bool _M_has_sum; // Put last to minimize size of class + public: + __trans_scan_body(_Up __u, _Tp __init, _Cp __combine, _Rp __reduce, _Sp __scan) + : _M_brick_reduce(__reduce), _M_u(__u), _M_combine(__combine), _M_scan(__scan), _M_has_sum(true) + { + new (_M_sum_storage) _Tp(__init); + } + + __trans_scan_body(__trans_scan_body& __b, tbb::split) + : 
_M_brick_reduce(__b._M_brick_reduce), _M_u(__b._M_u), _M_combine(__b._M_combine), _M_scan(__b._M_scan), + _M_has_sum(false) + { + } + + ~__trans_scan_body() + { + // 17.6.5.12 tells us to not worry about catching exceptions from destructors. + if (_M_has_sum) + sum().~_Tp(); + } + + _Tp& + sum() const + { + __TBB_ASSERT(_M_has_sum, "sum expected"); + return *const_cast<_Tp*>(reinterpret_cast<_Tp const*>(_M_sum_storage)); + } + + void + operator()(const tbb::blocked_range<_Index>& __range, tbb::pre_scan_tag) + { + _Index __i = __range.begin(); + _Index __j = __range.end(); + if (!_M_has_sum) + { + new (&_M_sum_storage) _Tp(_M_u(__i)); + _M_has_sum = true; + ++__i; + if (__i == __j) + return; + } + sum() = _M_brick_reduce(__i, __j, sum()); + } + + void + operator()(const tbb::blocked_range<_Index>& __range, tbb::final_scan_tag) + { + sum() = _M_scan(__range.begin(), __range.end(), sum()); + } + + void + reverse_join(__trans_scan_body& __a) + { + if (_M_has_sum) + { + sum() = _M_combine(__a.sum(), sum()); + } + else + { + new (&_M_sum_storage) _Tp(__a.sum()); + _M_has_sum = true; + } + } + + void + assign(__trans_scan_body& __b) + { + sum() = __b.sum(); + } +}; + +template +_Index +__split(_Index __m) +{ + _Index __k = 1; + while (2 * __k < __m) + __k *= 2; + return __k; +} + +//------------------------------------------------------------------------ +// __parallel_strict_scan +//------------------------------------------------------------------------ + +template +void +__upsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Rp __reduce, _Cp __combine) +{ + if (__m == 1) + __r[0] = __reduce(__i * __tilesize, __lastsize); + else + { + _Index __k = __split(__m); + tbb::parallel_invoke( + [=] { __tbb_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); }, + [=] { + __tbb_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine); + }); + if (__m == 2 * __k) + __r[__m - 1] = 
__combine(__r[__k - 1], __r[__m - 1]); + } +} + +template +void +__downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Tp __initial, _Cp __combine, + _Sp __scan) +{ + if (__m == 1) + __scan(__i * __tilesize, __lastsize, __initial); + else + { + const _Index __k = __split(__m); + tbb::parallel_invoke( + [=] { __tbb_backend::__downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan); }, + // Assumes that __combine never throws. + //TODO: Consider adding a requirement for user functors to be constant. + [=, &__combine] { + __tbb_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, + __combine(__initial, __r[__k - 1]), __combine, __scan); + }); + } +} + +// Adapted from Intel(R) Cilk(TM) version from cilkpub. +// Let i:len denote a counted interval of length n starting at i. s denotes a generalized-sum value. +// Expected actions of the functors are: +// reduce(i,len) -> s -- return reduction value of i:len. +// combine(s1,s2) -> s -- return merged sum +// apex(s) -- do any processing necessary between reduce and scan. +// scan(i,len,initial) -- perform scan over i:len starting with initial. +// The initial range 0:n is partitioned into consecutive subranges. +// reduce and scan are each called exactly once per subrange. +// Thus callers can rely upon side effects in reduce. +// combine must not throw an exception. +// apex is called exactly once, after all calls to reduce and before all calls to scan. +// For example, it's useful for allocating a __buffer used by scan but whose size is the sum of all reduction values. +// T must have a trivial constructor and destructor. 
+template +void +__parallel_strict_scan(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) +{ + tbb::this_task_arena::isolate([=, &__combine]() { + if (__n > 1) + { + _Index __p = tbb::this_task_arena::max_concurrency(); + const _Index __slack = 4; + _Index __tilesize = (__n - 1) / (__slack * __p) + 1; + _Index __m = (__n - 1) / __tilesize; + __buffer<_Tp> __buf(__m + 1); + _Tp* __r = __buf.get(); + __tbb_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, + __combine); + + // When __apex is a no-op and __combine has no side effects, a good optimizer + // should be able to eliminate all code between here and __apex. + // Alternatively, provide a default value for __apex that can be + // recognized by metaprogramming that conditionlly executes the following. + size_t __k = __m + 1; + _Tp __t = __r[__k - 1]; + while ((__k &= __k - 1)) + __t = __combine(__r[__k - 1], __t); + __apex(__combine(__initial, __t)); + __tbb_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial, + __combine, __scan); + return; + } + // Fewer than 2 elements in sequence, or out of memory. Handle has single block. 
+ _Tp __sum = __initial; + if (__n) + __sum = __combine(__sum, __reduce(_Index(0), __n)); + __apex(__sum); + if (__n) + __scan(_Index(0), __n, __initial); + }); +} + +template +_Tp +__parallel_transform_scan(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, + _Cp __combine, _Rp __brick_reduce, _Sp __scan) +{ + __trans_scan_body<_Index, _Up, _Tp, _Cp, _Rp, _Sp> __body(__u, __init, __combine, __brick_reduce, __scan); + auto __range = tbb::blocked_range<_Index>(0, __n); + tbb::this_task_arena::isolate([__range, &__body]() { tbb::parallel_scan(__range, __body); }); + return __body.sum(); +} + +//------------------------------------------------------------------------ +// parallel_stable_sort +//------------------------------------------------------------------------ + +//------------------------------------------------------------------------ +// stable_sort utilities +// +// These are used by parallel implementations but do not depend on them. +//------------------------------------------------------------------------ +#define _PSTL_MERGE_CUT_OFF 2000 + +template +class __func_task; +template +class __root_task; + +#if TBB_INTERFACE_VERSION <= 12000 +class __task : public tbb::task +{ + public: + template + __task* + make_continuation(_Fn&& __f) + { + return new (allocate_continuation()) __func_task::type>(std::forward<_Fn>(__f)); + } + + template + __task* + make_child_of(__task* parent, _Fn&& __f) + { + return new (parent->allocate_child()) __func_task::type>(std::forward<_Fn>(__f)); + } + + template + __task* + make_additional_child_of(tbb::task* parent, _Fn&& __f) + { + return new (tbb::task::allocate_additional_child_of(*parent)) + __func_task::type>(std::forward<_Fn>(__f)); + } + + inline void + recycle_as_continuation() + { + tbb::task::recycle_as_continuation(); + } + + inline void + recycle_as_child_of(__task* parent) + { + tbb::task::recycle_as_child_of(*parent); + } + + inline void + spawn(__task* __t) + { + 
tbb::task::spawn(*__t); + } + + template + static inline void + spawn_root_and_wait(__root_task<_Fn>& __root) + { + tbb::task::spawn_root_and_wait(*__root._M_task); + } +}; + +template +class __func_task : public __task +{ + _Func _M_func; + + tbb::task* + execute() + { + return _M_func(this); + }; + + public: + template + __func_task(_Fn&& __f) : _M_func{std::forward<_Fn>(__f)} + { + } + + _Func& + body() + { + return _M_func; + } +}; + +template +class __root_task +{ + tbb::task* _M_task; + + public: + template + __root_task(Args&&... args) + : _M_task{new (tbb::task::allocate_root()) __func_task<_Func>{_Func(std::forward(args)...)}} + { + } + + friend class __task; + friend class __func_task<_Func>; +}; + +#else // TBB_INTERFACE_VERSION <= 12000 +class __task : public tbb::detail::d1::task +{ + protected: + tbb::detail::d1::small_object_allocator _M_allocator{}; + tbb::detail::d1::execution_data* _M_execute_data{}; + __task* _M_parent{}; + std::atomic _M_refcount{}; + bool _M_recycle{}; + + template + __task* + allocate_func_task(_Fn&& __f) + { + _PSTL_ASSERT(_M_execute_data != nullptr); + tbb::detail::d1::small_object_allocator __alloc{}; + auto __t = + __alloc.new_object<__func_task::type>>(*_M_execute_data, std::forward<_Fn>(__f)); + __t->_M_allocator = __alloc; + return __t; + } + + public: + __task* + parent() + { + return _M_parent; + } + + void + set_ref_count(int __n) + { + _M_refcount.store(__n, std::memory_order_release); + } + + template + __task* + make_continuation(_Fn&& __f) + { + auto __t = allocate_func_task(std::forward<_Fn&&>(__f)); + __t->_M_parent = _M_parent; + _M_parent = nullptr; + return __t; + } + + template + __task* + make_child_of(__task* __parent, _Fn&& __f) + { + auto __t = allocate_func_task(std::forward<_Fn&&>(__f)); + __t->_M_parent = __parent; + return __t; + } + + template + __task* + make_additional_child_of(__task* __parent, _Fn&& __f) + { + auto __t = make_child_of(__parent, std::forward<_Fn>(__f)); + 
_PSTL_ASSERT(__parent->_M_refcount.load(std::memory_order_relaxed) > 0); + ++__parent->_M_refcount; + return __t; + } + + inline void + recycle_as_continuation() + { + _M_recycle = true; + } + + inline void + recycle_as_child_of(__task* parent) + { + _M_recycle = true; + _M_parent = parent; + } + + inline void + spawn(__task* __t) + { + _PSTL_ASSERT(_M_execute_data != nullptr); + tbb::detail::d1::spawn(*__t, *_M_execute_data->context); + } + + template + static inline void + spawn_root_and_wait(__root_task<_Fn>& __root) + { + tbb::detail::d1::execute_and_wait(*__root._M_func_task, __root._M_context, __root._M_wait_object, + __root._M_context); + } + + template + friend class __func_task; +}; + +template +class __func_task : public __task +{ + _Func _M_func; + + __task* + execute(tbb::detail::d1::execution_data& __ed) override + { + _M_execute_data = &__ed; + _M_recycle = false; + __task* __next = _M_func(this); + return finalize(__next); + }; + + __task* + cancel(tbb::detail::d1::execution_data& __ed) override + { + return finalize(nullptr); + } + + __task* + finalize(__task* __next) + { + bool __recycle = _M_recycle; + _M_recycle = false; + + if (__recycle) + { + return __next; + } + + auto __parent = _M_parent; + auto __alloc = _M_allocator; + auto __ed = _M_execute_data; + + this->~__func_task(); + + _PSTL_ASSERT(__parent != nullptr); + _PSTL_ASSERT(__parent->_M_refcount.load(std::memory_order_relaxed) > 0); + if (--__parent->_M_refcount == 0) + { + _PSTL_ASSERT(__next == nullptr); + __alloc.deallocate(this, *__ed); + return __parent; + } + + return __next; + } + + friend class __root_task<_Func>; + + public: + template + __func_task(_Fn&& __f) : _M_func(std::forward<_Fn>(__f)) + { + } + + _Func& + body() + { + return _M_func; + } +}; + +template +class __root_task : public __task +{ + __task* + execute(tbb::detail::d1::execution_data& __ed) override + { + _M_wait_object.release(); + return nullptr; + }; + + __task* + cancel(tbb::detail::d1::execution_data& 
__ed) override + { + _M_wait_object.release(); + return nullptr; + } + + __func_task<_Func>* _M_func_task{}; + tbb::detail::d1::wait_context _M_wait_object{0}; + tbb::task_group_context _M_context{}; + + public: + template + __root_task(Args&&... args) : _M_wait_object{1} + { + tbb::detail::d1::small_object_allocator __alloc{}; + _M_func_task = __alloc.new_object<__func_task<_Func>>(_Func(std::forward(args)...)); + _M_func_task->_M_allocator = __alloc; + _M_func_task->_M_parent = this; + _M_refcount.store(1, std::memory_order_relaxed); + } + + friend class __task; +}; +#endif // TBB_INTERFACE_VERSION <= 12000 + +template +class __merge_func +{ + typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; + typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; + typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType; + typedef typename std::iterator_traits<_RandomAccessIterator1>::value_type _ValueType; + + _RandomAccessIterator1 _M_x_beg; + _RandomAccessIterator2 _M_z_beg; + + _SizeType _M_xs, _M_xe; + _SizeType _M_ys, _M_ye; + _SizeType _M_zs; + _Compare _M_comp; + _LeafMerge _M_leaf_merge; + _SizeType _M_nsort; //number of elements to be sorted for partial_sort alforithm + + static const _SizeType __merge_cut_off = _PSTL_MERGE_CUT_OFF; + + bool _root; //means a task is merging root task + bool _x_orig; //"true" means X(or left ) subrange is in the original container; false - in the buffer + bool _y_orig; //"true" means Y(or right) subrange is in the original container; false - in the buffer + bool _split; //"true" means a merge task is a split task for parallel merging, the execution logic differs + + bool + is_partial() const + { + return _M_nsort > 0; + } + + struct __move_value + { + template + void + operator()(Iterator1 __x, Iterator2 __z) + { + *__z = std::move(*__x); + } + }; + + struct __move_value_construct + { + template + void + 
operator()(Iterator1 __x, Iterator2 __z) + { + ::new (std::addressof(*__z)) _ValueType(std::move(*__x)); + } + }; + + struct __move_range + { + template + Iterator2 + operator()(Iterator1 __first1, Iterator1 __last1, Iterator2 __first2) + { + if (__last1 - __first1 < __merge_cut_off) + return std::move(__first1, __last1, __first2); + + auto __n = __last1 - __first1; + tbb::parallel_for(tbb::blocked_range<_SizeType>(0, __n, __merge_cut_off), + [__first1, __first2](const tbb::blocked_range<_SizeType>& __range) { + std::move(__first1 + __range.begin(), __first1 + __range.end(), + __first2 + __range.begin()); + }); + return __first2 + __n; + } + }; + + struct __move_range_construct + { + template + Iterator2 + operator()(Iterator1 __first1, Iterator1 __last1, Iterator2 __first2) + { + if (__last1 - __first1 < __merge_cut_off) + { + for (; __first1 != __last1; ++__first1, ++__first2) + __move_value_construct()(__first1, __first2); + return __first2; + } + + auto __n = __last1 - __first1; + tbb::parallel_for(tbb::blocked_range<_SizeType>(0, __n, __merge_cut_off), + [__first1, __first2](const tbb::blocked_range<_SizeType>& __range) { + for (auto i = __range.begin(); i != __range.end(); ++i) + __move_value_construct()(__first1 + i, __first2 + i); + }); + return __first2 + __n; + } + }; + + struct __cleanup_range + { + template + void + operator()(Iterator __first, Iterator __last) + { + if (__last - __first < __merge_cut_off) + _Cleanup()(__first, __last); + else + { + auto __n = __last - __first; + tbb::parallel_for(tbb::blocked_range<_SizeType>(0, __n, __merge_cut_off), + [__first](const tbb::blocked_range<_SizeType>& __range) { + _Cleanup()(__first + __range.begin(), __first + __range.end()); + }); + } + } + }; + + public: + __merge_func(_SizeType __xs, _SizeType __xe, _SizeType __ys, _SizeType __ye, _SizeType __zs, _Compare __comp, + _Cleanup, _LeafMerge __leaf_merge, _SizeType __nsort, _RandomAccessIterator1 __x_beg, + _RandomAccessIterator2 __z_beg, bool __x_orig, 
bool __y_orig, bool __root) + : _M_xs(__xs), _M_xe(__xe), _M_ys(__ys), _M_ye(__ye), _M_zs(__zs), _M_x_beg(__x_beg), _M_z_beg(__z_beg), + _M_comp(__comp), _M_leaf_merge(__leaf_merge), _M_nsort(__nsort), _root(__root), + _x_orig(__x_orig), _y_orig(__y_orig), _split(false) + { + } + + bool + is_left(_SizeType __idx) const + { + return _M_xs == __idx; + } + + template + void + set_odd(IndexType __idx, bool __on_off) + { + if (is_left(__idx)) + _x_orig = __on_off; + else + _y_orig = __on_off; + } + + __task* + operator()(__task* __self); + + private: + __merge_func* + parent_merge(__task* __self) const + { + return _root ? nullptr : &static_cast<__func_task<__merge_func>*>(__self->parent())->body(); + } + bool + x_less_y() + { + const auto __nx = (_M_xe - _M_xs); + const auto __ny = (_M_ye - _M_ys); + _PSTL_ASSERT(__nx > 0 && __ny > 0); + + _PSTL_ASSERT(_x_orig == _y_orig); + _PSTL_ASSERT(!is_partial()); + + if (_x_orig) + { + _PSTL_ASSERT(std::is_sorted(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_comp)); + _PSTL_ASSERT(std::is_sorted(_M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_comp)); + return !_M_comp(*(_M_x_beg + _M_ys), *(_M_x_beg + _M_xe - 1)); + } + + _PSTL_ASSERT(std::is_sorted(_M_z_beg + _M_xs, _M_z_beg + _M_xe, _M_comp)); + _PSTL_ASSERT(std::is_sorted(_M_z_beg + _M_ys, _M_z_beg + _M_ye, _M_comp)); + return !_M_comp(*(_M_z_beg + _M_zs + __nx), *(_M_z_beg + _M_zs + __nx - 1)); + } + void + move_x_range() + { + const auto __nx = (_M_xe - _M_xs); + const auto __ny = (_M_ye - _M_ys); + _PSTL_ASSERT(__nx > 0 && __ny > 0); + + if (_x_orig) + __move_range_construct()(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_z_beg + _M_zs); + else + { + __move_range()(_M_z_beg + _M_zs, _M_z_beg + _M_zs + __nx, _M_x_beg + _M_xs); + __cleanup_range()(_M_z_beg + _M_zs, _M_z_beg + _M_zs + __nx); + } + + _x_orig = !_x_orig; + } + void + move_y_range() + { + const auto __nx = (_M_xe - _M_xs); + const auto __ny = (_M_ye - _M_ys); + + if (_y_orig) + __move_range_construct()(_M_x_beg + _M_ys, _M_x_beg + 
_M_ye, _M_z_beg + _M_zs + __nx); + else + { + __move_range()(_M_z_beg + _M_zs + __nx, _M_z_beg + _M_zs + __nx + __ny, _M_x_beg + _M_ys); + __cleanup_range()(_M_z_beg + _M_zs + __nx, _M_z_beg + _M_zs + __nx + __ny); + } + + _y_orig = !_y_orig; + } + __task* + merge_ranges(__task* __self) + { + _PSTL_ASSERT(_x_orig == _y_orig); //two merged subrange must be lie into the same buffer + + const auto __nx = (_M_xe - _M_xs); + const auto __ny = (_M_ye - _M_ys); + const auto __n = __nx + __ny; + + // need to merge {x} and {y} + if (__n > __merge_cut_off) + return split_merging(__self); + + //merge to buffer + if (_x_orig) + { + _M_leaf_merge(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs, + _M_comp, __move_value_construct(), __move_value_construct(), __move_range_construct(), + __move_range_construct()); + _PSTL_ASSERT(parent_merge(__self)); //not root merging task + } + //merge to "origin" + else + { + _PSTL_ASSERT(_x_orig == _y_orig); + + _PSTL_ASSERT(is_partial() || std::is_sorted(_M_z_beg + _M_xs, _M_z_beg + _M_xe, _M_comp)); + _PSTL_ASSERT(is_partial() || std::is_sorted(_M_z_beg + _M_ys, _M_z_beg + _M_ye, _M_comp)); + + const auto __nx = (_M_xe - _M_xs); + const auto __ny = (_M_ye - _M_ys); + + _M_leaf_merge(_M_z_beg + _M_xs, _M_z_beg + _M_xe, _M_z_beg + _M_ys, _M_z_beg + _M_ye, _M_x_beg + _M_zs, + _M_comp, __move_value(), __move_value(), __move_range(), __move_range()); + + __cleanup_range()(_M_z_beg + _M_xs, _M_z_beg + _M_xe); + __cleanup_range()(_M_z_beg + _M_ys, _M_z_beg + _M_ye); + } + return nullptr; + } + + __task* + process_ranges(__task* __self) + { + _PSTL_ASSERT(_x_orig == _y_orig); + _PSTL_ASSERT(!_split); + + auto p = parent_merge(__self); + + if (!p) + { //root merging task + + //optimization, just for sort algorithm, //{x} <= {y} + if (!is_partial() && x_less_y()) //we have a solution + { + if (!_x_orig) + { //we have to move the solution to the origin + move_x_range(); //parallel moving + move_y_range(); 
//parallel moving + } + return nullptr; + } + //else: if we have data in the origin, + //we have to move data to the buffer for final merging into the origin. + if (_x_orig) + { + move_x_range(); //parallel moving + move_y_range(); //parallel moving + } + // need to merge {x} and {y}. + return merge_ranges(__self); + } + //else: not root merging task (parent_merge() == NULL) + //optimization, just for sort algorithm, //{x} <= {y} + if (!is_partial() && x_less_y()) + { + const auto id_range = _M_zs; + p->set_odd(id_range, _x_orig); + return nullptr; + } + //else: we have to revert "_x(y)_orig" flag of the parent merging task + const auto id_range = _M_zs; + p->set_odd(id_range, !_x_orig); + + return merge_ranges(__self); + } + + //splitting as merge task into 2 of the same level + __task* + split_merging(__task* __self) + { + _PSTL_ASSERT(_x_orig == _y_orig); + const auto __nx = (_M_xe - _M_xs); + const auto __ny = (_M_ye - _M_ys); + + _SizeType __xm{}; + _SizeType __ym{}; + if (__nx < __ny) + { + __ym = _M_ys + __ny / 2; + + if (_x_orig) + __xm = std::upper_bound(_M_x_beg + _M_xs, _M_x_beg + _M_xe, *(_M_x_beg + __ym), _M_comp) - _M_x_beg; + else + __xm = std::upper_bound(_M_z_beg + _M_xs, _M_z_beg + _M_xe, *(_M_z_beg + __ym), _M_comp) - _M_z_beg; + } + else + { + __xm = _M_xs + __nx / 2; + + if (_y_orig) + __ym = std::lower_bound(_M_x_beg + _M_ys, _M_x_beg + _M_ye, *(_M_x_beg + __xm), _M_comp) - _M_x_beg; + else + __ym = std::lower_bound(_M_z_beg + _M_ys, _M_z_beg + _M_ye, *(_M_z_beg + __xm), _M_comp) - _M_z_beg; + } + + auto __zm = _M_zs + ((__xm - _M_xs) + (__ym - _M_ys)); + __merge_func __right_func(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _Cleanup(), _M_leaf_merge, _M_nsort, + _M_x_beg, _M_z_beg, _x_orig, _y_orig, _root); + __right_func._split = true; + auto __merge_task = __self->make_additional_child_of(__self->parent(), std::move(__right_func)); + __self->spawn(__merge_task); + __self->recycle_as_continuation(); + + _M_xe = __xm; + _M_ye = __ym; + _split = 
true; + + return __self; + } +}; + +template +__task* +__merge_func<_RandomAccessIterator1, _RandomAccessIterator2, __M_Compare, _Cleanup, _LeafMerge>:: +operator()(__task* __self) +{ + //a. split merge task into 2 of the same level; the special logic, + //without processing(process_ranges) adjacent sub-ranges x and y + if (_split) + return merge_ranges(__self); + + //b. General merging of adjacent sub-ranges x and y (with optimization in case of {x} <= {y} ) + + //1. x and y are in the even buffer + //2. x and y are in the odd buffer + if (_x_orig == _y_orig) + return process_ranges(__self); + + //3. x is in even buffer, y is in the odd buffer + //4. x is in odd buffer, y is in the even buffer + if (!parent_merge(__self)) + { //root merge task + if (_x_orig) + move_x_range(); + else + move_y_range(); + } + else + { + const _SizeType __nx = (_M_xe - _M_xs); + const _SizeType __ny = (_M_ye - _M_ys); + _PSTL_ASSERT(__nx > 0); + _PSTL_ASSERT(__ny > 0); + + if (__nx < __ny) + move_x_range(); + else + move_y_range(); + } + + return process_ranges(__self); +} + +template +class __stable_sort_func +{ + public: + typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; + typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; + typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType; + + private: + _RandomAccessIterator1 _M_xs, _M_xe, _M_x_beg; + _RandomAccessIterator2 _M_zs, _M_z_beg; + _Compare _M_comp; + _LeafSort _M_leaf_sort; + bool _M_root; + _SizeType _M_nsort; //zero or number of elements to be sorted for partial_sort algorithm + + public: + __stable_sort_func(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __zs, + bool __root, _Compare __comp, _LeafSort __leaf_sort, _SizeType __nsort, + _RandomAccessIterator1 __x_beg, _RandomAccessIterator2 __z_beg) + : _M_xs(__xs), _M_xe(__xe), _M_x_beg(__x_beg), _M_zs(__zs), 
_M_z_beg(__z_beg), _M_comp(__comp), + _M_leaf_sort(__leaf_sort), _M_root(__root), _M_nsort(__nsort) + { + } + + __task* + operator()(__task* __self); +}; + +#define _PSTL_STABLE_SORT_CUT_OFF 500 + +template +__task* +__stable_sort_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _LeafSort>::operator()(__task* __self) +{ + typedef __merge_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, __utils::__serial_destroy, + __utils::__serial_move_merge> + _MergeTaskType; + + const _SizeType __n = _M_xe - _M_xs; + const _SizeType __nmerge = _M_nsort > 0 ? _M_nsort : __n; + const _SizeType __sort_cut_off = _PSTL_STABLE_SORT_CUT_OFF; + if (__n <= __sort_cut_off) + { + _M_leaf_sort(_M_xs, _M_xe, _M_comp); + _PSTL_ASSERT(!_M_root); + return nullptr; + } + + const _RandomAccessIterator1 __xm = _M_xs + __n / 2; + const _RandomAccessIterator2 __zm = _M_zs + (__xm - _M_xs); + const _RandomAccessIterator2 __ze = _M_zs + __n; + _MergeTaskType __m(_MergeTaskType(_M_xs - _M_x_beg, __xm - _M_x_beg, __xm - _M_x_beg, _M_xe - _M_x_beg, + _M_zs - _M_z_beg, _M_comp, __utils::__serial_destroy(), + __utils::__serial_move_merge(__nmerge), _M_nsort, _M_x_beg, _M_z_beg, + /*x_orig*/ true, /*y_orig*/ true, /*root*/ _M_root)); + auto __parent = __self->make_continuation(std::move(__m)); + __parent->set_ref_count(2); + auto __right = __self->make_child_of( + __parent, __stable_sort_func(__xm, _M_xe, __zm, false, _M_comp, _M_leaf_sort, _M_nsort, _M_x_beg, _M_z_beg)); + __self->spawn(__right); + __self->recycle_as_child_of(__parent); + _M_root = false; + _M_xe = __xm; + + return __self; +} + +template +void +__parallel_stable_sort(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __xs, + _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, std::size_t __nsort = 0) +{ + tbb::this_task_arena::isolate([=, &__nsort]() { + //sorting based on task tree and parallel merge + typedef typename 
std::iterator_traits<_RandomAccessIterator>::value_type _ValueType; + typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; + const _DifferenceType __n = __xe - __xs; + if (__nsort == __n) + __nsort = 0; // 'partial_sort' becomes 'sort' + + const _DifferenceType __sort_cut_off = _PSTL_STABLE_SORT_CUT_OFF; + if (__n > __sort_cut_off) + { + __buffer<_ValueType> __buf(__n); + __root_task<__stable_sort_func<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>> __root{ + __xs, __xe, __buf.get(), true, __comp, __leaf_sort, __nsort, __xs, __buf.get()}; + __task::spawn_root_and_wait(__root); + return; + } + //serial sort + __leaf_sort(__xs, __xe, __comp); + }); +} + +//------------------------------------------------------------------------ +// parallel_merge +//------------------------------------------------------------------------ +template +class __merge_func_static +{ + _RandomAccessIterator1 _M_xs, _M_xe; + _RandomAccessIterator2 _M_ys, _M_ye; + _RandomAccessIterator3 _M_zs; + _Compare _M_comp; + _LeafMerge _M_leaf_merge; + + public: + __merge_func_static(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, + _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, + _LeafMerge __leaf_merge) + : _M_xs(__xs), _M_xe(__xe), _M_ys(__ys), _M_ye(__ye), _M_zs(__zs), _M_comp(__comp), _M_leaf_merge(__leaf_merge) + { + } + + __task* + operator()(__task* __self); +}; + +//TODO: consider usage of parallel_for with a custom blocked_range +template +__task* +__merge_func_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, __M_Compare, _LeafMerge>:: +operator()(__task* __self) +{ + typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; + typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; + typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type 
_SizeType; + const _SizeType __n = (_M_xe - _M_xs) + (_M_ye - _M_ys); + const _SizeType __merge_cut_off = _PSTL_MERGE_CUT_OFF; + if (__n <= __merge_cut_off) + { + _M_leaf_merge(_M_xs, _M_xe, _M_ys, _M_ye, _M_zs, _M_comp); + return nullptr; + } + + _RandomAccessIterator1 __xm; + _RandomAccessIterator2 __ym; + if (_M_xe - _M_xs < _M_ye - _M_ys) + { + __ym = _M_ys + (_M_ye - _M_ys) / 2; + __xm = std::upper_bound(_M_xs, _M_xe, *__ym, _M_comp); + } + else + { + __xm = _M_xs + (_M_xe - _M_xs) / 2; + __ym = std::lower_bound(_M_ys, _M_ye, *__xm, _M_comp); + } + const _RandomAccessIterator3 __zm = _M_zs + ((__xm - _M_xs) + (__ym - _M_ys)); + auto __right = __self->make_additional_child_of( + __self->parent(), __merge_func_static(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _M_leaf_merge)); + __self->spawn(__right); + __self->recycle_as_continuation(); + _M_xe = __xm; + _M_ye = __ym; + + return __self; +} + +template +void +__parallel_merge(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) +{ + typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; + typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; + typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType; + const _SizeType __n = (__xe - __xs) + (__ye - __ys); + const _SizeType __merge_cut_off = _PSTL_MERGE_CUT_OFF; + if (__n <= __merge_cut_off) + { + // Fall back on serial merge + __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp); + } + else + { + tbb::this_task_arena::isolate([=]() { + typedef __merge_func_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, + _Compare, _LeafMerge> + _TaskType; + __root_task<_TaskType> __root{__xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge}; + 
__task::spawn_root_and_wait(__root); + }); + } +} + +//------------------------------------------------------------------------ +// parallel_invoke +//------------------------------------------------------------------------ +template +void +__parallel_invoke(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +{ + //TODO: a version of tbb::this_task_arena::isolate with variadic arguments pack should be added in the future + tbb::this_task_arena::isolate([&]() { tbb::parallel_invoke(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); }); +} + +} // namespace __tbb_backend +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_PARALLEL_BACKEND_TBB_H */ diff --git a/Components/Include/pstl/internal/parallel_backend_utils.h b/Components/Include/pstl/internal/parallel_backend_utils.h new file mode 100644 index 0000000..e176d7e --- /dev/null +++ b/Components/Include/pstl/internal/parallel_backend_utils.h @@ -0,0 +1,263 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_PARALLEL_BACKEND_UTILS_H +#define _PSTL_PARALLEL_BACKEND_UTILS_H + +#include +#include +#include "utils.h" + +#include "pstl_config.h" + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ + +namespace __utils +{ + +//! Destroy sequence [xs,xe) +struct __serial_destroy +{ + template + void + operator()(_RandomAccessIterator __zs, _RandomAccessIterator __ze) + { + typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _ValueType; + while (__zs != __ze) + { + --__ze; + (*__ze).~_ValueType(); + } + } +}; + +//! 
Merge sequences [__xs,__xe) and [__ys,__ye) to output sequence [__zs,(__xe-__xs)+(__ye-__ys)), using std::move +struct __serial_move_merge +{ + const std::size_t _M_nmerge; + + explicit __serial_move_merge(std::size_t __nmerge) : _M_nmerge(__nmerge) {} + template + void + operator()(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, + _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, _MoveValueX __move_value_x, + _MoveValueY __move_value_y, _MoveSequenceX __move_sequence_x, _MoveSequenceY __move_sequence_y) + { + constexpr bool __same_move_val = std::is_same<_MoveValueX, _MoveValueY>::value; + constexpr bool __same_move_seq = std::is_same<_MoveSequenceX, _MoveSequenceY>::value; + + auto __n = _M_nmerge; + _PSTL_ASSERT(__n > 0); + + auto __nx = __xe - __xs; + //auto __ny = __ye - __ys; + _RandomAccessIterator3 __zs_beg = __zs; + + if (__xs != __xe) + { + if (__ys != __ye) + { + for (;;) + { + if (__comp(*__ys, *__xs)) + { + const auto __i = __zs - __zs_beg; + if (__i < __nx) + __move_value_x(__ys, __zs); + else + __move_value_y(__ys, __zs); + ++__zs, --__n; + if (++__ys == __ye) + { + break; + } + else if (__n == 0) + { + const auto __j = __zs - __zs_beg; + if (__same_move_seq || __j < __nx) + __zs = __move_sequence_x(__ys, __ye, __zs); + else + __zs = __move_sequence_y(__ys, __ye, __zs); + break; + } + } + else + { + const auto __i = __zs - __zs_beg; + if (__same_move_val || __i < __nx) + __move_value_x(__xs, __zs); + else + __move_value_y(__xs, __zs); + ++__zs, --__n; + if (++__xs == __xe) + { + const auto __j = __zs - __zs_beg; + if (__same_move_seq || __j < __nx) + __move_sequence_x(__ys, __ye, __zs); + else + __move_sequence_y(__ys, __ye, __zs); + return; + } + else if (__n == 0) + { + const auto __j = __zs - __zs_beg; + if (__same_move_seq || __j < __nx) + { + __zs = __move_sequence_x(__xs, __xe, __zs); + __move_sequence_x(__ys, __ye, __zs); + } + else + { + __zs = __move_sequence_y(__xs, __xe, 
__zs); + __move_sequence_y(__ys, __ye, __zs); + } + return; + } + } + } + } + __ys = __xs; + __ye = __xe; + } + const auto __i = __zs - __zs_beg; + if (__same_move_seq || __i < __nx) + __move_sequence_x(__ys, __ye, __zs); + else + __move_sequence_y(__ys, __ye, __zs); + } +}; + +template +_OutputIterator +__set_union_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, + _CopyConstructRange __cc_range) +{ + using _Tp = typename std::iterator_traits<_OutputIterator>::value_type; + + for (; __first1 != __last1; ++__result) + { + if (__first2 == __last2) + return __cc_range(__first1, __last1, __result); + if (__comp(*__first2, *__first1)) + { + ::new (std::addressof(*__result)) _Tp(*__first2); + ++__first2; + } + else + { + ::new (std::addressof(*__result)) _Tp(*__first1); + if (!__comp(*__first1, *__first2)) + ++__first2; + ++__first1; + } + } + return __cc_range(__first2, __last2, __result); +} + +template +_OutputIterator +__set_intersection_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp) +{ + using _Tp = typename std::iterator_traits<_OutputIterator>::value_type; + + for (; __first1 != __last1 && __first2 != __last2;) + { + if (__comp(*__first1, *__first2)) + ++__first1; + else + { + if (!__comp(*__first2, *__first1)) + { + ::new (std::addressof(*__result)) _Tp(*__first1); + ++__result; + ++__first1; + } + ++__first2; + } + } + return __result; +} + +template +_OutputIterator +__set_difference_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, + _CopyConstructRange __cc_range) +{ + using _Tp = typename std::iterator_traits<_OutputIterator>::value_type; + + for (; __first1 != __last1;) + { + if (__first2 == __last2) + return 
__cc_range(__first1, __last1, __result); + + if (__comp(*__first1, *__first2)) + { + ::new (std::addressof(*__result)) _Tp(*__first1); + ++__result; + ++__first1; + } + else + { + if (!__comp(*__first2, *__first1)) + ++__first1; + ++__first2; + } + } + return __result; +} +template +_OutputIterator +__set_symmetric_difference_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, + _CopyConstructRange __cc_range) +{ + using _Tp = typename std::iterator_traits<_OutputIterator>::value_type; + + for (; __first1 != __last1;) + { + if (__first2 == __last2) + return __cc_range(__first1, __last1, __result); + + if (__comp(*__first1, *__first2)) + { + ::new (std::addressof(*__result)) _Tp(*__first1); + ++__result; + ++__first1; + } + else + { + if (__comp(*__first2, *__first1)) + { + ::new (std::addressof(*__result)) _Tp(*__first2); + ++__result; + } + else + ++__first1; + ++__first2; + } + } + return __cc_range(__first2, __last2, __result); +} + +} // namespace __utils +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_PARALLEL_BACKEND_UTILS_H */ diff --git a/Components/Include/pstl/internal/parallel_impl.h b/Components/Include/pstl/internal/parallel_impl.h new file mode 100644 index 0000000..76b3f43 --- /dev/null +++ b/Components/Include/pstl/internal/parallel_impl.h @@ -0,0 +1,90 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_PARALLEL_IMPL_H +#define _PSTL_PARALLEL_IMPL_H + +#include "pstl_config.h" + +#include +// This header defines the minimum set of parallel routines required to support Parallel STL, +// implemented on top of Intel(R) Threading Building Blocks (Intel(R) TBB) library + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __internal +{ + +//------------------------------------------------------------------------ +// parallel_find +//----------------------------------------------------------------------- +/** Return extremum value returned by brick f[i,j) for subranges [i,j) of [first,last) +Each f[i,j) must return a value in [i,j). */ +template +_Index +__parallel_find(_BackendTag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, + _Compare __comp, bool __b_first) +{ + typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType; + const _DifferenceType __n = __last - __first; + _DifferenceType __initial_dist = __b_first ? __n : -1; + std::atomic<_DifferenceType> __extremum(__initial_dist); + // TODO: find out what is better here: parallel_for or parallel_reduce + __par_backend::__parallel_for(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__comp, __f, __first, &__extremum](_Index __i, _Index __j) + { + // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of + // why using a shared variable scales fairly well in this situation. 
+ if (__comp(__i - __first, __extremum)) + { + _Index __res = __f(__i, __j); + // If not '__last' returned then we found what we want so put this to extremum + if (__res != __j) + { + const _DifferenceType __k = __res - __first; + for (_DifferenceType __old = __extremum; __comp(__k, __old); + __old = __extremum) + { + __extremum.compare_exchange_weak(__old, __k); + } + } + } + }); + return __extremum != __initial_dist ? __first + __extremum : __last; +} + +//------------------------------------------------------------------------ +// parallel_or +//------------------------------------------------------------------------ +//! Return true if brick f[i,j) returns true for some subrange [i,j) of [first,last) +template +bool +__parallel_or(_BackendTag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) +{ + std::atomic __found(false); + __par_backend::__parallel_for(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__f, &__found](_Index __i, _Index __j) + { + if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) + { + __found.store(true, std::memory_order_relaxed); + __par_backend::__cancel_execution(); + } + }); + return __found; +} + +} // namespace __internal +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_PARALLEL_IMPL_H */ diff --git a/Components/Include/pstl/internal/pstl_config.h b/Components/Include/pstl/internal/pstl_config.h new file mode 100644 index 0000000..9da6b1c --- /dev/null +++ b/Components/Include/pstl/internal/pstl_config.h @@ -0,0 +1,204 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_CONFIG_H +#define _PSTL_CONFIG_H + +#include <__pstl_config_site> + +// The version is XYYZ, where X is major, YY is minor, and Z is patch (i.e. X.YY.Z) +#define _PSTL_VERSION 17000 +#define _PSTL_VERSION_MAJOR (_PSTL_VERSION / 1000) +#define _PSTL_VERSION_MINOR ((_PSTL_VERSION % 1000) / 10) +#define _PSTL_VERSION_PATCH (_PSTL_VERSION % 10) + +#if !defined(_PSTL_PAR_BACKEND_SERIAL) && !defined(_PSTL_PAR_BACKEND_TBB) && !defined(_PSTL_PAR_BACKEND_OPENMP) +# error "A parallel backend must be specified" +#endif + +// Check the user-defined macro for warnings +#if defined(PSTL_USAGE_WARNINGS) +# define _PSTL_USAGE_WARNINGS +#endif + +// #if defined(_LIBCPP_VERSION) +// # include +// # define _PSTL_ASSERT(pred) _LIBCPP_ASSERT(pred, "") +// #elif defined(__GLIBCXX__) +// # define _PSTL_ASSERT(pred) __glibcxx_assert(pred) +// #else +// # include +# define _PSTL_ASSERT(pred) (assert((pred))) +// #endif + +// Portability "#pragma" definition +#ifdef _MSC_VER +# define _PSTL_PRAGMA(x) __pragma(x) +#else +# define _PSTL_PRAGMA(x) _Pragma(# x) +#endif + +#define _PSTL_STRING_AUX(x) #x +#define _PSTL_STRING(x) _PSTL_STRING_AUX(x) +#define _PSTL_STRING_CONCAT(x, y) x #y + +#ifdef _PSTL_HIDE_FROM_ABI_PER_TU +# define _PSTL_HIDE_FROM_ABI_PUSH \ + _Pragma("clang attribute push(__attribute__((internal_linkage)), apply_to=any(function,record))") +# define _PSTL_HIDE_FROM_ABI_POP _Pragma("clang attribute pop") +#else +# define _PSTL_HIDE_FROM_ABI_PUSH /* nothing */ +# define _PSTL_HIDE_FROM_ABI_POP /* nothing */ +#endif + +// note that when ICC or Clang is in use, _PSTL_GCC_VERSION might not fully match +// the actual GCC version on the system. 
+#define _PSTL_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) + +#if defined(__clang__) +// according to clang documentation, version can be vendor specific +# define _PSTL_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#endif + +// Enable SIMD for compilers that support OpenMP 4.0 +#if (defined(_OPENMP) && _OPENMP >= 201307) || \ + (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1600) || \ + (!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 40900) || \ + defined(__clang__) +# define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(omp simd) +# define _PSTL_PRAGMA_DECLARE_SIMD _PSTL_PRAGMA(omp declare simd) +# define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(omp simd reduction(PRM)) +#elif !defined(_MSC_VER) //#pragma simd +# define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(simd) +# define _PSTL_PRAGMA_DECLARE_SIMD +# define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(simd reduction(PRM)) +#else //no simd +# define _PSTL_PRAGMA_SIMD +# define _PSTL_PRAGMA_DECLARE_SIMD +# define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) +#endif //Enable SIMD + +#if defined(__INTEL_COMPILER) +# define _PSTL_PRAGMA_FORCEINLINE _PSTL_PRAGMA(forceinline) +#else +# define _PSTL_PRAGMA_FORCEINLINE +#endif + +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900 +# define _PSTL_PRAGMA_SIMD_SCAN(PRM) _PSTL_PRAGMA(omp simd reduction(inscan, PRM)) +# define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan inclusive(PRM)) +# define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan exclusive(PRM)) +#else +# define _PSTL_PRAGMA_SIMD_SCAN(PRM) +# define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) +# define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) +#endif + +// Should be defined to 1 for environments with a vendor implementation of C++17 execution policies +#define _PSTL_CPP17_EXECUTION_POLICIES_PRESENT ((_MSC_VER >= 1912 && _MSVC_LANG >= 201703L) || \ + (_GLIBCXX_RELEASE >= 9 && __GLIBCXX__ >= 20190503 && __cplusplus >= 201703L)) + +#if 
(defined(_MSC_VER) && _MSC_VER >= 1900) || \ + __cplusplus >= 201300L || \ + __cpp_lib_robust_nonmodifying_seq_ops == 201304 +# define _PSTL_CPP14_2RANGE_MISMATCH_EQUAL_PRESENT +#endif +#if (defined(_MSC_VER) && _MSC_VER >= 1900) || \ + __cplusplus >= 201402L || \ + __cpp_lib_make_reverse_iterator == 201402 +# define _PSTL_CPP14_MAKE_REVERSE_ITERATOR_PRESENT +#endif +#if (defined(_MSC_VER) && _MSC_VER >= 1900) || __cplusplus >= 201402L +# define _PSTL_CPP14_INTEGER_SEQUENCE_PRESENT +#endif +#if (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1700) || \ + (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918) || \ + __cplusplus >= 201402L +# define _PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT +#endif + +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1800 +# define _PSTL_EARLYEXIT_PRESENT +# define _PSTL_MONOTONIC_PRESENT +#endif + +#if (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900) || \ + (!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 40900) || \ + (defined(_OPENMP) && _OPENMP >= 201307) +# define _PSTL_UDR_PRESENT +#endif + +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900 && __INTEL_COMPILER_BUILD_DATE >= 20180626 +# define _PSTL_UDS_PRESENT +#endif + +#if defined(_PSTL_EARLYEXIT_PRESENT) +# define _PSTL_PRAGMA_SIMD_EARLYEXIT _PSTL_PRAGMA(omp simd early_exit) +#else +# define _PSTL_PRAGMA_SIMD_EARLYEXIT +#endif + +#if defined(_PSTL_MONOTONIC_PRESENT) +# define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(PRM) _PSTL_PRAGMA(omp ordered simd monotonic(PRM)) +# define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(PRM1, PRM2) _PSTL_PRAGMA(omp ordered simd monotonic(PRM1, PRM2)) +#else +# define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(PRM) +# define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(PRM1, PRM2) +#endif + +// Declaration of reduction functor, where +// NAME - the name of the functor +// OP - type of the callable object with the reduction operation +// omp_in - refers to the local partial result +// omp_out - refers to the final value of the combiner 
operator +// omp_priv - refers to the private copy of the initial value +// omp_orig - refers to the original variable to be reduced +#define _PSTL_PRAGMA_DECLARE_REDUCTION(NAME, OP) \ + _PSTL_PRAGMA(omp declare reduction(NAME:OP : omp_out(omp_in)) initializer(omp_priv = omp_orig)) + +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1600 +# define _PSTL_PRAGMA_VECTOR_UNALIGNED _PSTL_PRAGMA(vector unaligned) +#else +# define _PSTL_PRAGMA_VECTOR_UNALIGNED +#endif + +// Check the user-defined macro to use non-temporal stores +#if defined(PSTL_USE_NONTEMPORAL_STORES) && (__INTEL_COMPILER >= 1600) +# define _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED _PSTL_PRAGMA(vector nontemporal) +#else +# define _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED +#endif + +#if defined(_MSC_VER) || defined(__INTEL_COMPILER) // the preprocessors don't type a message location +# define _PSTL_PRAGMA_LOCATION __FILE__ ":" _PSTL_STRING(__LINE__) ": [Parallel STL message]: " +#else +# define _PSTL_PRAGMA_LOCATION " [Parallel STL message]: " +#endif + +#define _PSTL_PRAGMA_MESSAGE_IMPL(x) _PSTL_PRAGMA(message(_PSTL_STRING_CONCAT(_PSTL_PRAGMA_LOCATION, x))) + +#if defined(_PSTL_USAGE_WARNINGS) +# define _PSTL_PRAGMA_MESSAGE(x) _PSTL_PRAGMA_MESSAGE_IMPL(x) +# define _PSTL_PRAGMA_MESSAGE_POLICIES(x) _PSTL_PRAGMA_MESSAGE_IMPL(x) +#else +# define _PSTL_PRAGMA_MESSAGE(x) +# define _PSTL_PRAGMA_MESSAGE_POLICIES(x) +#endif + +// broken macros +#if (defined(__GLIBCXX__) && __GLIBCXX__ < 20150716) || \ + (defined(_MSC_VER) && _MSC_VER < 1800) +# define _PSTL_CPP11_STD_ROTATE_BROKEN +#endif + +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER == 1800 +# define _PSTL_ICC_18_OMP_SIMD_BROKEN +#endif + +#endif /* _PSTL_CONFIG_H */ diff --git a/Components/Include/pstl/internal/unseq_backend_simd.h b/Components/Include/pstl/internal/unseq_backend_simd.h new file mode 100644 index 0000000..af2a143 --- /dev/null +++ b/Components/Include/pstl/internal/unseq_backend_simd.h @@ -0,0 +1,862 @@ +// -*- C++ -*- 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_UNSEQ_BACKEND_SIMD_H +#define _PSTL_UNSEQ_BACKEND_SIMD_H + +#include <type_traits> + +#include "pstl_config.h" +#include "utils.h" + +// This header defines the minimum set of vector routines required +// to support parallel STL. + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __unseq_backend +{ + +// Expect vector width up to 64 (or 512 bit) +const std::size_t __lane_size = 64; + +template <class _Iterator, class _DifferenceType, class _Function> +_Iterator +__simd_walk_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept +{ + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __n; ++__i) + __f(__first[__i]); + + return __first + __n; +} + +template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function> +_Iterator2 +__simd_walk_2(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Function __f) noexcept +{ + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __n; ++__i) + __f(__first1[__i], __first2[__i]); + return __first2 + __n; +} + +template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Iterator3, class _Function> +_Iterator3 +__simd_walk_3(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Iterator3 __first3, + _Function __f) noexcept +{ + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __n; ++__i) + __f(__first1[__i], __first2[__i], __first3[__i]); + return __first3 + __n; +} + +// TODO: check whether __simd_first() can be used here +template <class _Index, class _DifferenceType, class _Pred> +bool +__simd_or(_Index __first, _DifferenceType __n, _Pred __pred) noexcept +{ +#if defined(_PSTL_EARLYEXIT_PRESENT) + _DifferenceType __i; + _PSTL_PRAGMA_VECTOR_UNALIGNED + _PSTL_PRAGMA_SIMD_EARLYEXIT + for (__i = 0; __i < __n; ++__i) + if (__pred(__first[__i])) + break; + return __i < __n; +#else + _DifferenceType __block_size = 4 < __n ? 
4 : __n; + const _Index __last = __first + __n; + while (__last != __first) + { + int32_t __flag = 1; + _PSTL_PRAGMA_SIMD_REDUCTION(& : __flag) + for (_DifferenceType __i = 0; __i < __block_size; ++__i) + if (__pred(*(__first + __i))) + __flag = 0; + if (!__flag) + return true; + + __first += __block_size; + if (__last - __first >= __block_size << 1) + { + // Double the block _Size. Any unnecessary iterations can be amortized against work done so far. + __block_size <<= 1; + } + else + { + __block_size = __last - __first; + } + } + return false; +#endif +} + +template <class _Index, class _DifferenceType, class _Compare> +_Index +__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept +{ +#if defined(_PSTL_EARLYEXIT_PRESENT) + _DifferenceType __i = __begin; + _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part + _PSTL_PRAGMA_SIMD_EARLYEXIT for (; __i < __end; ++__i) + { + if (__comp(__first, __i)) + { + break; + } + } + return __first + __i; +#else + // Experiments show good block sizes like this + const _DifferenceType __block_size = 8; + alignas(__lane_size) _DifferenceType __lane[__block_size] = {0}; + while (__end - __begin >= __block_size) + { + _DifferenceType __found = 0; + _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part + _PSTL_PRAGMA_SIMD_REDUCTION(| + : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; + ++__i) + { + const _DifferenceType __t = __comp(__first, __i); + __lane[__i - __begin] = __t; + __found |= __t; + } + if (__found) + { + _DifferenceType __i; + // This will vectorize + for (__i = 0; __i < __block_size; ++__i) + { + if (__lane[__i]) + { + break; + } + } + return __first + __begin + __i; + } + __begin += __block_size; + } + + //Keep remainder scalar + while (__begin != __end) + { + if (__comp(__first, __begin)) + { + return __first + __begin; + } + ++__begin; + } + return __first + __end; +#endif //_PSTL_EARLYEXIT_PRESENT +} + +template <class _Index1, class _DifferenceType, class _Index2, class _Pred> +std::pair<_Index1, _Index2> +__simd_first(_Index1 
__first1, _DifferenceType __n, _Index2 __first2, _Pred __pred) noexcept +{ +#if defined(_PSTL_EARLYEXIT_PRESENT) + _DifferenceType __i = 0; + _PSTL_PRAGMA_VECTOR_UNALIGNED + _PSTL_PRAGMA_SIMD_EARLYEXIT + for (; __i < __n; ++__i) + if (__pred(__first1[__i], __first2[__i])) + break; + return std::make_pair(__first1 + __i, __first2 + __i); +#else + const _Index1 __last1 = __first1 + __n; + const _Index2 __last2 = __first2 + __n; + // Experiments show good block sizes like this + const _DifferenceType __block_size = 8; + alignas(__lane_size) _DifferenceType __lane[__block_size] = {0}; + while (__last1 - __first1 >= __block_size) + { + _DifferenceType __found = 0; + _DifferenceType __i; + _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part + _PSTL_PRAGMA_SIMD_REDUCTION(| + : __found) for (__i = 0; __i < __block_size; ++__i) + { + const _DifferenceType __t = __pred(__first1[__i], __first2[__i]); + __lane[__i] = __t; + __found |= __t; + } + if (__found) + { + _DifferenceType __i2; + // This will vectorize + for (__i2 = 0; __i2 < __block_size; ++__i2) + { + if (__lane[__i2]) + break; + } + return std::make_pair(__first1 + __i2, __first2 + __i2); + } + __first1 += __block_size; + __first2 += __block_size; + } + + //Keep remainder scalar + for (; __last1 != __first1; ++__first1, ++__first2) + if (__pred(*(__first1), *(__first2))) + return std::make_pair(__first1, __first2); + + return std::make_pair(__last1, __last2); +#endif //_PSTL_EARLYEXIT_PRESENT +} + +template <class _Index, class _DifferenceType, class _Pred> +_DifferenceType +__simd_count(_Index __index, _DifferenceType __n, _Pred __pred) noexcept +{ + _DifferenceType __count = 0; + _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count) + for (_DifferenceType __i = 0; __i < __n; ++__i) + if (__pred(*(__index + __i))) + ++__count; + + return __count; +} + +template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _BinaryPredicate> +_OutputIterator +__simd_unique_copy(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, + _BinaryPredicate __pred) noexcept +{ + if (__n == 0) + return __result; + + _DifferenceType __cnt 
= 1; + __result[0] = __first[0]; + + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 1; __i < __n; ++__i) + { + _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1) + if (!__pred(__first[__i], __first[__i - 1])) + { + __result[__cnt] = __first[__i]; + ++__cnt; + } + } + return __result + __cnt; +} + +template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _Assigner> +_OutputIterator +__simd_assign(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, _Assigner __assigner) noexcept +{ + _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __n; ++__i) + __assigner(__first + __i, __result + __i); + return __result + __n; +} + +template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _UnaryPredicate> +_OutputIterator +__simd_copy_if(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, _UnaryPredicate __pred) noexcept +{ + _DifferenceType __cnt = 0; + + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __n; ++__i) + { + _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1) + if (__pred(__first[__i])) + { + __result[__cnt] = __first[__i]; + ++__cnt; + } + } + return __result + __cnt; +} + +template <class _InputIterator, class _DifferenceType, class _BinaryPredicate> +_DifferenceType +__simd_calc_mask_2(_InputIterator __first, _DifferenceType __n, bool* __mask, _BinaryPredicate __pred) noexcept +{ + _DifferenceType __count = 0; + + _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count) + for (_DifferenceType __i = 0; __i < __n; ++__i) + { + __mask[__i] = !__pred(__first[__i], __first[__i - 1]); + __count += __mask[__i]; + } + return __count; +} + +template <class _InputIterator, class _DifferenceType, class _UnaryPredicate> +_DifferenceType +__simd_calc_mask_1(_InputIterator __first, _DifferenceType __n, bool* __mask, _UnaryPredicate __pred) noexcept +{ + _DifferenceType __count = 0; + + _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count) + for (_DifferenceType __i = 0; __i < __n; ++__i) + { + __mask[__i] = __pred(__first[__i]); + __count += __mask[__i]; + } + return __count; +} + +template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _Assigner> +void +__simd_copy_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, bool* __mask, + _Assigner __assigner) noexcept +{ + _DifferenceType __cnt = 0; + 
_PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __n; ++__i) + { + if (__mask[__i]) + { + _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1) + { + __assigner(__first + __i, __result + __cnt); + ++__cnt; + } + } + } +} + +template <class _InputIterator, class _DifferenceType, class _OutputIterator1, class _OutputIterator2> +void +__simd_partition_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIterator1 __out_true, + _OutputIterator2 __out_false, bool* __mask) noexcept +{ + _DifferenceType __cnt_true = 0, __cnt_false = 0; + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __n; ++__i) + { + _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(__cnt_true : 1, __cnt_false : 1) + if (__mask[__i]) + { + __out_true[__cnt_true] = __first[__i]; + ++__cnt_true; + } + else + { + __out_false[__cnt_false] = __first[__i]; + ++__cnt_false; + } + } +} + +template <class _Index, class _DifferenceType, class _Tp> +_Index +__simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept +{ + _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __n; ++__i) + __first[__i] = __value; + return __first + __n; +} + +template <class _Index, class _DifferenceType, class _Generator> +_Index +__simd_generate_n(_Index __first, _DifferenceType __size, _Generator __g) noexcept +{ + _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __size; ++__i) + __first[__i] = __g(); + return __first + __size; +} + +template <class _Index, class _BinaryPredicate> +_Index +__simd_adjacent_find(_Index __first, _Index __last, _BinaryPredicate __pred, bool __or_semantic) noexcept +{ + if (__last - __first < 2) + return __last; + + typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType; + _DifferenceType __i = 0; + +#if defined(_PSTL_EARLYEXIT_PRESENT) + //Some compiler versions fail to compile the following loop when iterators are used. Indices are used instead + const _DifferenceType __n = __last - __first - 1; + _PSTL_PRAGMA_VECTOR_UNALIGNED + _PSTL_PRAGMA_SIMD_EARLYEXIT + for (; __i < __n; ++__i) + if (__pred(__first[__i], __first[__i + 1])) + break; + + return __i < __n ? 
__first + __i : __last; +#else + // Experiments show good block sizes like this + //TODO: to consider tuning block_size for various data types + const _DifferenceType __block_size = 8; + alignas(__lane_size) _DifferenceType __lane[__block_size] = {0}; + while (__last - __first >= __block_size) + { + _DifferenceType __found = 0; + _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part + _PSTL_PRAGMA_SIMD_REDUCTION(| + : __found) for (__i = 0; __i < __block_size - 1; ++__i) + { + //TODO: to improve SIMD vectorization + const _DifferenceType __t = __pred(*(__first + __i), *(__first + __i + 1)); + __lane[__i] = __t; + __found |= __t; + } + + //Process a pair of elements on a boundary of a data block + if (__first + __block_size < __last && __pred(*(__first + __i), *(__first + __i + 1))) + __lane[__i] = __found = 1; + + if (__found) + { + if (__or_semantic) + return __first; + + // This will vectorize + for (__i = 0; __i < __block_size; ++__i) + if (__lane[__i]) + break; + return __first + __i; //As far as found is true a __result (__lane[__i] is true) is guaranteed + } + __first += __block_size; + } + //Process the rest elements + for (; __last - __first > 1; ++__first) + if (__pred(*__first, *(__first + 1))) + return __first; + + return __last; +#endif +} + +// It was created to reduce the code inside std::enable_if +template <typename _Tp, typename _BinaryOperation> +using is_arithmetic_plus = std::integral_constant<bool, std::is_arithmetic<_Tp>::value && + std::is_same<_BinaryOperation, std::plus<_Tp>>::value>; + +template <typename _DifferenceType, typename _Tp, typename _BinaryOperation, typename _UnaryOperation> +typename std::enable_if<is_arithmetic_plus<_Tp, _BinaryOperation>::value, _Tp>::type +__simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept +{ + _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init) + for (_DifferenceType __i = 0; __i < __n; ++__i) + __init += __f(__i); + return __init; +} + +template <typename _Size, typename _Tp, typename _BinaryOperation, typename _UnaryOperation> +typename std::enable_if<!is_arithmetic_plus<_Tp, _BinaryOperation>::value, _Tp>::type +__simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept +{ + const _Size __block_size = __lane_size / sizeof(_Tp); + if 
(__n > 2 * __block_size && __block_size > 1) + { + alignas(__lane_size) char __lane_[__lane_size]; + _Tp* __lane = reinterpret_cast<_Tp*>(__lane_); + + // initializer + _PSTL_PRAGMA_SIMD + for (_Size __i = 0; __i < __block_size; ++__i) + { + ::new (__lane + __i) _Tp(__binary_op(__f(__i), __f(__block_size + __i))); + } + // main loop + _Size __i = 2 * __block_size; + const _Size last_iteration = __block_size * (__n / __block_size); + for (; __i < last_iteration; __i += __block_size) + { + _PSTL_PRAGMA_SIMD + for (_Size __j = 0; __j < __block_size; ++__j) + { + __lane[__j] = __binary_op(__lane[__j], __f(__i + __j)); + } + } + // remainder + _PSTL_PRAGMA_SIMD + for (_Size __j = 0; __j < __n - last_iteration; ++__j) + { + __lane[__j] = __binary_op(__lane[__j], __f(last_iteration + __j)); + } + // combiner + for (_Size __j = 0; __j < __block_size; ++__j) + { + __init = __binary_op(__init, __lane[__j]); + } + // destroyer + _PSTL_PRAGMA_SIMD + for (_Size __j = 0; __j < __block_size; ++__j) + { + __lane[__j].~_Tp(); + } + } + else + { + for (_Size __i = 0; __i < __n; ++__i) + { + __init = __binary_op(__init, __f(__i)); + } + } + return __init; +} + +// Exclusive scan for "+" and arithmetic types +template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> +typename std::enable_if<is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type +__simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation, /*Inclusive*/ std::false_type) +{ + _PSTL_PRAGMA_SIMD_SCAN(+ : __init) + for (_Size __i = 0; __i < __n; ++__i) + { + __result[__i] = __init; + _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init) + __init += __unary_op(__first[__i]); + } + return std::make_pair(__result + __n, __init); +} + +// As soon as we cannot call __binary_op in "combiner" we create a wrapper over _Tp to encapsulate __binary_op +template <typename _Tp, typename _BinaryOp> +struct _Combiner +{ + _Tp __value; + _BinaryOp* __bin_op; // Here is a pointer to function because of default ctor + + _Combiner() : __value{}, __bin_op(nullptr) {} + 
_Combiner(const _Tp& value, const _BinaryOp* bin_op) : __value(value), __bin_op(const_cast<_BinaryOp*>(bin_op)) {} + _Combiner(const _Combiner& __obj) : __value{}, __bin_op(__obj.__bin_op) {} + + void + operator()(const _Combiner& __obj) + { + __value = (*__bin_op)(__value, __obj.__value); + } +}; + +// Exclusive scan for other binary operations and types +template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> +typename std::enable_if<!is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type +__simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation __binary_op, /*Inclusive*/ std::false_type) +{ + typedef _Combiner<_Tp, _BinaryOperation> _CombinerType; + _CombinerType __init_{__init, &__binary_op}; + + _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType) + + _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __init_) + for (_Size __i = 0; __i < __n; ++__i) + { + __result[__i] = __init_.__value; + _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init_) + _PSTL_PRAGMA_FORCEINLINE + __init_.__value = __binary_op(__init_.__value, __unary_op(__first[__i])); + } + return std::make_pair(__result + __n, __init_.__value); +} + +// Inclusive scan for "+" and arithmetic types +template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> +typename std::enable_if<is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type +__simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation, /*Inclusive*/ std::true_type) +{ + _PSTL_PRAGMA_SIMD_SCAN(+ : __init) + for (_Size __i = 0; __i < __n; ++__i) + { + __init += __unary_op(__first[__i]); + _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init) + __result[__i] = __init; + } + return std::make_pair(__result + __n, __init); +} + +// Inclusive scan for other binary operations and types +template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation> +typename std::enable_if<!is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type +__simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation __binary_op, std::true_type) +{ + typedef _Combiner<_Tp, 
_BinaryOperation> _CombinerType; + _CombinerType __init_{__init, &__binary_op}; + + _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType) + + _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __init_) + for (_Size __i = 0; __i < __n; ++__i) + { + _PSTL_PRAGMA_FORCEINLINE + __init_.__value = __binary_op(__init_.__value, __unary_op(__first[__i])); + _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init_) + __result[__i] = __init_.__value; + } + return std::make_pair(__result + __n, __init_.__value); +} + +// [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible. +// complexity [violation] - We will have at most (__n-1 + number_of_lanes) comparisons instead of at most __n-1. +template <typename _ForwardIterator, typename _Size, typename _Compare> +_ForwardIterator +__simd_min_element(_ForwardIterator __first, _Size __n, _Compare __comp) noexcept +{ + if (__n == 0) + { + return __first; + } + + typedef typename std::iterator_traits<_ForwardIterator>::value_type _ValueType; + struct _ComplexType + { + _ValueType __min_val; + _Size __min_ind; + _Compare* __min_comp; + + _ComplexType() : __min_val{}, __min_ind{}, __min_comp(nullptr) {} + _ComplexType(const _ValueType& val, const _Compare* comp) + : __min_val(val), __min_ind(0), __min_comp(const_cast<_Compare*>(comp)) + { + } + _ComplexType(const _ComplexType& __obj) + : __min_val(__obj.__min_val), __min_ind(__obj.__min_ind), __min_comp(__obj.__min_comp) + { + } + + _PSTL_PRAGMA_DECLARE_SIMD + void + operator()(const _ComplexType& __obj) + { + if (!(*__min_comp)(__min_val, __obj.__min_val) && + ((*__min_comp)(__obj.__min_val, __min_val) || __obj.__min_ind - __min_ind < 0)) + { + __min_val = __obj.__min_val; + __min_ind = __obj.__min_ind; + } + } + }; + + _ComplexType __init{*__first, &__comp}; + + _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func, _ComplexType) + + _PSTL_PRAGMA_SIMD_REDUCTION(__min_func : __init) + for (_Size __i = 1; __i < __n; ++__i) + { + const _ValueType __min_val = __init.__min_val; + const _ValueType __current = __first[__i]; + if (__comp(__current, 
__min_val)) + { + __init.__min_val = __current; + __init.__min_ind = __i; + } + } + return __first + __init.__min_ind; +} + +// [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible. +// complexity [violation] - We will have at most (2*(__n-1) + 4*number_of_lanes) comparisons instead of at most [1.5*(__n-1)]. +template <typename _ForwardIterator, typename _Size, typename _Compare> +std::pair<_ForwardIterator, _ForwardIterator> +__simd_minmax_element(_ForwardIterator __first, _Size __n, _Compare __comp) noexcept +{ + if (__n == 0) + { + return std::make_pair(__first, __first); + } + typedef typename std::iterator_traits<_ForwardIterator>::value_type _ValueType; + + struct _ComplexType + { + _ValueType __min_val; + _ValueType __max_val; + _Size __min_ind; + _Size __max_ind; + _Compare* __minmax_comp; + + _ComplexType() : __min_val{}, __max_val{}, __min_ind{}, __max_ind{}, __minmax_comp(nullptr) {} + _ComplexType(const _ValueType& min_val, const _ValueType& max_val, const _Compare* comp) + : __min_val(min_val), __max_val(max_val), __min_ind(0), __max_ind(0), + __minmax_comp(const_cast<_Compare*>(comp)) + { + } + _ComplexType(const _ComplexType& __obj) + : __min_val(__obj.__min_val), __max_val(__obj.__max_val), __min_ind(__obj.__min_ind), + __max_ind(__obj.__max_ind), __minmax_comp(__obj.__minmax_comp) + { + } + + void + operator()(const _ComplexType& __obj) + { + // min + if ((*__minmax_comp)(__obj.__min_val, __min_val)) + { + __min_val = __obj.__min_val; + __min_ind = __obj.__min_ind; + } + else if (!(*__minmax_comp)(__min_val, __obj.__min_val)) + { + __min_val = __obj.__min_val; + __min_ind = (__min_ind - __obj.__min_ind < 0) ? __min_ind : __obj.__min_ind; + } + + // max + if ((*__minmax_comp)(__max_val, __obj.__max_val)) + { + __max_val = __obj.__max_val; + __max_ind = __obj.__max_ind; + } + else if (!(*__minmax_comp)(__obj.__max_val, __max_val)) + { + __max_val = __obj.__max_val; + __max_ind = (__max_ind - __obj.__max_ind < 0) ? 
__obj.__max_ind : __max_ind; + } + } + }; + + _ComplexType __init{*__first, *__first, &__comp}; + + _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func, _ComplexType); + + _PSTL_PRAGMA_SIMD_REDUCTION(__min_func : __init) + for (_Size __i = 1; __i < __n; ++__i) + { + auto __min_val = __init.__min_val; + auto __max_val = __init.__max_val; + auto __current = __first + __i; + if (__comp(*__current, __min_val)) + { + __init.__min_val = *__current; + __init.__min_ind = __i; + } + else if (!__comp(*__current, __max_val)) + { + __init.__max_val = *__current; + __init.__max_ind = __i; + } + } + return std::make_pair(__first + __init.__min_ind, __first + __init.__max_ind); +} + +template <class _InputIterator, class _DifferenceType, class _OutputIterator1, class _OutputIterator2, class _UnaryPredicate> +std::pair<_OutputIterator1, _OutputIterator2> +__simd_partition_copy(_InputIterator __first, _DifferenceType __n, _OutputIterator1 __out_true, + _OutputIterator2 __out_false, _UnaryPredicate __pred) noexcept +{ + _DifferenceType __cnt_true = 0, __cnt_false = 0; + + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 0; __i < __n; ++__i) + { + _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(__cnt_true : 1, __cnt_false : 1) + if (__pred(__first[__i])) + { + __out_true[__cnt_true] = __first[__i]; + ++__cnt_true; + } + else + { + __out_false[__cnt_false] = __first[__i]; + ++__cnt_false; + } + } + return std::make_pair(__out_true + __cnt_true, __out_false + __cnt_false); +} + +template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate> +_ForwardIterator1 +__simd_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, + _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept +{ + typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferencType; + + const _DifferencType __n1 = __last - __first; + const _DifferencType __n2 = __s_last - __s_first; + if (__n1 == 0 || __n2 == 0) + { + return __last; // according to the standard + } + + // Common case + // If first sequence larger than second then we'll run simd_first with parameters of first sequence. + // Otherwise, vice versa. 
+ if (__n1 < __n2) + { + for (; __first != __last; ++__first) + { + if (__unseq_backend::__simd_or( + __s_first, __n2, + __internal::__equal_value_by_pred<decltype(*__first), _BinaryPredicate>(*__first, __pred))) + { + return __first; + } + } + } + else + { + for (; __s_first != __s_last; ++__s_first) + { + const auto __result = __unseq_backend::__simd_first( + __first, _DifferencType(0), __n1, [__s_first, &__pred](_ForwardIterator1 __it, _DifferencType __i) { + return __pred(__it[__i], *__s_first); + }); + if (__result != __last) + { + return __result; + } + } + } + return __last; +} + +template <class _RandomAccessIterator, class _DifferenceType, class _UnaryPredicate> +_RandomAccessIterator +__simd_remove_if(_RandomAccessIterator __first, _DifferenceType __n, _UnaryPredicate __pred) noexcept +{ + // find first element we need to remove + auto __current = __unseq_backend::__simd_first( + __first, _DifferenceType(0), __n, + [&__pred](_RandomAccessIterator __it, _DifferenceType __i) { return __pred(__it[__i]); }); + __n -= __current - __first; + + // if we have in sequence only one element that pred(__current[1]) != false we can exit the function + if (__n < 2) + { + return __current; + } + + _DifferenceType __cnt = 0; + _PSTL_PRAGMA_SIMD + for (_DifferenceType __i = 1; __i < __n; ++__i) + { + _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1) + if (!__pred(__current[__i])) + { + __current[__cnt] = std::move(__current[__i]); + ++__cnt; + } + } + return __current + __cnt; +} +} // namespace __unseq_backend +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_UNSEQ_BACKEND_SIMD_H */ diff --git a/Components/Include/pstl/internal/utils.h b/Components/Include/pstl/internal/utils.h new file mode 100644 index 0000000..ec9d467 --- /dev/null +++ b/Components/Include/pstl/internal/utils.h @@ -0,0 +1,177 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_UTILS_H +#define _PSTL_UTILS_H + +#include <new> +#include <utility> + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __internal +{ + +template <typename _Fp> +auto +__except_handler(_Fp __f) -> decltype(__f()) +{ + try + { + return __f(); + } + catch (const std::bad_alloc&) + { + throw; // re-throw bad_alloc according to the standard [algorithms.parallel.exceptions] + } + catch (...) + { + std::terminate(); // Good bye according to the standard [algorithms.parallel.exceptions] + } +} + +template <typename _Fp> +void +__invoke_if(std::true_type, _Fp __f) +{ + __f(); +} + +template <typename _Fp> +void __invoke_if(std::false_type, _Fp) +{ +} + +template <typename _Fp> +void +__invoke_if_not(std::false_type, _Fp __f) +{ + __f(); +} + +template <typename _Fp> +void __invoke_if_not(std::true_type, _Fp) +{ +} + +template <typename _F1, typename _F2> +auto +__invoke_if_else(std::true_type, _F1 __f1, _F2) -> decltype(__f1()) +{ + return __f1(); +} + +template <typename _F1, typename _F2> +auto +__invoke_if_else(std::false_type, _F1, _F2 __f2) -> decltype(__f2()) +{ + return __f2(); +} + +//! Unary operator that returns reference to its argument. +struct __no_op +{ + template <typename _Tp> + _Tp&& + operator()(_Tp&& __a) const + { + return std::forward<_Tp>(__a); + } +}; + +template <typename _Pred> +class __reorder_pred +{ + _Pred _M_pred; + + public: + explicit __reorder_pred(_Pred __pred) : _M_pred(__pred) {} + + template <typename _FTp, typename _STp> + bool + operator()(_FTp&& __a, _STp&& __b) + { + return _M_pred(std::forward<_STp>(__b), std::forward<_FTp>(__a)); + } +}; + +//! Like a polymorphic lambda for pred(...,value) +template <typename _Tp, typename _Predicate> +class __equal_value_by_pred +{ + const _Tp& _M_value; + _Predicate _M_pred; + + public: + __equal_value_by_pred(const _Tp& __value, _Predicate __pred) : _M_value(__value), _M_pred(__pred) {} + + template <typename _Arg> + bool + operator()(_Arg&& __arg) + { + return _M_pred(std::forward<_Arg>(__arg), _M_value); + } +}; + +//! 
Like a polymorphic lambda for ==value +template <typename _Tp> +class __equal_value +{ + const _Tp& _M_value; + + public: + explicit __equal_value(const _Tp& __value) : _M_value(__value) {} + + template <typename _Arg> + bool + operator()(_Arg&& __arg) const + { + return std::forward<_Arg>(__arg) == _M_value; + } +}; + +//! Logical negation of ==value +template <typename _Tp> +class __not_equal_value +{ + const _Tp& _M_value; + + public: + explicit __not_equal_value(const _Tp& __value) : _M_value(__value) {} + + template <typename _Arg> + bool + operator()(_Arg&& __arg) const + { + return !(std::forward<_Arg>(__arg) == _M_value); + } +}; + +template <typename _ForwardIterator, typename _Compare> +_ForwardIterator +__cmp_iterators_by_values(_ForwardIterator __a, _ForwardIterator __b, _Compare __comp) +{ + if (__a < __b) + { // we should return closer iterator + return __comp(*__b, *__a) ? __b : __a; + } + else + { + return __comp(*__a, *__b) ? __a : __b; + } +} + +} // namespace __internal +} // namespace __pstl + +_PSTL_HIDE_FROM_ABI_POP + +#endif /* _PSTL_UTILS_H */ diff --git a/Interfaces/VideoSubFinderCli/CMakeLists.txt b/Interfaces/VideoSubFinderCli/CMakeLists.txt new file mode 100644 index 0000000..f588ca6 --- /dev/null +++ b/Interfaces/VideoSubFinderCli/CMakeLists.txt @@ -0,0 +1,88 @@ +SET(VideoSubFinderCli_src + VideoSubFinderCli.h + VideoSubFinderCli.cpp + ) + +add_executable(VideoSubFinderCli ${VideoSubFinderCli_src}) + + +target_compile_features(VideoSubFinderCli PUBLIC cxx_std_17) + +target_include_directories(VideoSubFinderCli PUBLIC + ${wxWidgets_INCLUDE_DIRS} + ${OpenCV_INCLUDE_DIRS} + "../../Components/Include" + "../../Components/IPAlgorithms" + "../../Components/OCVVideo" + "../../Components/FFMPEGVideo" +) + +if (WIN32) + target_link_directories(VideoSubFinderCli PUBLIC + ${VideoSubFinderCli_LINK_DIRS} + ) + + target_link_libraries(VideoSubFinderCli PUBLIC + IPAlgorithms + OCVVideo + FFMPEGVideo + ${CUDAKernels_LIB} + wxmsw32u_aui.lib + wxmsw32u_media.lib + wxmsw32u_core.lib + wxmsw32u_adv.lib + wxbase32u.lib + wxtiff.lib + wxjpeg.lib + wxpng.lib + 
wxzlib.lib + wxregexu.lib + wxexpat.lib + winmm.lib + comctl32.lib + rpcrt4.lib + wsock32.lib + odbc32.lib + vfw32.lib + avdevice.lib + avformat.lib + avfilter.lib + avcodec.lib + swresample.lib + swscale.lib + avutil.lib + ${OpenCV_LIBS} + ) +else() + target_compile_definitions(VideoSubFinderCli PUBLIC + ${wxWidgets_DEFINITIONS} + $<$<CONFIG:Debug>:${wxWidgets_DEFINITIONS_DEBUG}>) + target_compile_options(VideoSubFinderCli PRIVATE ${wxWidgets_CXX_FLAGS}) + + target_link_directories(VideoSubFinderCli PUBLIC + "${wxWidgets_LIBRARY_DIRS}" + ) + + target_link_libraries(VideoSubFinderCli PUBLIC + IPAlgorithms + OCVVideo + FFMPEGVideo + ${CUDAKernels_LIB} + ${wxWidgets_LIBRARIES} + ${OpenCV_LIBS} + avcodec + avformat + avutil + swscale + avfilter + tbb + ) + + +endif (WIN32) + +if (APPLE) + target_link_libraries(VideoSubFinderCli PUBLIC + wx_osx_cocoau_core-3.2 + ) +endif(APPLE) \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2a9bc59 --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +# videosubfinder-cli + +## Install dependencies +- macOS +```bash + brew install wxwidgets@3.2 opencv@4 ffmpeg tbb +``` +- Ubuntu 20.04 + + see [Build/Docker/run_cuda.Dockerfile](Build/Docker/run_cuda.Dockerfile) + +## Usage +```bash +Usage: VideoSubFinderCli [-h] [--verbose] [-c] [-r] [-ces ] [-i ] [-ovocv] + [-ovffmpeg] [-uc] [-dsi] [-s ] [-e ] [-te ] [-be ] + [-le ] [-re ] [-o ] [-nthr ] [-h] +-h, --help show this help message +--verbose generate verbose log messages +-c, --clear_dirs Clear Folders (remove all images), performed before any other steps +-r, --run_search Run Search (find frames with hardcoded text (hardsub) on video) +-ces, --create_empty_sub= Create Empty Sub With Provided Output File Name (*.srt) +-i, --input_video= input video file +-ovocv, --open_video_opencv open video by OpenCV (default) +-ovffmpeg, --open_video_ffmpeg open video by FFMPEG +-uc, --use_cuda use cuda +-dsi, --dont_save_images Don't save images +-s, --start_time= start 
time, default = 0:00:00:000 (in format hour:min:sec:milisec) +-e, --end_time= end time, default = video length +-te, --top_video_image_percent_end= top video image percent offset from image bottom, can be in range [0.0,1.0], default = 1.0 +-be, --bottom_video_image_percent_end= bottom video image percent offset from image bottom, can be in range [0.0,1.0], default = 0.0 +-le, --left_video_image_percent_end= left video image percent end, can be in range [0.0,1.0], default = 0.0 +-re, --right_video_image_percent_end= right video image percent end, can be in range [0.0,1.0], default = 1.0 +-o, --output_dir= output dir (root directory where results will be stored) +-nthr, --num_threads= number of threads used for Run Search +-h, --help show this help message + +Example of usage: +./VideoSubFinderCli -c -r -i "./test_video.mp4" -o "./ResultsDir" -te 0.5 -be 0.1 -le 0.1 -re 0.9 -s 0:00:10:300 -e 0:00:13:100 +``` diff --git a/docker/build.Dockerfile b/docker/build.Dockerfile deleted file mode 100644 index 6364d57..0000000 --- a/docker/build.Dockerfile +++ /dev/null @@ -1,72 +0,0 @@ -FROM ubuntu:20.04 as builder -# Allow ubuntu to cache package downloads -RUN rm -f /etc/apt/apt.conf.d/docker-clean -RUN --mount=type=cache,target=/var/cache/apt \ - apt update \ - && DEBIAN_FRONTEND=noninteractive apt install -y git build-essential libgtk-3-dev ffmpeg libavcodec-dev libavformat-dev \ - libavutil-dev libswscale-dev libx264-dev cmake libavcodec-dev libavformat-dev \ - libavutil-dev libswscale-dev libavfilter-dev libtbb-dev wget -ENV http_proxy http://192.168.50.86:10801 -ENV https_proxy http://192.168.50.86:10801 -ENV all_proxy http://192.168.50.86:10801 -RUN mkdir -p /tmp/work \ - && cd /tmp/work \ - && git clone https://github.com/wxWidgets/wxWidgets.git \ - && cd wxWidgets/ \ - && git checkout v3.2.1 \ - && git submodule update --init --recursive \ - && mkdir buildgtk \ - && cd buildgtk/ \ - && ../configure --disable-gui \ - && make -j$(nproc) \ - && make install \ - && rm 
-rf /tmp/work/wxWidgets -RUN cd /tmp/work \ - && wget https://github.com/opencv/opencv/archive/4.7.0.tar.gz \ - && tar xvf 4.7.0.tar.gz \ - && cd opencv-4.7.0/ \ - && mkdir -p build \ - && cd build \ - && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GTK=OFF -DWITH_FFMPEG=ON -D CMAKE_BUILD_TYPE=RELEASE \ - -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_TBB=ON -D WITH_V4L=ON -D WITH_OPENGL=ON \ - -D WITH_CUBLAS=ON -DWITH_QT=OFF -DCUDA_NVCC_FLAGS="-D_FORCE_INLINES" .. \ - && cmake --build . --config Release -j $(nproc) \ - && make install \ - && rm -rf /tmp/work/opencv-4.7.0 \ - && rm -f /tmp/work/4.7.0.tar.gz -COPY . /tmp/work/videosubfinder-src - -RUN cd /tmp/work/videosubfinder-src \ - && rm -rf linux_build \ - && mkdir -p linux_build \ - && cd linux_build/ \ - && cmake -DCMAKE_BUILD_TYPE=Release -DUSE_CUDA=OFF .. \ - && cmake --build . --config Release -j $(nproc) \ - && cp ./Interfaces/VideoSubFinderCli/VideoSubFinderCli /tmp/work/ \ - && rm -rf /tmp/work/videosubfinder-src -RUN cp -L /usr/local/lib/libwx_baseu-3.2.so.0 \ - /usr/local/lib/libopencv_videoio.so.407 \ - /usr/local/lib/libopencv_core.so.407 \ - /usr/local/lib/libopencv_imgproc.so.407 \ - /usr/local/lib/libopencv_imgcodecs.so.407 \ - /tmp/work/ - - - - - - - - - - - - - - - - - - - - diff --git a/docker/build.sh b/docker/build.sh deleted file mode 100644 index 6c6340e..0000000 --- a/docker/build.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -e -cd ${0%/*} -docker build -t videosubfinder-build:cpu -f build.Dockerfile .. 
-mkdir -p build/cpu/ -docker run --rm -v $PWD/build/cpu/:$PWD/build/cpu/ videosubfinder-build:cpu \ - bash -c "cd /tmp/work/ && tar cvzf $PWD/build/cpu/VideoSubFinderCli.tar.gz *" \ No newline at end of file diff --git a/docker/build_cuda.Dockerfile b/docker/build_cuda.Dockerfile deleted file mode 100644 index 9eb6177..0000000 --- a/docker/build_cuda.Dockerfile +++ /dev/null @@ -1,71 +0,0 @@ -FROM nvidia/cuda:11.7.0-devel-ubuntu20.04 as builder -# Allow ubuntu to cache package downloads -RUN rm -f /etc/apt/apt.conf.d/docker-clean -ARG USE_GUI=0 -RUN --mount=type=cache,target=/var/cache/apt \ - apt update \ - && DEBIAN_FRONTEND=noninteractive apt install -y git cmake wget libtbb-dev \ - libavcodec-dev libgtk-3-dev libavformat-dev libswscale-dev libavfilter-dev \ - && if [[ "USE_GUI" = "1" ]] ; then DEBIAN_FRONTEND=noninteractive apt install -y \ - build-essential libgtk-3-dev ffmpeg libavutil-dev libx264-dev \ - ;fi -ENV http_proxy http://192.168.50.86:10801 -ENV https_proxy http://192.168.50.86:10801 -ENV all_proxy http://192.168.50.86:10801 -RUN mkdir -p /tmp/work \ - && cd /tmp/work \ - && git clone https://github.com/wxWidgets/wxWidgets.git --branch v3.2.1 --depth=1 --recurse-submodules -j8 \ - && cd wxWidgets/ \ - && mkdir buildgtk \ - && cd buildgtk/ \ - && ../configure --disable-gui \ - && make -j$(nproc) \ - && make install \ - && rm -rf /tmp/work/wxWidgets -RUN cd /tmp/work \ - && wget https://github.com/opencv/opencv/archive/4.7.0.tar.gz \ - && tar xvf 4.7.0.tar.gz \ - && cd opencv-4.7.0/ \ - && mkdir -p build \ - && cd build \ - && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GTK=OFF -DWITH_FFMPEG=ON -D CMAKE_BUILD_TYPE=RELEASE \ - -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_TBB=ON -D WITH_V4L=ON -D WITH_OPENGL=ON \ - -D WITH_CUBLAS=ON -DWITH_QT=OFF -DCUDA_NVCC_FLAGS="-D_FORCE_INLINES" .. \ - && cmake --build . --config Release -j $(nproc) \ - && make install \ - && rm -rf /tmp/work/opencv-4.7.0 \ - && rm -f /tmp/work/4.7.0.tar.gz -COPY . 
/tmp/work/videosubfinder-src -RUN CUDA_DIR="$(ls -d1 /usr/local/cuda-*|head -1)" \ - && ln -s $CUDA_DIR/targets/x86_64-linux/lib/libcudart.so /usr/lib/libcudart.so \ - && export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_DIR/lib64:$CUDA_DIR/extras/CUPTI/lib64 export PATH=$PATH:$CUDA_DIR/bin \ - && cd /tmp/work/videosubfinder-src \ - && rm -rf linux_build \ - && mkdir -p linux_build \ - && cd linux_build/ \ - && cmake -DCMAKE_BUILD_TYPE=Release -DUSE_CUDA=ON .. \ - && cmake --build . --config Release -j $(nproc) \ - && if [[ "USE_GUI" = "1" ]] ; then cp ./Interfaces/VideoSubFinderWXW/VideoSubFinderWXW /tmp/work/; \ - else cp ./Interfaces/VideoSubFinderCli/VideoSubFinderCli /tmp/work/ ; fi \ - && rm -rf /tmp/work/videosubfinder-src -RUN cp -L /usr/local/lib/libwx_baseu-3.2.so.0 \ - /usr/local/lib/libopencv_videoio.so.407 \ - /usr/local/lib/libopencv_core.so.407 \ - /usr/local/lib/libopencv_imgproc.so.407 \ - /usr/local/lib/libopencv_imgcodecs.so.407 \ - /tmp/work/ - - - - - - - - - - - - - - -