Skip to content

Commit

Permalink
Add support for host target setting of support of features
Browse files Browse the repository at this point in the history
Allow features to be added using env variable or cmake using standard
mechanism of "+v,+zfence" etc
  • Loading branch information
coldav authored and hvdijk committed Jul 19, 2024
1 parent 7a3b1a4 commit 05b3601
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 96 deletions.
28 changes: 19 additions & 9 deletions doc/developer-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -319,23 +319,28 @@ The builtin CMake options used when invoking CMake on the command line.
`-DCMAKE_SKIP_RPATH=ON` when configuring CMake in build directory.

* `CA_HOST_TARGET_<arch>_CPU`: This option is used by the `host` target to
optimize for performance on a given CPU.`arch` should be a capitalized
optimize for performance on a given CPU. `arch` should be a capitalized
version of the `host` target architecture e.g. `X86_64`, `RISCV64` or
`AARCH64`. If set to "native", `host` will optimize for the CPU being used to
compile it. Otherwise a CPU name can be provided, for example "skylake", but
be warned that this string will be passed directly to the LLVM backend, so make
sure it's a valid CPU name. Information about your host CPU can be found by
running`llc --version`, and a list of host CPUs supported by your installed
version of LLVM can be found by running`llc --march=[your-arch] --mcpu=help`.
running `llc --version`, and a list of host CPUs supported by your installed
version of LLVM can be found by running `llc --march=[your-arch] --mcpu=help`.

Setting a specific `CA_HOST_TARGET_<arch>_CPU` ignores the default features
and enables the features supported by that CPU.

Be aware that if `host` is compiled with this option set, running it on a
different CPU from the one specified (or the one compiled with if "native" was
specified) isn't supported and bad things may happen. When the oneAPI
Construction Kit is built in debug mode, the environment variable
`CA_HOST_TARGET_CPU` is also respected across all `host` targets, which can
help track down codegen differences among different machine targets. The
caveats above apply, and this may result in an illegal instruction crash if
your CPU doesn't support the generated instructions.
specified) isn't supported and bad things may happen.

* `CA_HOST_TARGET_<arch>_FEATURES`: This option is used by the `host` target to
enable or disable features on a given CPU. `arch` should be a capitalized
version of the `host` target architecture e.g. `X86_64`, `RISCV64` or
`AARCH64`. The value is a comma-separated list of features, each preceded by
'+' to enable it or '-' to disable it, e.g. "+v,-zifencei". This is the same
format accepted by `--mattr` in LLVM tools such as `llc` or `opt`.

* `CA_USE_SPLIT_DWARF`: When building with gcc, enable split dwarf debuginfo.
This significantly reduces binary size (especially when static linking) and
Expand Down Expand Up @@ -1115,6 +1120,11 @@ options without having to modify the source.
[below](#debugging-the-llvm-compiler) for example of how this can be used.
* `CA_HOST_NUM_THREADS`: Sets the maximum number of threads the `host` device
will create. `host` may create fewer threads than this value.
* `CA_HOST_TARGET_CPU`, `CA_HOST_TARGET_FEATURES`: These environment variables
can be used in debug builds (or builds with `CA_ENABLE_DEBUG_SUPPORT` defined)
to override the default CPU and features. They behave the same way as the
`CA_HOST_TARGET_<arch>_CPU` and `CA_HOST_TARGET_<arch>_FEATURES` CMake options,
and the same caveats about `"native"` apply here.

## Debugging the LLVM compiler

Expand Down
25 changes: 19 additions & 6 deletions modules/compiler/targets/host/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,13 @@ if(CA_HOST_CROSS_COMPILERS)
target_compile_definitions(compiler-host PRIVATE
CA_HOST_TARGET_${HOST_ARCH_UPPER}_CPU="${CA_HOST_TARGET_${HOST_ARCH_UPPER}_CPU}")
endif()

ca_option(CA_HOST_TARGET_${HOST_ARCH_UPPER}_FEATURES STRING
"Feature list that host ${HOST_ARCH_UPPER} should enable or disable as a comma separated + or - list e.g. '+v,+zfh'" "")
if(CA_HOST_TARGET_${HOST_ARCH_UPPER}_FEATURES)
message(STATUS "Features ${HOST_ARCH_UPPER} name ${CA_HOST_TARGET_${HOST_ARCH_UPPER}_FEATURES}")
target_compile_definitions(compiler-host PRIVATE
CA_HOST_TARGET_${HOST_ARCH_UPPER}_FEATURES="${CA_HOST_TARGET_${HOST_ARCH_UPPER}_FEATURES}")
endif()
if(hostCrossCompilers)
# Validate the user specified cross compiler list.
foreach(CrossCompiler ${hostCrossCompilers})
Expand Down Expand Up @@ -225,11 +231,18 @@ if(CA_HOST_CROSS_COMPILERS)
HOST_CROSS_DEVICE_NAME_${CROSS_COMPILER}="${crossDeviceName}")
ca_option(CA_HOST_TARGET_${CROSS_COMPILER}_CPU STRING
"Name of the CPU that host ${CROSS_COMPILER} should optimize for, or 'native'" "")
if(CA_HOST_TARGET_${CROSS_COMPILER}_CPU)
message(STATUS "CPU ${CROSS_COMPILER} name ${CA_HOST_TARGET_${CROSS_COMPILER}_CPU}")
target_compile_definitions(compiler-host PRIVATE
CA_HOST_TARGET_${CROSS_COMPILER}_CPU="${CA_HOST_TARGET_${CROSS_COMPILER}_CPU}")
endif()
if(CA_HOST_TARGET_${CROSS_COMPILER}_CPU)
message(STATUS "CPU ${CROSS_COMPILER} name ${CA_HOST_TARGET_${CROSS_COMPILER}_CPU}")
target_compile_definitions(compiler-host PRIVATE
CA_HOST_TARGET_${CROSS_COMPILER}_CPU="${CA_HOST_TARGET_${CROSS_COMPILER}_CPU}")
endif()
# Per-cross-compiler feature string (LLVM -mattr style, e.g. '+v,+zfh'). When
# non-empty it is forwarded to compiler-host as the
# CA_HOST_TARGET_<cross-compiler>_FEATURES compile definition. The help text
# names the cross compiler this option applies to, matching the sibling
# CA_HOST_TARGET_<cross-compiler>_CPU option.
ca_option(CA_HOST_TARGET_${CROSS_COMPILER}_FEATURES STRING
  "Feature list that host ${CROSS_COMPILER} should enable or disable as a comma separated + or - list e.g. '+v,+zfh'" "")
if(CA_HOST_TARGET_${CROSS_COMPILER}_FEATURES)
  message(STATUS "Features ${CROSS_COMPILER} name ${CA_HOST_TARGET_${CROSS_COMPILER}_FEATURES}")
  target_compile_definitions(compiler-host PRIVATE
    CA_HOST_TARGET_${CROSS_COMPILER}_FEATURES="${CA_HOST_TARGET_${CROSS_COMPILER}_FEATURES}")
endif()
endforeach()
endif()
endif()
Expand Down
231 changes: 150 additions & 81 deletions modules/compiler/targets/host/source/target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <llvm/Support/raw_ostream.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Target/TargetOptions.h>
#include <llvm/TargetParser/SubtargetFeature.h>
#include <multi_llvm/multi_llvm.h>

#if LLVM_VERSION_GREATER_EQUAL(18, 0)
Expand Down Expand Up @@ -234,95 +235,176 @@ compiler::Result HostTarget::initWithBuiltins(
break;
}

llvm::StringRef CPUName = "";
llvm::StringMap<bool> FeatureMap;

if (llvm::Triple::arm == triple.getArch()) {
FeatureMap["strict-align"] = true;
// We do not support denormals for single precision floating points, but we
// do for double precision. To support that we use neon (which is FTZ) for
// single precision floating points, and use the VFP with denormal support
// enabled for doubles. The neonfp feature enables the use of neon for
// single precision floating points.
FeatureMap["neonfp"] = true;
FeatureMap["neon"] = true;
// Hardware division instructions might not exist on all ARMv7 CPUs, but
// they probably exist on all the ones we might care about.
FeatureMap["hwdiv"] = true;
FeatureMap["hwdiv-arm"] = true;
if (host_device_info.half_capabilities) {
FeatureMap["fp16"] = true;
}
#if defined(CA_HOST_TARGET_ARM_CPU)
CPUName = CA_HOST_TARGET_ARM_CPU;
std::string CPU;
llvm::SubtargetFeatures Features;

switch (triple.getArch()) {
#ifdef HOST_LLVM_ARM
case llvm::Triple::arm:
// We do not support denormals for single precision floating points, but
// we do for double precision. To support that we use neon (which is FTZ)
// for single precision floating points, and use the VFP with denormal
// support enabled for doubles. The neonfp feature enables the use of neon
// for single precision floating points.
Features.AddFeature("strict-align", true);
Features.AddFeature("neonfp", true);
Features.AddFeature("neon", true);
// Hardware division instructions might not exist on all ARMv7 CPUs, but
// they probably exist on all the ones we might care about.
Features.AddFeature("hwdiv", true);
Features.AddFeature("hwdiv-arm", true);
if (host_device_info.half_capabilities) {
Features.AddFeature("fp16", true);
}
break;
#endif
#ifdef HOST_LLVM_RISCV
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
CPU = triple.getArch() == llvm::Triple::riscv32 ? "generic-rv32"
: "generic-rv64";
// The following features are important for OpenCL, and generally
// constitute a minimum requirement for non-embedded profile. Without
// these features, we'd need compiler-rt support. Atomics are absolutely
// essential.
Features.AddFeature("m", true); // Integer multiplication and division
Features.AddFeature("f", true); // Floating point support
Features.AddFeature("a", true); // Atomics
#if defined(CA_HOST_ENABLE_FP64)
Features.AddFeature("d", true); // Double support
#endif
} else if (llvm::Triple::aarch64 == triple.getArch()) {
#if defined(CA_HOST_TARGET_AARCH64_CPU)
CPUName = CA_HOST_TARGET_AARCH64_CPU;
#if defined(CA_HOST_ENABLE_FP16)
Features.AddFeature("zfh", true); // Half support
#endif
} else if (triple.isX86()) {
CPUName = "x86-64-v3"; // Default only, may be overridden below.
if (triple.isArch32Bit()) {
#if defined(CA_HOST_TARGET_X86_CPU)
CPUName = CA_HOST_TARGET_X86_CPU;
break;
#endif
} else {
#if defined(CA_HOST_TARGET_X86_64_CPU)
CPUName = CA_HOST_TARGET_X86_64_CPU;
#ifdef HOST_LLVM_X86
case llvm::Triple::x86:
case llvm::Triple::x86_64:
CPU = "x86-64-v3";
break;
#endif
default:
break;
}

auto SetCPUFeatures = [&](std::string NewCPU, std::string NewFeatures) {
if (!NewCPU.empty()) {
CPU = NewCPU;
Features = llvm::SubtargetFeatures();
}
if (!NewFeatures.empty()) {
std::vector<std::string> NewFeatureVector;
llvm::SubtargetFeatures::Split(NewFeatureVector, NewFeatures);
for (auto &NewFeature : NewFeatureVector) Features.AddFeature(NewFeature);
}
} else if (triple.isRISCV()) {
// These are reasonable defaults, which has been used for various RISC-V
// target so far. We should allow overriding of the ABI in the future
if (triple.isArch32Bit()) {
CPUName = "generic-rv32";
#if defined(CA_HOST_TARGET_RISCV32_CPU)
CPUName = CA_HOST_TARGET_RISCV32_CPU;
};

switch (triple.getArch()) {
#ifdef HOST_LLVM_ARM
case llvm::Triple::arm:
#ifndef CA_HOST_TARGET_ARM_CPU
#define CA_HOST_TARGET_ARM_CPU ""
#endif
} else {
CPUName = "generic-rv64";
#if defined(CA_HOST_TARGET_RISCV64_CPU)
CPUName = CA_HOST_TARGET_RISCV64_CPU;
#ifndef CA_HOST_TARGET_ARM_FEATURES
#define CA_HOST_TARGET_ARM_FEATURES ""
#endif
}
// The following features are important for OpenCL, and generally constitute
// a minimum requirement for non-embedded profile. Without these features,
// we'd need compiler-rt support. Atomics are absolutely essential.
// TODO: Allow overriding of the input features.
FeatureMap["m"] = true; // Integer multiplication and division
FeatureMap["f"] = true; // Floating point support
FeatureMap["a"] = true; // Atomics
#if defined(CA_HOST_ENABLE_FP64)
FeatureMap["d"] = true; // Double support
SetCPUFeatures(CA_HOST_TARGET_ARM_CPU, CA_HOST_TARGET_ARM_FEATURES);
break;
#endif
#if defined(CA_HOST_ENABLE_FP16)
FeatureMap["zfh"] = true; // Half support
#ifdef HOST_LLVM_AARCH64
case llvm::Triple::aarch64:
#ifndef CA_HOST_TARGET_AARCH64_CPU
#define CA_HOST_TARGET_AARCH64_CPU ""
#endif
#ifndef CA_HOST_TARGET_AARCH64_FEATURES
#define CA_HOST_TARGET_AARCH64_FEATURES ""
#endif
SetCPUFeatures(CA_HOST_TARGET_AARCH64_CPU,
CA_HOST_TARGET_AARCH64_FEATURES);
break;
#endif
#ifdef HOST_LLVM_RISCV
case llvm::Triple::riscv32:
#ifndef CA_HOST_TARGET_RISCV32_CPU
#define CA_HOST_TARGET_RISCV32_CPU ""
#endif
#ifndef CA_HOST_TARGET_RISCV32_FEATURES
#define CA_HOST_TARGET_RISCV32_FEATURES ""
#endif
SetCPUFeatures(CA_HOST_TARGET_RISCV32_CPU,
CA_HOST_TARGET_RISCV32_FEATURES);
break;
case llvm::Triple::riscv64:
#ifndef CA_HOST_TARGET_RISCV64_CPU
#define CA_HOST_TARGET_RISCV64_CPU ""
#endif
#ifndef CA_HOST_TARGET_RISCV64_FEATURES
#define CA_HOST_TARGET_RISCV64_FEATURES ""
#endif
SetCPUFeatures(CA_HOST_TARGET_RISCV64_CPU,
CA_HOST_TARGET_RISCV64_FEATURES);
break;
#endif
#ifdef HOST_LLVM_X86
case llvm::Triple::x86:
#ifndef CA_HOST_TARGET_X86_CPU
#define CA_HOST_TARGET_X86_CPU ""
#endif
#ifndef CA_HOST_TARGET_X86_FEATURES
#define CA_HOST_TARGET_X86_FEATURES ""
#endif
SetCPUFeatures(CA_HOST_TARGET_X86_CPU, CA_HOST_TARGET_X86_FEATURES);
break;
case llvm::Triple::x86_64:
#ifndef CA_HOST_TARGET_X86_64_CPU
#define CA_HOST_TARGET_X86_64_CPU ""
#endif
#ifndef CA_HOST_TARGET_X86_64_FEATURES
#define CA_HOST_TARGET_X86_64_FEATURES ""
#endif
SetCPUFeatures(CA_HOST_TARGET_X86_64_CPU, CA_HOST_TARGET_X86_64_FEATURES);
break;
#endif
default:
break;
}

#ifndef NDEBUG
if (const char *E = getenv("CA_HOST_TARGET_CPU")) {
CPUName = E;
#if !defined(NDEBUG) || defined(CA_ENABLE_DEBUG_SUPPORT)
{
auto GetEnv = [](const char *Name) -> std::string {
auto *Value = std::getenv(Name);
return Value ? Value : "";
};
SetCPUFeatures(GetEnv("CA_HOST_TARGET_CPU"),
GetEnv("CA_HOST_TARGET_FEATURES"));
;
}
#endif

if (CPUName == "native") {
CPUName = llvm::sys::getHostCPUName();
if (CPU == "native") {
CPU = llvm::sys::getHostCPUName();

llvm::SubtargetFeatures NativeFeatures;

#if LLVM_VERSION_GREATER_EQUAL(19, 0)
FeatureMap = llvm::sys::getHostCPUFeatures();
auto FeatureMap = llvm::sys::getHostCPUFeatures();
#else
FeatureMap.clear();
llvm::StringMap<bool> FeatureMap;
llvm::sys::getHostCPUFeatures(FeatureMap);
#endif
for (auto &[FeatureName, IsEnabled] : FeatureMap)
NativeFeatures.AddFeature(FeatureName, IsEnabled);

NativeFeatures.addFeaturesVector(Features.getFeatures());
Features = std::move(NativeFeatures);
}

if (compiler_info->supports_deferred_compilation) {
llvm::orc::JITTargetMachineBuilder TMBuilder(triple);
TMBuilder.setCPU(CPUName.str());
TMBuilder.setCPU(CPU);
TMBuilder.setCodeGenOptLevel(multi_llvm::CodeGenOptLevel::Aggressive);
for (auto &Feature : FeatureMap) {
TMBuilder.getFeatures().AddFeature(Feature.first(), Feature.second);
}
TMBuilder.getFeatures().addFeaturesVector(Features.getFeatures());
auto Builder = llvm::orc::LLJITBuilder();

Builder.setJITTargetMachineBuilder(TMBuilder);
Expand Down Expand Up @@ -373,21 +455,8 @@ compiler::Result HostTarget::initWithBuiltins(
target_machine = std::move(*TM);
} else {
// No JIT support so create target machine directly.
std::string Features;
bool first = true;
for (auto &[FeatureName, IsEnabled] : FeatureMap) {
if (first) {
first = false;
} else {
Features += ",";
}
if (IsEnabled) {
Features += '+' + FeatureName.str();
} else {
Features += '-' + FeatureName.str();
}
}
target_machine.reset(createTargetMachine(triple, CPUName, Features));
target_machine.reset(
createTargetMachine(triple, CPU, Features.getString()));
}

return compiler::Result::SUCCESS;
Expand Down

0 comments on commit 05b3601

Please sign in to comment.