Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve CMake options #376

Merged
merged 5 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .azure-pipelines/integration-test-rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
targetType: 'inline'
script: |
mkdir build && cd build
CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_ROCM=ON ..
CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON ..
make -j
workingDirectory: '$(System.DefaultWorkingDirectory)'

Expand Down
2 changes: 1 addition & 1 deletion .azure-pipelines/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
targetType: 'inline'
script: |
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON ..
make -j
workingDirectory: '$(System.DefaultWorkingDirectory)'

Expand Down
2 changes: 1 addition & 1 deletion .azure-pipelines/multi-nodes-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
targetType: 'inline'
script: |
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON ..
make -j
workingDirectory: '$(System.DefaultWorkingDirectory)'

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
- name: Build
run: |
rm -rf build && mkdir build && cd build
cmake -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON ..
make -j

- name: Perform CodeQL Analysis
Expand Down Expand Up @@ -91,7 +91,7 @@ jobs:
- name: Build
run: |
rm -rf build && mkdir build && cd build
CXX=/opt/rocm/bin/hipcc cmake -DBYPASS_GPU_CHECK=ON -DUSE_ROCM=ON ..
CXX=/opt/rocm/bin/hipcc cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON ..
make -j

- name: Perform CodeQL Analysis
Expand Down
54 changes: 27 additions & 27 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@ enable_language(CXX)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

# Options
option(ENABLE_TRACE "Enable tracing" OFF)
option(BUILD_TESTS "Build tests" ON)
option(BUILD_PYTHON_BINDINGS "Build Python bindings" ON)
option(BUILD_APPS_NCCL "Build NCCL interfaces" ON)
option(USE_CUDA "Use NVIDIA/CUDA." OFF)
option(USE_ROCM "Use AMD/ROCm." OFF)
option(BYPASS_GPU_CHECK "Bypass GPU check." OFF)

if(BYPASS_GPU_CHECK)
if(USE_CUDA)
option(MSCCLPP_ENABLE_TRACE "Enable tracing" OFF)
option(MSCCLPP_BUILD_TESTS "Build tests" ON)
option(MSCCLPP_BUILD_PYTHON_BINDINGS "Build Python bindings" ON)
option(MSCCLPP_BUILD_APPS_NCCL "Build NCCL interfaces" ON)
option(MSCCLPP_USE_CUDA "Use NVIDIA/CUDA." OFF)
option(MSCCLPP_USE_ROCM "Use AMD/ROCm." OFF)
option(MSCCLPP_BYPASS_GPU_CHECK "Bypass GPU check." OFF)

if(MSCCLPP_BYPASS_GPU_CHECK)
if(MSCCLPP_USE_CUDA)
message("Bypassing GPU check: using NVIDIA/CUDA.")
find_package(CUDAToolkit REQUIRED)
elseif(USE_ROCM)
elseif(MSCCLPP_USE_ROCM)
message("Bypassing GPU check: using AMD/ROCm.")
# Temporary fix for ROCm 5.6
set(CMAKE_PREFIX_PATH "/opt/rocm;${CMAKE_PREFIX_PATH}")
Expand All @@ -40,16 +40,16 @@ else()
include(CheckAmdGpu)
if(NVIDIA_FOUND AND AMD_FOUND)
message("Detected NVIDIA/CUDA and AMD/ROCm: prioritizing NVIDIA/CUDA.")
set(USE_CUDA ON)
set(USE_ROCM OFF)
set(MSCCLPP_USE_CUDA ON)
set(MSCCLPP_USE_ROCM OFF)
elseif(NVIDIA_FOUND)
message("Detected NVIDIA/CUDA.")
set(USE_CUDA ON)
set(USE_ROCM OFF)
set(MSCCLPP_USE_CUDA ON)
set(MSCCLPP_USE_ROCM OFF)
elseif(AMD_FOUND)
message("Detected AMD/ROCm.")
set(USE_CUDA OFF)
set(USE_ROCM ON)
set(MSCCLPP_USE_CUDA OFF)
set(MSCCLPP_USE_ROCM ON)
else()
message(FATAL_ERROR "Neither NVIDIA/CUDA nor AMD/ROCm is found.")
endif()
Expand All @@ -58,7 +58,7 @@ endif()
# Declare project
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
if(USE_CUDA)
if(MSCCLPP_USE_CUDA)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wextra")
project(mscclpp LANGUAGES CXX CUDA)
Expand Down Expand Up @@ -115,13 +115,13 @@ if(IBVERBS_FOUND)
target_compile_definitions(mscclpp_obj PUBLIC USE_IBVERBS)
endif()
set_target_properties(mscclpp_obj PROPERTIES LINKER_LANGUAGE CXX POSITION_INDEPENDENT_CODE 1 VERSION ${MSCCLPP_VERSION} SOVERSION ${MSCCLPP_SOVERSION})
if(USE_CUDA)
target_compile_definitions(mscclpp_obj PRIVATE USE_CUDA)
elseif(USE_ROCM)
target_compile_definitions(mscclpp_obj PRIVATE USE_ROCM)
if(MSCCLPP_USE_CUDA)
target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_USE_CUDA)
elseif(MSCCLPP_USE_ROCM)
target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_USE_ROCM)
endif()
if(ENABLE_TRACE)
target_compile_definitions(mscclpp_obj PRIVATE ENABLE_TRACE)
if(MSCCLPP_ENABLE_TRACE)
target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_ENABLE_TRACE)
endif()
if(NPKIT_FLAGS)
target_compile_definitions(mscclpp_obj PRIVATE ${NPKIT_FLAGS})
Expand Down Expand Up @@ -150,17 +150,17 @@ install(TARGETS mscclpp_static
ARCHIVE DESTINATION ${INSTALL_PREFIX}/lib)

# Tests
if(BUILD_TESTS)
if(MSCCLPP_BUILD_TESTS)
enable_testing() # Called here to allow ctest from the build directory
add_subdirectory(test)
endif()

# Python bindings
if(BUILD_PYTHON_BINDINGS)
if(MSCCLPP_BUILD_PYTHON_BINDINGS)
add_subdirectory(python)
endif()

# NCCL interfaces
if(BUILD_APPS_NCCL)
if(MSCCLPP_BUILD_APPS_NCCL)
add_subdirectory(apps/nccl)
endif()
12 changes: 6 additions & 6 deletions apps/nccl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS src/*)
file(GLOB_RECURSE HEADERS CONFIGURE_DEPENDS include/nccl.h)

if(USE_ROCM)
if(MSCCLPP_USE_ROCM)
set_source_files_properties(${SOURCES} PROPERTIES LANGUAGE CXX)
endif()

Expand All @@ -14,10 +14,10 @@ target_sources(mscclpp_nccl_obj PUBLIC FILE_SET HEADERS FILES ${HEADERS})
target_include_directories(mscclpp_nccl_obj PRIVATE include ${PROJECT_SOURCE_DIR}/src/include SYSTEM PRIVATE ${GPU_INCLUDE_DIRS})
target_link_libraries(mscclpp_nccl_obj PRIVATE ${GPU_LIBRARIES} PUBLIC mscclpp_obj)
set_target_properties(mscclpp_nccl_obj PROPERTIES LINKER_LANGUAGE CXX POSITION_INDEPENDENT_CODE 1 VERSION ${MSCCLPP_VERSION} SOVERSION ${MSCCLPP_SOVERSION})
if(USE_CUDA)
target_compile_definitions(mscclpp_nccl_obj PRIVATE USE_CUDA)
elseif(USE_ROCM)
target_compile_definitions(mscclpp_nccl_obj PRIVATE USE_ROCM)
if(MSCCLPP_USE_CUDA)
target_compile_definitions(mscclpp_nccl_obj PRIVATE MSCCLPP_USE_CUDA)
elseif(MSCCLPP_USE_ROCM)
target_compile_definitions(mscclpp_nccl_obj PRIVATE MSCCLPP_USE_ROCM)
endif()

add_library(mscclpp_nccl SHARED)
Expand All @@ -34,6 +34,6 @@ install(TARGETS mscclpp_nccl
install(TARGETS mscclpp_nccl_static
ARCHIVE DESTINATION ${INSTALL_PREFIX}/lib)

if(BUILD_TESTS)
if(MSCCLPP_BUILD_TESTS)
add_subdirectory(test)
endif()
15 changes: 13 additions & 2 deletions docs/getting-started/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,17 @@
* AMD MI250X GPUs + ROCm >= 5.7
* AMD MI300X GPUs + ROCm >= 6.0
* OS: tested on Ubuntu 18.04 and 20.04
* Libraries: [libnuma](https://github.com/numactl/numactl), MPI (optional)
* Libraries
* [libnuma](https://github.com/numactl/numactl)
```bash
sudo apt-get install libnuma-dev
```
* (Optional, for [building the Python module](#install-from-source-python-module)) Python >= 3.8 and Python Development Package
```bash
sudo apt-get satisfy "python3 (>=3.8), python3-dev (>=3.8)"
```
If you don't want to build the Python module, set `-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF` in your `cmake` command (see [Install from Source (Libraries and Headers)](#install-from-source-libraries-and-headers) for details).
* (Optional, for benchmarks) MPI
* Others
* For NVIDIA platforms, the `nvidia_peermem` driver should be loaded on all nodes. Check it via:
```
Expand Down Expand Up @@ -60,11 +70,12 @@ $ CXX=/path/to/hipcc cmake -DCMAKE_BUILD_TYPE=Release ..
$ make -j
```

(install-from-source-libraries-and-headers)=
## Install from Source (Libraries and Headers)

```bash
# Install the generated headers and binaries to /usr/local/mscclpp
$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/mscclpp -DBUILD_PYTHON_BINDINGS=OFF ..
$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/mscclpp -DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF ..
$ make -j mscclpp mscclpp_static
$ sudo make install/fast
```
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ wheel.packages = ["python/mscclpp", "python/mscclpp_benchmark"]
wheel.install-dir = "mscclpp"

[tool.scikit-build.cmake.define]
BUILD_PYTHON_BINDINGS = "ON"
BUILD_TESTS = "OFF"
MSCCLPP_BUILD_PYTHON_BINDINGS = "ON"
MSCCLPP_BUILD_TESTS = "OFF"

[tool.black]
line-length = 120
Expand Down
8 changes: 4 additions & 4 deletions src/bootstrap/socket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ static int envSocketFamily(void) {

static int findInterfaces(const char* prefixList, char* names, union SocketAddress* addrs, int sock_family,
int maxIfNameSize, int maxIfs) {
#ifdef ENABLE_TRACE
#ifdef MSCCLPP_ENABLE_TRACE
char line[SOCKET_NAME_MAXLEN + 1];
#endif
struct mscclpp::netIf userIfs[MAX_IFS];
Expand Down Expand Up @@ -184,7 +184,7 @@ static bool matchSubnet(struct ifaddrs local_if, union SocketAddress* remote) {

int FindInterfaceMatchSubnet(char* ifNames, union SocketAddress* localAddrs, union SocketAddress* remoteAddr,
int ifNameMaxSize, int maxIfs) {
#ifdef ENABLE_TRACE
#ifdef MSCCLPP_ENABLE_TRACE
char line[SOCKET_NAME_MAXLEN + 1];
#endif
char line_a[SOCKET_NAME_MAXLEN + 1];
Expand Down Expand Up @@ -436,7 +436,7 @@ void Socket::bind() {

void Socket::bindAndListen() {
bind();
#ifdef ENABLE_TRACE
#ifdef MSCCLPP_ENABLE_TRACE
char line[SOCKET_NAME_MAXLEN + 1];
TRACE(MSCCLPP_INIT | MSCCLPP_NET, "Listening on socket %s", SocketToString(&addr_, line));
#endif
Expand All @@ -452,7 +452,7 @@ void Socket::bindAndListen() {

void Socket::connect(int64_t timeout) {
mscclpp::Timer timer;
#ifdef ENABLE_TRACE
#ifdef MSCCLPP_ENABLE_TRACE
char line[SOCKET_NAME_MAXLEN + 1];
#endif
const int one = 1;
Expand Down
6 changes: 3 additions & 3 deletions src/include/atomic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
#ifndef MSCCLPP_ATOMIC_HPP_
#define MSCCLPP_ATOMIC_HPP_

#if defined(USE_CUDA)
#if defined(MSCCLPP_USE_CUDA)
#define MSCCLPP_DEVICE_CUDA
#include <mscclpp/atomic_device.hpp>
#undef MSCCLPP_DEVICE_CUDA
#else // !defined(USE_CUDA)
#else // !defined(MSCCLPP_USE_CUDA)
#define MSCCLPP_DEVICE_HIP
#include <mscclpp/atomic_device.hpp>
#undef MSCCLPP_DEVICE_HIP
#endif // !defined(USE_CUDA)
#endif // !defined(MSCCLPP_USE_CUDA)

#endif // MSCCLPP_ATOMIC_HPP_
2 changes: 1 addition & 1 deletion src/include/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ extern char mscclppLastError[];
#define INFO(FLAGS, ...) mscclppDebugLog(MSCCLPP_LOG_INFO, (FLAGS), __func__, __LINE__, __VA_ARGS__)
#define TRACE_CALL(...) mscclppDebugLog(MSCCLPP_LOG_TRACE, MSCCLPP_CALL, __func__, __LINE__, __VA_ARGS__)

#ifdef ENABLE_TRACE
#ifdef MSCCLPP_ENABLE_TRACE
#define TRACE(FLAGS, ...) mscclppDebugLog(MSCCLPP_LOG_TRACE, (FLAGS), __func__, __LINE__, __VA_ARGS__)
extern std::chrono::steady_clock::time_point mscclppEpoch;
#else
Expand Down
2 changes: 1 addition & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ set(TEST_LIBS_GTEST GTest::gtest_main GTest::gmock_main)
set(TEST_INC_COMMON PRIVATE ${PROJECT_SOURCE_DIR}/include SYSTEM PRIVATE ${GPU_INCLUDE_DIRS})
set(TEST_INC_INTERNAL PRIVATE ${PROJECT_SOURCE_DIR}/src/include)

if(USE_ROCM)
if(MSCCLPP_USE_ROCM)
file(GLOB_RECURSE CU_SOURCES CONFIGURE_DEPENDS *.cu)
set_source_files_properties(${CU_SOURCES} PROPERTIES LANGUAGE CXX)
endif()
Expand Down
2 changes: 1 addition & 1 deletion test/mscclpp-test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download
FetchContent_MakeAvailable(json)

function(add_mscclpp_test_executable name sources)
if(USE_ROCM)
if(MSCCLPP_USE_ROCM)
set_source_files_properties(${sources} PROPERTIES LANGUAGE CXX)
endif()
add_executable(${name} ${sources} common.cc)
Expand Down
Loading