-
Notifications
You must be signed in to change notification settings - Fork 12.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[memprof] Speed up llvm-profdata #117446
base: main
Are you sure you want to change the base?
[memprof] Speed up llvm-profdata #117446
Conversation
CallStackRadixTreeBuilder::build takes the parameter MemProfFrameIndexes by value, involving copies: std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>> MemProfFrameIndexes This patch changes the type to a pointer so that we don't have to make a copy every time we pass the argument.
@llvm/pr-subscribers-pgo Author: Kazu Hirata (kazutakahirata) ChangesCallStackRadixTreeBuilder::build takes the parameter std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>> This patch changes the type to a pointer so that we don't have to make Full diff: https://github.com/llvm/llvm-project/pull/117446.diff 5 Files Affected:
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index a56ad1e0dbbceb..33879e77fa84e9 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -1125,21 +1125,20 @@ template <typename FrameIdTy> class CallStackRadixTreeBuilder {
// Encode a call stack into RadixArray. Return the starting index within
// RadixArray.
- LinearCallStackId
- encodeCallStack(const llvm::SmallVector<FrameIdTy> *CallStack,
- const llvm::SmallVector<FrameIdTy> *Prev,
- std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
- MemProfFrameIndexes);
+ LinearCallStackId encodeCallStack(
+ const llvm::SmallVector<FrameIdTy> *CallStack,
+ const llvm::SmallVector<FrameIdTy> *Prev,
+ const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes);
public:
CallStackRadixTreeBuilder() = default;
// Build a radix tree array.
- void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
- &&MemProfCallStackData,
- std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
- MemProfFrameIndexes,
- llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);
+ void
+ build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
+ &&MemProfCallStackData,
+ const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
+ llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);
ArrayRef<LinearFrameId> getRadixArray() const { return RadixArray; }
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8f22a50a5e0245..63f4e34074e06b 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4235,7 +4235,7 @@ static DenseMap<CallStackId, LinearCallStackId> writeMemoryProfileRadixTree(
CallStackRadixTreeBuilder<LinearFrameId> Builder;
// We don't need a MemProfFrameIndexes map as we have already converted the
// full stack id hash to a linear offset into the StackIds array.
- Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/std::nullopt,
+ Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/nullptr,
FrameHistogram);
Stream.EmitRecord(bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray(),
RadixAbbrev);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 725ff9256fd4a0..4c4418ec5a5352 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -636,7 +636,7 @@ writeMemProfCallStackArray(
MemProfCallStackIndexes;
memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
for (auto I : Builder.getRadixArray())
OS.write32(I);
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 20cc4eececc9b1..1c240c3858cc76 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -335,8 +335,7 @@ template <typename FrameIdTy>
LinearCallStackId CallStackRadixTreeBuilder<FrameIdTy>::encodeCallStack(
const llvm::SmallVector<FrameIdTy> *CallStack,
const llvm::SmallVector<FrameIdTy> *Prev,
- std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
- MemProfFrameIndexes) {
+ const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes) {
// Compute the length of the common root prefix between Prev and CallStack.
uint32_t CommonLen = 0;
if (Prev) {
@@ -381,8 +380,7 @@ template <typename FrameIdTy>
void CallStackRadixTreeBuilder<FrameIdTy>::build(
llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
&&MemProfCallStackData,
- std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
- MemProfFrameIndexes,
+ const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram) {
// Take the vector portion of MemProfCallStackData. The vector is exactly
// what we need to sort. Also, we no longer need its lookup capability.
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index c3f35e41b5fc74..7b9910e295df9d 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -628,7 +628,7 @@ TEST(MemProf, RadixTreeBuilderEmpty) {
FrameHistogram =
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty());
const auto Mappings = Builder.takeCallStackPos();
@@ -646,7 +646,7 @@ TEST(MemProf, RadixTreeBuilderOne) {
FrameHistogram =
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({
3U, // Size of CS1,
@@ -673,7 +673,7 @@ TEST(MemProf, RadixTreeBuilderTwo) {
FrameHistogram =
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(),
testing::ElementsAreArray({
@@ -711,7 +711,7 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
FrameHistogram =
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(),
testing::ElementsAreArray({
|
CallStackRadixTreeBuilder::build takes the parameter
MemProfFrameIndexes by value, involving copies:
std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
MemProfFrameIndexes
Then "build" makes another copy of MemProfFrameIndexe and passes it to
encodeCallStack for every call stack, which is painfully slow.
This patch changes the type to a pointer so that we don't have to make
a copy every time we pass the argument.
Without this patch, it takes 553 seconds to run "llvm-profdata merge"
on a large MemProf raw profile. This patch shortenes that down to 67
seconds.