From ac01dd197f9fafb0301b0432da90698cd7e9517a Mon Sep 17 00:00:00 2001 From: Ben Howe Date: Fri, 11 Oct 2024 16:52:57 +0000 Subject: [PATCH 01/18] DCO Remediation Commit for Ben Howe I, Ben Howe , hereby add my Signed-off-by to this commit: 86681ef67d3b76c0e468f6595e2c2524cf9b4b6c Signed-off-by: Ben Howe Signed-off-by: Anna Gringauze From 21a87c1646f168a6465c3e51dc4fc510c1de9c43 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 17 Sep 2024 14:40:45 -0700 Subject: [PATCH 02/18] State pointer synthesis for quantum hardware Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Builder/Intrinsics.h | 4 + include/cudaq/Optimizer/Transforms/Passes.td | 38 ++++ lib/Optimizer/Builder/Intrinsics.cpp | 4 + lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp | 3 +- lib/Optimizer/Transforms/CMakeLists.txt | 1 + lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 11 +- .../Transforms/StateInitialization.cpp | 146 +++++++++++++++ python/runtime/cudaq/algorithms/py_state.cpp | 5 +- .../cudaq/platform/py_alt_launch_kernel.cpp | 2 +- runtime/common/ArgumentConversion.cpp | 167 ++++++++++++++++-- runtime/common/ArgumentConversion.h | 22 ++- runtime/common/BaseRemoteRESTQPU.h | 33 ++-- runtime/common/BaseRestRemoteClient.h | 4 +- runtime/common/CMakeLists.txt | 2 +- runtime/common/SimulationState.h | 11 ++ runtime/cudaq/CMakeLists.txt | 1 + runtime/cudaq/algorithms/get_state.h | 12 ++ .../rest/helpers/quantinuum/quantinuum.yml | 2 + runtime/cudaq/qis/quantum_state.cpp | 113 ++++++++++++ runtime/cudaq/qis/quantum_state.h | 151 ++++++++++++++++ runtime/cudaq/qis/remote_state.cpp | 2 +- runtime/cudaq/qis/remote_state.h | 3 +- .../Remote-Sim/qvector_init_from_state.cpp | 16 ++ .../execution/qvector_init_from_state.cpp | 147 +++++++++++++++ targettests/execution/state_init.cpp | 2 +- test/Quake/arg_subst-5.txt | 15 ++ test/Quake/arg_subst-6.txt | 11 ++ test/Quake/arg_subst_func.qke | 37 +++- test/Quake/state_init.qke | 37 ++++ test/Quake/state_prep.qke | 2 +- tpls/Stim | 2 +- 31 files 
changed, 955 insertions(+), 51 deletions(-) create mode 100644 lib/Optimizer/Transforms/StateInitialization.cpp create mode 100644 runtime/cudaq/qis/quantum_state.cpp create mode 100644 runtime/cudaq/qis/quantum_state.h create mode 100644 targettests/execution/qvector_init_from_state.cpp create mode 100644 test/Quake/arg_subst-5.txt create mode 100644 test/Quake/arg_subst-6.txt create mode 100644 test/Quake/state_init.qke diff --git a/include/cudaq/Optimizer/Builder/Intrinsics.h b/include/cudaq/Optimizer/Builder/Intrinsics.h index 30ab0e696a..c05021b879 100644 --- a/include/cudaq/Optimizer/Builder/Intrinsics.h +++ b/include/cudaq/Optimizer/Builder/Intrinsics.h @@ -55,6 +55,10 @@ static constexpr const char createCudaqStateFromDataFP32[] = // Delete a state created by the runtime functions above. static constexpr const char deleteCudaqState[] = "__nvqpp_cudaq_state_delete"; +// Get state of a kernel (placeholder function, calls are always replaced in +// opts) +static constexpr const char getCudaqState[] = "__nvqpp_cudaq_state_get"; + /// Builder for lowering the clang AST to an IR for CUDA-Q. Lowering includes /// the transformation of both quantum and classical computation. 
Different /// features of the CUDA-Q programming model are lowered into different dialects diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 9ca3810f39..66eb4cfcb0 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -779,6 +779,44 @@ def DeleteStates : Pass<"delete-states", "mlir::ModuleOp"> { }]; } +def StateInitialization : Pass<"state-initialization", "mlir::ModuleOp"> { + let summary = + "Replace `quake.init_state` instructions with call to the kernel generating the state"; + let description = [{ + Argument synthesis for state pointers for quantum devices substitutes state + argument by a new state created from `__nvqpp_cudaq_state_get` intrinsic, which + in turn accepts the name for the synthesized kernel that generated the state. + + This optimization completes the replacement of `quake.init_state` instruction by: + + - Replace `quake.init_state` by a call that `get_state` call refers to. + - Remove all unneeded instructions. 
+ + For example: + + Before StateInitialization (state-initialization): + ``` + func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %0 = cc.string_literal "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr> + %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr + %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr + %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 + %4 = quake.alloca !quake.veq[%3 : i64] + %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr) -> !quake.veq + return + } + ``` + + After StateInitialization (state-initialization): + ``` + func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %5 = call @__nvqpp__mlirgen__test_init_state.modified_0() : () -> !quake.veq + return + } + ``` + }]; +} + def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> { let summary = "Convert state vector data into gates"; diff --git a/lib/Optimizer/Builder/Intrinsics.cpp b/lib/Optimizer/Builder/Intrinsics.cpp index 12e430dc03..57c636e31d 100644 --- a/lib/Optimizer/Builder/Intrinsics.cpp +++ b/lib/Optimizer/Builder/Intrinsics.cpp @@ -261,6 +261,10 @@ static constexpr IntrinsicCode intrinsicTable[] = { {cudaq::deleteCudaqState, {}, R"#( func.func private @__nvqpp_cudaq_state_delete(%p : !cc.ptr) -> () + )#"}, + + {cudaq::getCudaqState, {}, R"#( + func.func private @__nvqpp_cudaq_state_get(%p : !cc.ptr) -> !cc.ptr )#"}, {cudaq::getNumQubitsFromCudaqState, {}, R"#( diff --git a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp index 4de20fd7be..04eac5b06f 100644 --- a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp +++ b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp @@ -49,7 +49,8 @@ struct VerifyNVQIRCallOpsPass cudaq::getNumQubitsFromCudaqState, cudaq::createCudaqStateFromDataFP32, cudaq::createCudaqStateFromDataFP64, - cudaq::deleteCudaqState}; + cudaq::deleteCudaqState, + cudaq::getCudaqState}; // It must be either NVQIR extension functions or in the allowed 
list. return std::find(NVQIR_FUNCS.begin(), NVQIR_FUNCS.end(), functionName) != NVQIR_FUNCS.end() || diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index a6b94d9a59..f107d78bde 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -50,6 +50,7 @@ add_cudaq_library(OptTransforms QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp + StateInitialization.cpp StatePreparation.cpp UnitarySynthesis.cpp WiresToWiresets.cpp diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index 9328b78896..8cf6a019f8 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -170,9 +170,10 @@ class AllocaPattern : public OpRewritePattern { if (auto load = dyn_cast(useuser)) { rewriter.setInsertionPointAfter(useuser); LLVM_DEBUG(llvm::dbgs() << "replaced load\n"); - rewriter.replaceOpWithNewOp( - load, eleTy, conArr, - ArrayRef{offset}); + auto extract = rewriter.create( + loc, eleTy, conArr, ArrayRef{offset}); + rewriter.replaceAllUsesWith(load, extract); + toErase.push_back(load); continue; } if (isa(useuser)) @@ -199,8 +200,10 @@ class AllocaPattern : public OpRewritePattern { toErase.push_back(alloc); } - for (auto *op : toErase) + for (auto *op : toErase) { + op->dropAllUses(); rewriter.eraseOp(op); + } return success(); } diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/StateInitialization.cpp new file mode 100644 index 0000000000..3a122f02a7 --- /dev/null +++ b/lib/Optimizer/Transforms/StateInitialization.cpp @@ -0,0 +1,146 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. 
* + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" +#include + +namespace cudaq::opt { +#define GEN_PASS_DEF_STATEINITIALIZATION +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + +#define DEBUG_TYPE "state-initialization" + +using namespace mlir; + +namespace { + +static bool isCall(Operation *callOp, std::vector &&names) { + if (callOp) { + if (auto createStateCall = dyn_cast(callOp)) { + if (auto calleeAttr = createStateCall.getCalleeAttr()) { + auto funcName = calleeAttr.getValue().str(); + if (std::find(names.begin(), names.end(), funcName) != names.end()) + return true; + } + } + } + return false; +} + +static bool isGetStateCall(Operation *callOp) { + return isCall(callOp, {cudaq::getCudaqState}); +} + +static bool isNumberOfQubitsCall(Operation *callOp) { + return isCall(callOp, {cudaq::getNumQubitsFromCudaqState}); +} + +// clang-format off +/// Replace `quake.init_state` by a call to a (modified) kernel that produced the state. 
+/// ``` +/// %0 = cc.string_literal "callee.modified_0" : !cc.ptr> +/// %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr +/// %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr +/// %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 +/// %4 = quake.alloca !quake.veq[%3 : i64] +/// %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr) -> !quake.veq +/// ─────────────────────────────────────────── +/// ... +/// %5 = call @callee.modified_0() : () -> !quake.veq +/// ``` +// clang-format on +class StateInitPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(quake::InitializeStateOp initState, + PatternRewriter &rewriter) const override { + auto loc = initState.getLoc(); + auto allocaOp = initState.getOperand(0).getDefiningOp(); + auto getStateOp = initState.getOperand(1).getDefiningOp(); + auto numOfQubits = allocaOp->getOperand(0).getDefiningOp(); + + if (isGetStateCall(getStateOp)) { + auto calleeNameOp = getStateOp->getOperand(0); + if (auto cast = dyn_cast_or_null( + calleeNameOp.getDefiningOp())) { + calleeNameOp = cast.getOperand(); + + if (auto literal = dyn_cast_or_null( + calleeNameOp.getDefiningOp())) { + auto calleeName = literal.getStringLiteral(); + + Value result = + rewriter + .create(loc, initState.getType(), calleeName, + mlir::ValueRange{}) + .getResult(0); + rewriter.replaceAllUsesWith(initState, result); + rewriter.eraseOp(initState); + allocaOp->dropAllUses(); + rewriter.eraseOp(allocaOp); + if (isNumberOfQubitsCall(numOfQubits)) { + numOfQubits->dropAllUses(); + rewriter.eraseOp(numOfQubits); + } + getStateOp->dropAllUses(); + rewriter.eraseOp(getStateOp); + cast->dropAllUses(); + rewriter.eraseOp(cast); + literal->dropAllUses(); + rewriter.eraseOp(literal); + return success(); + } + } + } + return failure(); + } +}; + +class StateInitializationPass + : public cudaq::opt::impl::StateInitializationBase< + StateInitializationPass> { +public: + using 
StateInitializationBase::StateInitializationBase; + + void runOnOperation() override { + auto *ctx = &getContext(); + auto module = getOperation(); + for (Operation &op : *module.getBody()) { + auto func = dyn_cast(op); + if (!func) + continue; + + std::string funcName = func.getName().str(); + RewritePatternSet patterns(ctx); + patterns.insert(ctx); + + LLVM_DEBUG(llvm::dbgs() + << "Before state initialization: " << func << '\n'); + + if (failed(applyPatternsAndFoldGreedily(func.getOperation(), + std::move(patterns)))) + signalPassFailure(); + + LLVM_DEBUG(llvm::dbgs() + << "After state initialization: " << func << '\n'); + } + } +}; +} // namespace diff --git a/python/runtime/cudaq/algorithms/py_state.cpp b/python/runtime/cudaq/algorithms/py_state.cpp index 77a8e4a36d..74e098ebbf 100644 --- a/python/runtime/cudaq/algorithms/py_state.cpp +++ b/python/runtime/cudaq/algorithms/py_state.cpp @@ -96,8 +96,9 @@ class PyRemoteSimulationState : public RemoteSimulationState { } } - std::pair> getKernelInfo() const override { - return {kernelName, argsData->getArgs()}; + std::optional>> + getKernelInfo() const override { + return std::make_pair(kernelName, argsData->getArgs()); } std::complex overlap(const cudaq::SimulationState &other) override { diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index b91627de9f..a7531f9caa 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -517,7 +517,7 @@ MlirModule synthesizeKernel(const std::string &name, MlirModule module, auto isLocalSimulator = platform.is_simulator() && !platform.is_emulated(); auto isSimulator = isLocalSimulator || isRemoteSimulator; - cudaq::opt::ArgumentConverter argCon(name, unwrap(module), isSimulator); + cudaq::opt::ArgumentConverter argCon(name, unwrap(module)); argCon.gen(runtimeArgs.getArgs()); std::string kernName = cudaq::runtime::cudaqGenPrefixName + 
name; SmallVector kernels = {kernName}; diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 424cbd8873..83e4dd3725 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -10,6 +10,8 @@ #include "cudaq.h" #include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Runtime.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeTypes.h" #include "cudaq/Todo.h" #include "cudaq/qis/pauli_word.h" #include "cudaq/utils/registry.h" @@ -97,11 +99,25 @@ static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp substMod, llvm::DataLayout &); static Value genConstant(OpBuilder &builder, const cudaq::state *v, - ModuleOp substMod, llvm::DataLayout &layout, - llvm::StringRef kernelName, bool isSimulator) { - if (isSimulator) { - // The program is executed remotely, materialize the simulation data - // into an array and create a new state from it. + llvm::DataLayout &layout, + cudaq::opt::ArgumentConverter &converter) { + auto simState = + cudaq::state_helper::getSimulationState(const_cast(v)); + + auto kernelName = converter.getKernelName(); + auto sourceMod = converter.getSourceModule(); + auto substMod = converter.getSubstitutionModule(); + + // If the state has amplitude data, we materialize the data as a state + // vector and create a new state from it. + // TODO: how to handle density matrices? Should we just inline calls? + if (simState->hasData()) { + // The call below might cause lazy execution of the state kernel. + // TODO: For lazy execution scenario on remote simulators, we have the + // kernel info available on the state as well, before we needed to run + // the state kernel and compute its data, which might cause significant + // data transfer). Investigate if it is more performant to use the other + // synthesis option in that case (see the next `if`). 
auto numQubits = v->get_num_qubits(); // We currently only synthesize small states. @@ -130,11 +146,11 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, std::string name = kernelName.str() + ".rodata_synth_" + std::to_string(counter++); irBuilder.genVectorOfConstants(loc, substMod, name, vec); - auto conGlobal = builder.create(loc, ptrTy, name); - return builder.create(loc, arrTy, conGlobal); + + return builder.create(loc, ptrTy, name); }; - auto conArr = is64Bit ? genConArray.template operator()() + auto buffer = is64Bit ? genConArray.template operator()() : genConArray.template operator()(); auto createState = is64Bit ? cudaq::createCudaqStateFromDataFP64 @@ -146,21 +162,111 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, auto stateTy = cudaq::cc::StateType::get(ctx); auto statePtrTy = cudaq::cc::PointerType::get(stateTy); auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type()); - auto buffer = builder.create(loc, arrTy); - builder.create(loc, conArr, buffer); auto cast = builder.create(loc, i8PtrTy, buffer); auto statePtr = builder .create(loc, statePtrTy, createState, ValueRange{cast, arrSize}) .getResult(0); + return builder.create(loc, statePtrTy, statePtr); + } + + // For quantum hardware, replace states with calls to kernels that generated + // them. + if (simState->getKernelInfo().has_value()) { + auto [calleeName, calleeArgs] = simState->getKernelInfo().value(); + + std::string calleeKernelName = + cudaq::runtime::cudaqGenPrefixName + calleeName; + + auto ctx = builder.getContext(); + auto loc = builder.getUnknownLoc(); - // TODO: Delete the new state before function exit. 
+ auto code = cudaq::get_quake_by_name(calleeName, /*throwException=*/false); + assert(!code.empty() && "Quake code not found for callee"); + auto fromModule = parseSourceString(code, ctx); + + static unsigned counter = 0; + std::string modifiedCalleeName = + calleeName + ".modified_" + std::to_string(counter++); + std::string modifiedCalleeKernelName = + cudaq::runtime::cudaqGenPrefixName + modifiedCalleeName; + + // Create callee.modified that returns concat of veq allocations. + auto calleeFunc = fromModule->lookupSymbol(calleeKernelName); + assert(calleeFunc && "callee is missing"); + auto argTypes = calleeFunc.getArgumentTypes(); + auto retType = quake::VeqType::getUnsized(ctx); + auto funcTy = FunctionType::get(ctx, argTypes, {retType}); + + { + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToEnd(sourceMod.getBody()); + + auto modifiedCalleeFunc = cast(builder.clone(*calleeFunc)); + modifiedCalleeFunc.setName(modifiedCalleeKernelName); + modifiedCalleeFunc.setType(funcTy); + modifiedCalleeFunc.setPrivate(); + + OpBuilder modifiedBuilder(ctx); + SmallVector allocations; + SmallVector cleanUps; + for (auto &op : modifiedCalleeFunc.getOps()) { + if (auto alloc = dyn_cast(op)) { + allocations.push_back(alloc.getResult()); + // Replace by the result of quake.init_state if used by it + for (auto *user : op.getUsers()) { + if (auto init = dyn_cast(*user)) { + allocations.pop_back(); + allocations.push_back(init.getResult()); + } + } + } + if (auto retOp = dyn_cast(op)) { + if (retOp.getOperands().size() == 0) { + modifiedBuilder.setInsertionPointAfter(retOp); + assert(allocations.size() > 0 && "No veq allocations found"); + Value ret = modifiedBuilder.create( + loc, quake::VeqType::getUnsized(ctx), allocations); + modifiedBuilder.create(loc, ret); + cleanUps.push_back(retOp); + } + } + } + for (auto *op : cleanUps) { + op->dropAllUses(); + op->erase(); + } + } + + // Create substitutions for the `callee.modified.N`. 
+ converter.genCallee(modifiedCalleeName, calleeArgs); + + // Create a subst for state pointer. + auto strLitTy = cudaq::cc::PointerType::get( + cudaq::cc::ArrayType::get(builder.getContext(), builder.getI8Type(), + modifiedCalleeKernelName.size() + 1)); + auto callee = builder.create( + loc, strLitTy, builder.getStringAttr(modifiedCalleeKernelName)); + + auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type()); + auto calleeCast = builder.create(loc, i8PtrTy, callee); + + cudaq::IRBuilder irBuilder(ctx); + auto result = irBuilder.loadIntrinsic(substMod, cudaq::getCudaqState); + assert(succeeded(result) && "loading intrinsic should never fail"); + + auto statePtrTy = + cudaq::cc::PointerType::get(cudaq::cc::StateType::get(ctx)); + auto statePtr = + builder + .create(loc, statePtrTy, cudaq::getCudaqState, + ValueRange{calleeCast}) + .getResult(0); return builder.create(loc, statePtrTy, statePtr); } - // The program is executed on quantum hardware, state data is not - // available and needs to be regenerated. - TODO("cudaq::state* argument synthesis for quantum hardware"); + + TODO("cudaq::state* argument synthesis for quantum hardware for c functions"); return {}; } @@ -326,7 +432,7 @@ cudaq::opt::ArgumentConverter::ArgumentConverter(StringRef kernelName, ModuleOp sourceModule, bool isSimulator) : sourceModule(sourceModule), builder(sourceModule.getContext()), - kernelName(kernelName), isSimulator(isSimulator) { + kernelName(kernelName) { substModule = builder.create(builder.getUnknownLoc()); } @@ -335,7 +441,7 @@ void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { // We should look up the input type signature here. 
auto fun = sourceModule.lookupSymbol( - cudaq::runtime::cudaqGenPrefixName + kernelName.str()); + cudaq::runtime::cudaqGenPrefixName + kernelName); FunctionType fromFuncTy = fun.getFunctionType(); for (auto iter : llvm::enumerate(llvm::zip(fromFuncTy.getInputs(), arguments))) { @@ -403,8 +509,7 @@ void cudaq::opt::ArgumentConverter::gen(const std::vector &arguments) { .Case([&](cc::PointerType ptrTy) -> cc::ArgumentSubstitutionOp { if (ptrTy.getElementType() == cc::StateType::get(ctx)) return buildSubst(static_cast(argPtr), - substModule, dataLayout, kernelName, - isSimulator); + dataLayout, *this); return {}; }) .Case([&](cc::StdvecType ty) { @@ -457,3 +562,29 @@ void cudaq::opt::ArgumentConverter::gen_drop_front( } gen(partialArgs); } + +std::pair, std::vector> +cudaq::opt::ArgumentConverter::collectAllSubstitutions() { + std::vector kernels; + std::vector substs; + + std::function collect = + [&kernels, &substs, &collect](ArgumentConverter &con) { + auto name = con.getKernelName(); + std::string kernName = cudaq::runtime::cudaqGenPrefixName + name.str(); + kernels.push_back(kernName); + + { + std::string substBuff; + llvm::raw_string_ostream ss(substBuff); + ss << con.getSubstitutionModule(); + substs.push_back(substBuff); + } + + for (auto &calleeCon : con.getCalleeConverters()) + collect(calleeCon); + }; + + collect(*this); + return {kernels, substs}; +} diff --git a/runtime/common/ArgumentConversion.h b/runtime/common/ArgumentConversion.h index 45e6607b0c..be438fe66c 100644 --- a/runtime/common/ArgumentConversion.h +++ b/runtime/common/ArgumentConversion.h @@ -14,6 +14,7 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/Types.h" #include +#include namespace cudaq::opt { @@ -47,13 +48,30 @@ class ArgumentConverter { /// created. 
mlir::ModuleOp getSubstitutionModule() { return substModule; } + mlir::ModuleOp getSourceModule() { return sourceModule; } + + mlir::StringRef getKernelName() { return kernelName; } + + void genCallee(std::string &calleeName, std::vector &args) { + auto converter = ArgumentConverter(calleeName, sourceModule); + converter.gen(args); + calleeConverters.push_back(converter); + } + + std::vector &getCalleeConverters() { + return calleeConverters; + } + + std::pair, std::vector> + collectAllSubstitutions(); + private: mlir::ModuleOp sourceModule; mlir::ModuleOp substModule; mlir::OpBuilder builder; - mlir::StringRef kernelName; + std::string kernelName; mlir::SmallVector substitutions; - bool isSimulator; + std::vector calleeConverters; }; } // namespace cudaq::opt diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 61c26dc791..41f45b6b75 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -393,15 +393,18 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (!func->hasAttr(cudaq::entryPointAttrName)) func->setAttr(cudaq::entryPointAttrName, builder.getUnitAttr()); auto moduleOp = builder.create(); - moduleOp.push_back(func.clone()); moduleOp->setAttrs(m_module->getAttrDictionary()); for (auto &op : m_module.getOps()) { - // Add any global symbols, including global constant arrays. - // Global constant arrays can be created during compilation, - // `lift-array-value`, `quake-synthesizer`, and `get-concrete-matrix` - // passes. - if (auto globalOp = dyn_cast(op)) + if (auto funcOp = dyn_cast(op)) { + // Add quantum kernels defined in the module. + if (funcOp->hasAttr(cudaq::kernelAttrName) || + funcOp.getName().startswith("__nvqpp__mlirgen__") || + funcOp.getBody().empty()) + moduleOp.push_back(funcOp.clone()); + } + // Add globals defined in the module. 
+ if (auto globalOp = dyn_cast(op)) moduleOp.push_back(globalOp.clone()); } @@ -428,16 +431,18 @@ class BaseRemoteRESTQPU : public cudaq::QPU { mlir::PassManager pm(&context); if (!rawArgs.empty()) { cudaq::info("Run Argument Synth.\n"); - opt::ArgumentConverter argCon(kernelName, moduleOp, false); + opt::ArgumentConverter argCon(kernelName, moduleOp); argCon.gen(rawArgs); - std::string kernName = cudaq::runtime::cudaqGenPrefixName + kernelName; - mlir::SmallVector kernels = {kernName}; - std::string substBuff; - llvm::raw_string_ostream ss(substBuff); - ss << argCon.getSubstitutionModule(); - mlir::SmallVector substs = {substBuff}; + auto [kernels, substs] = argCon.collectAllSubstitutions(); pm.addNestedPass( - opt::createArgumentSynthesisPass(kernels, substs)); + cudaq::opt::createArgumentSynthesisPass( + mlir::SmallVector{kernels.begin(), + kernels.end()}, + mlir::SmallVector{substs.begin(), + substs.end()})); + pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(opt::createDeleteStates()); + pm.addPass(opt::createStateInitialization()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index b938815d92..5384d71008 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -329,8 +329,8 @@ class BaseRemoteRestRuntimeClient : public cudaq::RemoteRuntimeClient { if (!castedState1 || !castedState2) throw std::runtime_error( "Invalid execution context: input states are not compatible"); - auto [kernelName1, args1] = castedState1->getKernelInfo(); - auto [kernelName2, args2] = castedState2->getKernelInfo(); + auto [kernelName1, args1] = castedState1->getKernelInfo().value(); + auto [kernelName2, args2] = castedState2->getKernelInfo().value(); cudaq::IRPayLoad stateIrPayload1, stateIrPayload2; stateIrPayload1.entryPoint = kernelName1; diff --git 
a/runtime/common/CMakeLists.txt b/runtime/common/CMakeLists.txt index bb8a5ecaba..e1a38c4e25 100644 --- a/runtime/common/CMakeLists.txt +++ b/runtime/common/CMakeLists.txt @@ -102,7 +102,7 @@ set_source_files_properties( JIT.cpp Logger.cpp RuntimeMLIR.cpp - PROPERTIES COMPILE_FLAGS -fno-rtti +# PROPERTIES COMPILE_FLAGS -fno-rtti ) target_include_directories(cudaq-mlir-runtime diff --git a/runtime/common/SimulationState.h b/runtime/common/SimulationState.h index 3ec97f2568..694770fa48 100644 --- a/runtime/common/SimulationState.h +++ b/runtime/common/SimulationState.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -140,6 +141,16 @@ class SimulationState { return createFromSizeAndPtr(size, ptr, data.index()); } + /// @brief True if the state has amplitudes or density matrix + // is available or can be computed. + virtual bool hasData() const { return true; } + + /// @brief Helper to retrieve (kernel name, `args` pointers) + virtual std::optional>> + getKernelInfo() const { + return std::nullopt; + } + /// @brief Return the tensor at the given index. Throws /// for an invalid tensor index. 
virtual Tensor getTensor(std::size_t tensorIdx = 0) const = 0; diff --git a/runtime/cudaq/CMakeLists.txt b/runtime/cudaq/CMakeLists.txt index 9c08eef354..2efb8824e7 100644 --- a/runtime/cudaq/CMakeLists.txt +++ b/runtime/cudaq/CMakeLists.txt @@ -20,6 +20,7 @@ add_library(${LIBRARY_NAME} platform/quantum_platform.cpp qis/execution_manager_c_api.cpp qis/execution_manager.cpp + qis/quantum_state.cpp qis/remote_state.cpp qis/state.cpp utils/cudaq_utils.cpp diff --git a/runtime/cudaq/algorithms/get_state.h b/runtime/cudaq/algorithms/get_state.h index bbb64ebcbf..a57fa0194e 100644 --- a/runtime/cudaq/algorithms/get_state.h +++ b/runtime/cudaq/algorithms/get_state.h @@ -14,6 +14,7 @@ #include "cudaq/host_config.h" #include "cudaq/platform.h" #include "cudaq/platform/QuantumExecutionQueue.h" +#include "cudaq/qis/quantum_state.h" #include "cudaq/qis/remote_state.h" #include "cudaq/qis/state.h" #include @@ -118,6 +119,17 @@ auto get_state(QuantumKernel &&kernel, Args &&...args) { return state(new RemoteSimulationState(std::forward(kernel), std::forward(args)...)); } +#endif +#if defined(CUDAQ_QUANTUM_DEVICE) + // Store kernel name and arguments for quantum states. 
+ if (!cudaq::get_quake_by_name(cudaq::getKernelName(kernel), false).empty()) { + return state(new QuantumState(std::forward(kernel), + std::forward(args)...)); + } else { + throw std::runtime_error( + "cudaq::state* argument synthesis is not supported for quantum hardware" + " for c-like functions, use class kernels instead"); + } #endif return details::extractState([&]() mutable { cudaq::invokeKernel(std::forward(kernel), diff --git a/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml b/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml index 21cc45be1e..0a291a240d 100644 --- a/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml +++ b/runtime/cudaq/platform/default/rest/helpers/quantinuum/quantinuum.yml @@ -13,6 +13,8 @@ config: platform-qpu: remote_rest # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Add the rest-qpu library to the link list link-libs: ["-lcudaq-rest-qpu"] # Define the lowering pipeline diff --git a/runtime/cudaq/qis/quantum_state.cpp b/runtime/cudaq/qis/quantum_state.cpp new file mode 100644 index 0000000000..faaae5b510 --- /dev/null +++ b/runtime/cudaq/qis/quantum_state.cpp @@ -0,0 +1,113 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +#include "quantum_state.h" +#include "common/Logger.h" + +namespace cudaq { + +QuantumState::~QuantumState() { + if (!platformExecutionLog.empty()) { + // Flush any info log from the remote execution + printf("%s\n", platformExecutionLog.c_str()); + platformExecutionLog.clear(); + } + + for (std::size_t counter = 0; auto &ptr : args) + deleters[counter++](ptr); + + args.clear(); + deleters.clear(); +} + +std::size_t QuantumState::getNumQubits() const { + throw std::runtime_error( + "getNumQubits is not implemented for quantum hardware"); +} + +cudaq::SimulationState::Tensor +QuantumState::getTensor(std::size_t tensorIdx) const { + throw std::runtime_error("getTensor is not implemented for quantum hardware"); +} + +/// @brief Return all tensors that represent this state +std::vector QuantumState::getTensors() const { + throw std::runtime_error( + "getTensors is not implemented for quantum hardware"); + return {getTensor()}; +} + +/// @brief Return the number of tensors that represent this state. 
+std::size_t QuantumState::getNumTensors() const { + throw std::runtime_error( + "getNumTensors is not implemented for quantum hardware"); +} + +std::complex +QuantumState::operator()(std::size_t tensorIdx, + const std::vector &indices) { + throw std::runtime_error( + "operator() is not implemented for quantum hardware"); +} + +std::unique_ptr +QuantumState::createFromSizeAndPtr(std::size_t size, void *ptr, std::size_t) { + throw std::runtime_error( + "createFromSizeAndPtr is not implemented for quantum hardware"); +} + +void QuantumState::dump(std::ostream &os) const { + throw std::runtime_error("dump is not implemented for quantum hardware"); +} + +cudaq::SimulationState::precision QuantumState::getPrecision() const { + throw std::runtime_error( + "getPrecision is not implemented for quantum hardware"); +} + +void QuantumState::destroyState() { + // There is no state data so nothing to destroy. +} + +bool QuantumState::isDeviceData() const { + throw std::runtime_error( + "isDeviceData is not implemented for quantum hardware"); +} + +void QuantumState::toHost(std::complex *clientAllocatedData, + std::size_t numElements) const { + throw std::runtime_error("toHost is not implemented for quantum hardware"); +} + +void QuantumState::toHost(std::complex *clientAllocatedData, + std::size_t numElements) const { + throw std::runtime_error("toHost is not implemented for quantum hardware"); +} + +std::optional>> +QuantumState::getKernelInfo() const { + return std::make_pair(kernelName, args); +} + +std::vector> +QuantumState::getAmplitudes(const std::vector> &basisStates) { + throw std::runtime_error( + "getAmplitudes is not implemented for quantum hardware"); +} + +std::complex +QuantumState::getAmplitude(const std::vector &basisState) { + throw std::runtime_error( + "getAmplitude is not implemented for quantum hardware"); +} + +std::complex +QuantumState::overlap(const cudaq::SimulationState &other) { + throw std::runtime_error("overlap is not implemented for quantum 
hardware"); +} +} // namespace cudaq diff --git a/runtime/cudaq/qis/quantum_state.h b/runtime/cudaq/qis/quantum_state.h new file mode 100644 index 0000000000..63117eb462 --- /dev/null +++ b/runtime/cudaq/qis/quantum_state.h @@ -0,0 +1,151 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#pragma once + +#include "common/SimulationState.h" +#include "cudaq.h" +#include "cudaq/utils/cudaq_utils.h" + +namespace cudaq { +/// Implementation of `SimulationState` for quantum device backends. +// The state is represented by a quantum kernel. +// Quantum state contains all the information we need to replicate a +// call to the kernel that created the state. +class QuantumState : public cudaq::SimulationState { +protected: + std::string kernelName; + // Lazily-evaluated state data (just keeping the kernel name and arguments). + // e.g., to be evaluated at amplitude accessor APIs (const APIs, hence needs + // to be mutable) or overlap calculation with another remote state (combining + // the IR of both states for remote evaluation) + mutable std::unique_ptr state; + // Cache log messages from the remote execution. + // Mutable to support lazy execution during `const` API calls. + mutable std::string platformExecutionLog; + using ArgDeleter = std::function; + /// @brief Vector of arguments + // Note: each argument is copied; for a pointer, the pointee is copied. + std::vector args; + /// @brief Deletion functions for the arguments. 
+ std::vector> deleters; + +public: + template + void addArgument(const T &arg) { + if constexpr (std::is_pointer_v>) { + if constexpr (std::is_copy_constructible_v< + std::remove_pointer_t>>) { + auto ptr = new std::remove_pointer_t>(*arg); + args.push_back(ptr); + deleters.push_back([](void *ptr) { + delete static_cast> *>(ptr); + }); + } else { + throw std::invalid_argument( + "Unsupported argument type: only pointers to copy-constructible " + "types and copy-constructible types are supported."); + } + } else if constexpr (std::is_copy_constructible_v>) { + auto *ptr = new std::decay_t(arg); + args.push_back(ptr); + deleters.push_back( + [](void *ptr) { delete static_cast *>(ptr); }); + } else { + throw std::invalid_argument( + "Unsupported argument type: only pointers to copy-constructible " + "types and copy-constructible types are supported."); + } + } + + /// @brief Constructor + template + QuantumState(QuantumKernel &&kernel, Args &&...args) { + if constexpr (has_name::value) { + // kernel_builder kernel: need to JIT code to get it registered. + static_cast(kernel).jitCode(); + kernelName = kernel.name(); + } else { + kernelName = cudaq::getKernelName(kernel); + } + (addArgument(args), ...); + } + QuantumState() = default; + virtual ~QuantumState(); + + /// @brief True if the state has amplitudes or density matrix available. + virtual bool hasData() const override { return false; } + + /// @brief Helper to retrieve (kernel name, `args` pointers) + virtual std::optional>> + getKernelInfo() const override; + + /// @brief Return the number of qubits this state represents. + std::size_t getNumQubits() const override; + + /// @brief Compute the overlap of this state representation with + /// the provided `other` state, e.g. `<this | other>`. + std::complex overlap(const cudaq::SimulationState &other) override; + + /// @brief Return the amplitude of the given computational + /// basis state. 
+ std::complex + getAmplitude(const std::vector &basisState) override; + + /// @brief Return the amplitudes of the given list of computational + /// basis states. + std::vector> + getAmplitudes(const std::vector> &basisState) override; + + /// @brief Return the tensor at the given index. Throws + /// for an invalid tensor index. + Tensor getTensor(std::size_t tensorIdx = 0) const override; + + /// @brief Return all tensors that represent this state + std::vector getTensors() const override; + + /// @brief Return the number of tensors that represent this state. + std::size_t getNumTensors() const override; + + /// @brief Return the element from the tensor at the + /// given tensor index and at the given indices. + std::complex + operator()(std::size_t tensorIdx, + const std::vector &indices) override; + + /// @brief Create a new subclass specific SimulationState + /// from the user provided data set. + std::unique_ptr + createFromSizeAndPtr(std::size_t size, void *ptr, std::size_t) override; + + /// @brief Dump a representation of the state to the + /// given output stream. + void dump(std::ostream &os) const override; + + /// @brief Return the floating point precision used by the simulation state. + precision getPrecision() const override; + + /// @brief Destroy the state representation, frees all associated memory. + void destroyState() override; + + /// @brief Return true if this `SimulationState` wraps data on the GPU. + bool isDeviceData() const override; + + /// @brief Transfer data from device to host, return the data + /// to the pointer provided by the client. Clients must specify the number of + /// elements. + void toHost(std::complex *clientAllocatedData, + std::size_t numElements) const override; + + /// @brief Transfer data from device to host, return the data + /// to the pointer provided by the client. Clients must specify the number of + /// elements. 
+ void toHost(std::complex *clientAllocatedData, + std::size_t numElements) const override; +}; +} // namespace cudaq diff --git a/runtime/cudaq/qis/remote_state.cpp b/runtime/cudaq/qis/remote_state.cpp index 713a462e46..84c9bf9410 100644 --- a/runtime/cudaq/qis/remote_state.cpp +++ b/runtime/cudaq/qis/remote_state.cpp @@ -128,7 +128,7 @@ void RemoteSimulationState::toHost(std::complex *clientAllocatedData, } } -std::pair> +std::optional>> RemoteSimulationState::getKernelInfo() const { return std::make_pair(kernelName, args); } diff --git a/runtime/cudaq/qis/remote_state.h b/runtime/cudaq/qis/remote_state.h index 878bb098dd..ba7929dea4 100644 --- a/runtime/cudaq/qis/remote_state.h +++ b/runtime/cudaq/qis/remote_state.h @@ -83,7 +83,8 @@ class RemoteSimulationState : public cudaq::SimulationState { virtual void execute() const; /// @brief Helper to retrieve (kernel name, `args` pointers) - virtual std::pair> getKernelInfo() const; + virtual std::optional>> + getKernelInfo() const override; /// @brief Return the number of qubits this state represents. 
std::size_t getNumQubits() const override; diff --git a/targettests/Remote-Sim/qvector_init_from_state.cpp b/targettests/Remote-Sim/qvector_init_from_state.cpp index 5899c2f598..1f94b47f06 100644 --- a/targettests/Remote-Sim/qvector_init_from_state.cpp +++ b/targettests/Remote-Sim/qvector_init_from_state.cpp @@ -148,6 +148,22 @@ int main() { // CHECK: 10 // clang-format on + { + std::cout << "Passing state from another kernel as argument" + " with pauli word arg (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state); + auto counts = cudaq::sample(test_state_param2, &state, cudaq::pauli_word{"XX"}); + printCounts(counts); + } + // clang-format off +// CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 + // clang-format on + { std::cout << "Passing state from another kernel as argument iteratively " "with vector args (kernel mode)" diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp new file mode 100644 index 0000000000..afaba5a2c0 --- /dev/null +++ b/targettests/execution/qvector_init_from_state.cpp @@ -0,0 +1,147 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. 
* + ******************************************************************************/ + +// clang-format off +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target quantinuum --emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// clang-format on + +#include +#include +#include +#include + +struct test_init_state { + void operator()(int n) __qpu__ { + cudaq::qvector q(n); + ry(M_PI/2.0, q[0]); + } +}; + +struct test_state_param { + void operator()(cudaq::state *state) __qpu__ { + cudaq::qvector q(state); + x(q); + } +}; + +struct test_state_param2 { + void operator()(cudaq::state *state, cudaq::pauli_word w) __qpu__ { + cudaq::qvector q(state); + cudaq::exp_pauli(1.0, q, w); + } +}; + +void printCounts(cudaq::sample_result &result) { + std::vector values{}; + for (auto &&[bits, counts] : result) { + values.push_back(bits); + } + + std::sort(values.begin(), values.end()); + for (auto &&bits : values) { + std::cout << bits << std::endl; + } +} + +int main() { + std::vector vec{M_SQRT1_2, M_SQRT1_2, 0., 0., 0., 0., 0., 0.}; + std::vector vec1{0., 0., 0., 0., + 0., 0., M_SQRT1_2, M_SQRT1_2}; + auto state = cudaq::state::from_data(vec); + auto state1 = cudaq::state::from_data(vec1); + { + std::cout << "Passing state created from data as argument (kernel mode)" + << std::endl; + auto counts = cudaq::sample(test_state_param{}, &state); + printCounts(counts); + + counts = cudaq::sample(test_state_param{}, &state1); + printCounts(counts); + } + + // clang-format off +// CHECK: Passing state created from data as argument (kernel mode) +// CHECK: 011 +// CHECK: 111 + +// CHECK: 000 +// CHECK: 100 + // clang-format on + + { + std::cout << "Passing state from another kernel as argument (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); + auto counts = cudaq::sample(test_state_param{}, &state); + printCounts(counts); + } + // clang-format off +// CHECK: Passing state from another kernel as 
argument (kernel mode) +// CHECK: 01 +// CHECK: 11 + // clang-format on + + { + std::cout + << "Passing large state from another kernel as argument (kernel mode)" + << std::endl; + auto largeState = cudaq::get_state(test_init_state{}, 14); + auto counts = cudaq::sample(test_state_param{}, &largeState); + printCounts(counts); + } + // clang-format off +// CHECK: Passing large state from another kernel as argument (kernel mode) +// CHECK: 01111111111111 +// CHECK: 11111111111111 + // clang-format on + + { + std::cout << "Passing state from another kernel as argument" + " with pauli word arg (kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); + auto counts = cudaq::sample(test_state_param2{}, &state, cudaq::pauli_word{"XX"}); + printCounts(counts); + } + // clang-format off +// CHECK: Passing state from another kernel as argument with pauli word arg (kernel mode) +// CHECK: 00 +// CHECK: 01 +// CHECK: 10 +// CHECK: 11 + // clang-format on + + { + std::cout << "Passing state from another kernel as argument iteratively " + "(kernel mode)" + << std::endl; + auto state = cudaq::get_state(test_init_state{}, 2); + for (auto i = 0; i < 4; i++) { + auto counts = cudaq::sample(test_state_param{}, &state); + std::cout << "Iteration: " << i << std::endl; + printCounts(counts); + state = cudaq::get_state(test_state_param{}, &state); + } + } + // clang-format off +// CHECK: Passing state from another kernel as argument iteratively (kernel mode) +// CHECK: Iteration: 0 +// CHECK: 01 +// CHECK: 11 +// CHECK: Iteration: 1 +// CHECK: 00 +// CHECK: 10 +// CHECK: Iteration: 2 +// CHECK: 01 +// CHECK: 11 +// CHECK: Iteration: 3 +// CHECK: 00 +// CHECK: 10 + // clang-format on +} diff --git a/targettests/execution/state_init.cpp b/targettests/execution/state_init.cpp index 31e946147d..e9b8456513 100644 --- a/targettests/execution/state_init.cpp +++ b/targettests/execution/state_init.cpp @@ -40,4 +40,4 @@ int main() { } // CHECK: 00 -// CHECK: 10 +// 
CHECK: 10 \ No newline at end of file diff --git a/test/Quake/arg_subst-5.txt b/test/Quake/arg_subst-5.txt new file mode 100644 index 0000000000..c5e727bb79 --- /dev/null +++ b/test/Quake/arg_subst-5.txt @@ -0,0 +1,15 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +cc.arg_subst[0] { + %0 = cc.string_literal "init" : !cc.ptr> + %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr + %2 = func.call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr + %3 = cc.cast %2 : (!cc.ptr) -> !cc.ptr +} +func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr diff --git a/test/Quake/arg_subst-6.txt b/test/Quake/arg_subst-6.txt new file mode 100644 index 0000000000..4c3a55d883 --- /dev/null +++ b/test/Quake/arg_subst-6.txt @@ -0,0 +1,11 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. // +// ========================================================================== // + +cc.arg_subst[0] { + %c2_i32 = arith.constant 2 : i32 +} diff --git a/test/Quake/arg_subst_func.qke b/test/Quake/arg_subst_func.qke index e96e04b63a..4bf6e10155 100644 --- a/test/Quake/arg_subst_func.qke +++ b/test/Quake/arg_subst_func.qke @@ -6,7 +6,7 @@ // the terms of the Apache License 2.0 which accompanies this distribution. 
// // ========================================================================== // -// RUN: cudaq-opt --argument-synthesis=functions=foo:%S/arg_subst.txt,blink:%S/arg_subst.txt,testy1:%S/arg_subst-1.txt,testy2:%S/arg_subst-2.txt,testy3:%S/arg_subst-3.txt,testy4:%S/arg_subst-4.txt --canonicalize %s | FileCheck %s +// RUN: cudaq-opt --argument-synthesis=functions=foo:%S/arg_subst.txt,blink:%S/arg_subst.txt,testy1:%S/arg_subst-1.txt,testy2:%S/arg_subst-2.txt,testy3:%S/arg_subst-3.txt,testy4:%S/arg_subst-4.txt,testy5:%S/arg_subst-5.txt,init:%S/arg_subst-6.txt --canonicalize %s | FileCheck %s func.func private @bar(i32) func.func private @baz(f32) @@ -146,3 +146,38 @@ func.func @testy4(%arg0: !cc.stdvec>) { // CHECK: call @callee4(%[[VAL_32]]) : (!cc.stdvec>) -> () // CHECK: return // CHECK: } + +func.func @testy5(%arg0: !cc.ptr) { + %3 = call @__nvqpp_cudaq_state_numberOfQubits(%arg0) : (!cc.ptr) -> i64 + %4 = quake.alloca !quake.veq[%3 : i64] + %5 = quake.init_state %4, %arg0 : (!quake.veq, !cc.ptr) -> !quake.veq + return +} + +func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 +func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr + +func.func private @init(%arg0: i32) -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %cst = arith.constant 1.5707963267948966 : f64 + %0 = cc.cast signed %arg0 : (i32) -> i64 + %1 = quake.alloca !quake.veq[%0 : i64] + %2 = quake.concat %1 : (!quake.veq) -> !quake.veq + return %2 : !quake.veq +} + +// CHECK-LABEL: func.func @testy5() { +// CHECK: %[[VAL_0:.*]] = cc.string_literal "init" : !cc.ptr> +// CHECK: %[[VAL_1:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr>) -> !cc.ptr +// CHECK: %[[VAL_2:.*]] = call @__nvqpp_cudaq_state_get(%[[VAL_1]]) : (!cc.ptr) -> !cc.ptr +// CHECK: %[[VAL_3:.*]] = call @__nvqpp_cudaq_state_numberOfQubits(%[[VAL_2]]) : (!cc.ptr) -> i64 +// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.veq[%[[VAL_3]] : i64] +// CHECK: %[[VAL_5:.*]] = quake.init_state %[[VAL_4]], %[[VAL_2]] : 
(!quake.veq, !cc.ptr) -> !quake.veq +// CHECK: return +// CHECK: } +// CHECK: func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 +// CHECK: func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr +// CHECK: func.func private @init() -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { +// CHECK: %[[VAL_7:.*]] = quake.alloca !quake.veq<2> +// CHECK: %[[VAL_8:.*]] = quake.relax_size %[[VAL_7:.*]] : (!quake.veq<2>) -> !quake.veq +// CHECK: return %[[VAL_8]] : !quake.veq +// CHECK: } diff --git a/test/Quake/state_init.qke b/test/Quake/state_init.qke new file mode 100644 index 0000000000..9f43a965a4 --- /dev/null +++ b/test/Quake/state_init.qke @@ -0,0 +1,37 @@ +// ========================================================================== // +// Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. // +// All rights reserved. // +// // +// This source code and the accompanying materials are made available under // +// the terms of the Apache License 2.0 which accompanies this distribution. 
// +// ========================================================================== // + +// RUN: cudaq-opt -state-initialization -canonicalize %s | FileCheck %s + +module { + func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + %0 = cc.string_literal "callee.modified_0" : !cc.ptr> + %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr + %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr + %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 + %4 = quake.alloca !quake.veq[%3 : i64] + %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr) -> !quake.veq + return + } + + func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 + func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr + + func.func private @callee.modified_0() -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { + %cst = arith.constant 1.5707963267948966 : f64 + %0 = quake.alloca !quake.veq<2> + %1 = quake.extract_ref %0[0] : (!quake.veq<2>) -> !quake.ref + quake.ry (%cst) %1 : (f64, !quake.ref) -> () + %2 = quake.relax_size %0 : (!quake.veq<2>) -> !quake.veq + return %2 : !quake.veq + } +// CHECK-LABEL: func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK: %[[VAL_0:.*]] = call @callee.modified_0() : () -> !quake.veq +// CHECK: return +// CHECK: } +} diff --git a/test/Quake/state_prep.qke b/test/Quake/state_prep.qke index 4289571b33..3072a19218 100644 --- a/test/Quake/state_prep.qke +++ b/test/Quake/state_prep.qke @@ -31,7 +31,7 @@ module { // CHECK: return // CHECK: } - func.func @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { + func.func @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %0 = cc.address_of @__nvqpp__mlirgen__function_test_real_constant_array._Z24test_real_constant_arrayv.rodata_0 : !cc.ptr> %1 = 
quake.alloca !quake.veq<2> %2 = quake.init_state %1, %0 : (!quake.veq<2>, !cc.ptr>) -> !quake.veq<2> diff --git a/tpls/Stim b/tpls/Stim index 47190f4a3a..b01e423915 160000 --- a/tpls/Stim +++ b/tpls/Stim @@ -1 +1 @@ -Subproject commit 47190f4a3afb104c9f0068d0be9fea87d2894a70 +Subproject commit b01e42391583d03db4266b387d907eda1d7ae488 From 3fc56de6f0c911888fc8f3ae6356b8613653f0f9 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 17 Oct 2024 14:25:47 -0700 Subject: [PATCH 03/18] Merge with main Signed-off-by: Anna Gringauze --- python/tests/interop/quantum_lib/CMakeLists.txt | 1 + runtime/common/BaseRemoteRESTQPU.h | 7 +++---- targettests/execution/state_init.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/tests/interop/quantum_lib/CMakeLists.txt b/python/tests/interop/quantum_lib/CMakeLists.txt index 34fb024188..21bb37a4d7 100644 --- a/python/tests/interop/quantum_lib/CMakeLists.txt +++ b/python/tests/interop/quantum_lib/CMakeLists.txt @@ -11,3 +11,4 @@ set(CMAKE_CXX_COMPILE_OBJECT " -fPIC --enable-mlir --disable # FIXME Error with SHARED, it pulls in all the mlir libraries anyway add_library(quantum_lib OBJECT quantum_lib.cpp) +add_dependencies(quantum_lib nvq++ cudaq-opt cudaq-quake cudaq-translate) diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 41f45b6b75..32a097cfc5 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -393,14 +393,13 @@ class BaseRemoteRESTQPU : public cudaq::QPU { if (!func->hasAttr(cudaq::entryPointAttrName)) func->setAttr(cudaq::entryPointAttrName, builder.getUnitAttr()); auto moduleOp = builder.create(); + moduleOp.push_back(func.clone()); moduleOp->setAttrs(m_module->getAttrDictionary()); for (auto &op : m_module.getOps()) { if (auto funcOp = dyn_cast(op)) { - // Add quantum kernels defined in the module. 
- if (funcOp->hasAttr(cudaq::kernelAttrName) || - funcOp.getName().startswith("__nvqpp__mlirgen__") || - funcOp.getBody().empty()) + // Add function definitions for runtime functions. + if (funcOp.getBody().empty()) moduleOp.push_back(funcOp.clone()); } // Add globals defined in the module. diff --git a/targettests/execution/state_init.cpp b/targettests/execution/state_init.cpp index e9b8456513..31e946147d 100644 --- a/targettests/execution/state_init.cpp +++ b/targettests/execution/state_init.cpp @@ -40,4 +40,4 @@ int main() { } // CHECK: 00 -// CHECK: 10 \ No newline at end of file +// CHECK: 10 From 7969a755986157cdb04625a8680516432d00e352 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 17 Oct 2024 14:37:56 -0700 Subject: [PATCH 04/18] Merge with main Signed-off-by: Anna Gringauze --- tpls/Stim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpls/Stim b/tpls/Stim index b01e423915..47190f4a3a 160000 --- a/tpls/Stim +++ b/tpls/Stim @@ -1 +1 @@ -Subproject commit b01e42391583d03db4266b387d907eda1d7ae488 +Subproject commit 47190f4a3afb104c9f0068d0be9fea87d2894a70 From 755d0d1971bc489093ab2e541db759352f4506eb Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 17 Oct 2024 15:24:55 -0700 Subject: [PATCH 05/18] Fix test failure on anyon platform Signed-off-by: Anna Gringauze --- runtime/common/BaseRemoteRESTQPU.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 32a097cfc5..989649d9fa 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -18,6 +18,7 @@ #include "common/RuntimeMLIR.h" #include "cudaq.h" #include "cudaq/Frontend/nvqpp/AttributeNames.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/CodeGen/OpenQASMEmitter.h" #include "cudaq/Optimizer/CodeGen/Passes.h" @@ -398,8 +399,13 @@ class BaseRemoteRESTQPU : public 
cudaq::QPU { for (auto &op : m_module.getOps()) { if (auto funcOp = dyn_cast(op)) { - // Add function definitions for runtime functions. - if (funcOp.getBody().empty()) + // Add function definitions for runtime functions that must + // be removed after synthesis in cleanup ops. + if (funcOp.getBody().empty() && + (funcOp.getName().equals(cudaq::getNumQubitsFromCudaqState) || + funcOp.getName().equals(cudaq::createCudaqStateFromDataFP64) || + funcOp.getName().equals(cudaq::createCudaqStateFromDataFP32) || + funcOp.getName().equals(cudaq::getCudaqState))) moduleOp.push_back(funcOp.clone()); } // Add globals defined in the module. From 382bc99adda74bcae5cab1965096dac12d6e2b37 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Thu, 17 Oct 2024 15:40:34 -0700 Subject: [PATCH 06/18] Make StateInitialization a funcOp pass Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Transforms/Passes.td | 2 +- .../Transforms/StateInitialization.cpp | 25 ++++++------------- runtime/common/BaseRemoteRESTQPU.h | 2 +- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 66eb4cfcb0..70ae6c7138 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -779,7 +779,7 @@ def DeleteStates : Pass<"delete-states", "mlir::ModuleOp"> { }]; } -def StateInitialization : Pass<"state-initialization", "mlir::ModuleOp"> { +def StateInitialization : Pass<"state-initialization", "mlir::func::FuncOp"> { let summary = "Replace `quake.init_state` instructions with call to the kernel generating the state"; let description = [{ diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/StateInitialization.cpp index 3a122f02a7..f641eb04f6 100644 --- a/lib/Optimizer/Transforms/StateInitialization.cpp +++ b/lib/Optimizer/Transforms/StateInitialization.cpp @@ -121,26 +121,17 @@ class StateInitializationPass void 
runOnOperation() override { auto *ctx = &getContext(); - auto module = getOperation(); - for (Operation &op : *module.getBody()) { - auto func = dyn_cast(op); - if (!func) - continue; + auto func = getOperation(); + RewritePatternSet patterns(ctx); + patterns.insert(ctx); - std::string funcName = func.getName().str(); - RewritePatternSet patterns(ctx); - patterns.insert(ctx); + LLVM_DEBUG(llvm::dbgs() << "Before state initialization: " << func << '\n'); - LLVM_DEBUG(llvm::dbgs() - << "Before state initialization: " << func << '\n'); + if (failed(applyPatternsAndFoldGreedily(func.getOperation(), + std::move(patterns)))) + signalPassFailure(); - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) - signalPassFailure(); - - LLVM_DEBUG(llvm::dbgs() - << "After state initialization: " << func << '\n'); - } + LLVM_DEBUG(llvm::dbgs() << "After state initialization: " << func << '\n'); } }; } // namespace diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 989649d9fa..a37d5bf706 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -447,7 +447,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { substs.end()})); pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); - pm.addPass(opt::createStateInitialization()); + pm.addNestedPass(opt::createStateInitialization()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); From d3a05d4432d41acaae68fea86eeac6f3e34d4cc7 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 18 Oct 2024 11:09:12 -0700 Subject: [PATCH 07/18] Fix issues and tests for the rest of quantum architectures Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Transforms/Passes.td | 11 ++ lib/Optimizer/Transforms/CMakeLists.txt | 1 + .../Transforms/StateInitialization.cpp | 16 +-- lib/Optimizer/Transforms/StateValidation.cpp | 130 
++++++++++++++++++ runtime/common/BaseRemoteRESTQPU.h | 2 + .../default/rest/helpers/anyon/anyon.yml | 2 + .../default/rest/helpers/ionq/ionq.yml | 2 + .../platform/default/rest/helpers/iqm/iqm.yml | 2 + .../platform/default/rest/helpers/oqc/oqc.yml | 2 + .../execution/qvector_init_from_state.cpp | 17 ++- 10 files changed, 174 insertions(+), 11 deletions(-) create mode 100644 lib/Optimizer/Transforms/StateValidation.cpp diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index 70ae6c7138..aa8f038c41 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -866,6 +866,17 @@ def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> { ]; } +def StateValidation : Pass<"state-validation", "mlir::ModuleOp"> { + let summary = + "Make sure MLIR is valid after synthesis for quantum devices"; + let description = [{ + Argument synthesis should replace all `quake.init_state` instructions + and calls to state-related runtime functions. + Make sure none of them are left, and remove definitions for state-related + runtime functions. 
+ }]; +} + def PromoteRefToVeqAlloc : Pass<"promote-qubit-allocation"> { let summary = "Promote single qubit allocations."; let description = [{ diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index f107d78bde..7eae39e35f 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -52,6 +52,7 @@ add_cudaq_library(OptTransforms RegToMem.cpp StateInitialization.cpp StatePreparation.cpp + StateValidation.cpp UnitarySynthesis.cpp WiresToWiresets.cpp diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/StateInitialization.cpp index f641eb04f6..c46273b747 100644 --- a/lib/Optimizer/Transforms/StateInitialization.cpp +++ b/lib/Optimizer/Transforms/StateInitialization.cpp @@ -30,10 +30,10 @@ using namespace mlir; namespace { -static bool isCall(Operation *callOp, std::vector &&names) { - if (callOp) { - if (auto createStateCall = dyn_cast(callOp)) { - if (auto calleeAttr = createStateCall.getCalleeAttr()) { +static bool isCall(Operation *op, std::vector &&names) { + if (op) { + if (auto callOp = dyn_cast(op)) { + if (auto calleeAttr = callOp.getCalleeAttr()) { auto funcName = calleeAttr.getValue().str(); if (std::find(names.begin(), names.end(), funcName) != names.end()) return true; @@ -43,12 +43,12 @@ static bool isCall(Operation *callOp, std::vector &&names) { return false; } -static bool isGetStateCall(Operation *callOp) { - return isCall(callOp, {cudaq::getCudaqState}); +static bool isGetStateCall(Operation *op) { + return isCall(op, {cudaq::getCudaqState}); } -static bool isNumberOfQubitsCall(Operation *callOp) { - return isCall(callOp, {cudaq::getNumQubitsFromCudaqState}); +static bool isNumberOfQubitsCall(Operation *op) { + return isCall(op, {cudaq::getNumQubitsFromCudaqState}); } // clang-format off diff --git a/lib/Optimizer/Transforms/StateValidation.cpp b/lib/Optimizer/Transforms/StateValidation.cpp new file mode 100644 index 
0000000000..be20dd4ede --- /dev/null +++ b/lib/Optimizer/Transforms/StateValidation.cpp @@ -0,0 +1,130 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "PassDetails.h" +#include "cudaq/Optimizer/Builder/Intrinsics.h" +#include "cudaq/Optimizer/Dialect/CC/CCOps.h" +#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" +#include "cudaq/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/Complex/IR/Complex.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" + +namespace cudaq::opt { +#define GEN_PASS_DEF_STATEVALIDATION +#include "cudaq/Optimizer/Transforms/Passes.h.inc" +} // namespace cudaq::opt + +#define DEBUG_TYPE "state-validation" + +using namespace mlir; + + +/// Validate that quantum code does not contain runtime calls and remove runtime function definitions. 
+namespace { + +static bool isRuntimeStateCallName(llvm::StringRef funcName) { + static std::vector names = { + cudaq::getCudaqState, + cudaq::createCudaqStateFromDataFP32, + cudaq::createCudaqStateFromDataFP64, + cudaq::deleteCudaqState, + cudaq::getNumQubitsFromCudaqState + }; + if (std::find(names.begin(), names.end(), funcName) != names.end()) + return true; + return false; +} + +static bool isRuntimeStateCall(Operation *callOp) { + if (callOp) { + if (auto call = dyn_cast(callOp)) { + if (auto calleeAttr = call.getCalleeAttr()) { + auto funcName = calleeAttr.getValue().str(); + if (isRuntimeStateCallName(funcName)) + return true; + } + } + } + return false; +} + +class ValidateStateCallPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(func::CallOp callOp, + PatternRewriter &rewriter) const override { + if (isRuntimeStateCall(callOp)) { + auto name = callOp.getCalleeAttr().getValue(); + callOp.emitError("Unsupported call for quantum platform: " + name); + } + return failure(); + } +}; + +class ValidateStateInitPattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(quake::InitializeStateOp initState, + PatternRewriter &rewriter) const override { + auto stateOp = initState.getOperand(1); + if (isa(stateOp.getType())) + initState.emitError("Synthesis did not remove `quake.init_state ` instruction"); + + return failure(); + } +}; + + +class StateValidationPass + : public cudaq::opt::impl::StateValidationBase { +protected: +public: + using StateValidationBase::StateValidationBase; + + mlir::ModuleOp getModule() { return getOperation(); } + + void runOnOperation() override final { + auto *ctx = &getContext(); + auto module = getModule(); + SmallVector toErase; + + for (Operation &op : *module.getBody()) { + auto func = dyn_cast(op); + if (!func) + continue; + + RewritePatternSet patterns(ctx); + patterns.insert(ctx); + + 
LLVM_DEBUG(llvm::dbgs() + << "Before state validation: " << func << '\n'); + + if (failed(applyPatternsAndFoldGreedily(func.getOperation(), + std::move(patterns)))) + signalPassFailure(); + + // Delete runtime function definitions. + if (func.getBody().empty() && isRuntimeStateCallName(func.getName())) + toErase.push_back(func); + + LLVM_DEBUG(llvm::dbgs() + << "After state validation: " << func << '\n'); + } + + for (auto *op : toErase) + op->erase(); + } +}; + +} // namespace diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index a37d5bf706..0eab2c7fba 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -405,6 +405,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { (funcOp.getName().equals(cudaq::getNumQubitsFromCudaqState) || funcOp.getName().equals(cudaq::createCudaqStateFromDataFP64) || funcOp.getName().equals(cudaq::createCudaqStateFromDataFP32) || + funcOp.getName().equals(cudaq::deleteCudaqState) || funcOp.getName().equals(cudaq::getCudaqState))) moduleOp.push_back(funcOp.clone()); } @@ -448,6 +449,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); pm.addNestedPass(opt::createStateInitialization()); + pm.addPass(opt::createStateValidation()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); diff --git a/runtime/cudaq/platform/default/rest/helpers/anyon/anyon.yml b/runtime/cudaq/platform/default/rest/helpers/anyon/anyon.yml index 3ecb49f302..e0fb208f9c 100644 --- a/runtime/cudaq/platform/default/rest/helpers/anyon/anyon.yml +++ b/runtime/cudaq/platform/default/rest/helpers/anyon/anyon.yml @@ -13,6 +13,8 @@ config: platform-qpu: remote_rest # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D 
CUDAQ_QUANTUM_DEVICE"] # Add the rest-qpu library to the link list link-libs: ["-lcudaq-rest-qpu"] # Define the lowering pipeline diff --git a/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml b/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml index 238d4c3316..802cdc2e0a 100644 --- a/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml +++ b/runtime/cudaq/platform/default/rest/helpers/ionq/ionq.yml @@ -13,6 +13,8 @@ config: platform-qpu: remote_rest # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Add the rest-qpu library to the link list link-libs: ["-lcudaq-rest-qpu"] # Define the lowering pipeline diff --git a/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.yml b/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.yml index 0e90a1e2af..2c928bda87 100644 --- a/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.yml +++ b/runtime/cudaq/platform/default/rest/helpers/iqm/iqm.yml @@ -13,6 +13,8 @@ config: platform-qpu: remote_rest # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Add the rest-qpu library to the link list link-libs: ["-lcudaq-rest-qpu"] # Define the lowering pipeline diff --git a/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml b/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml index 6a8a46c066..cde626676c 100644 --- a/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml +++ b/runtime/cudaq/platform/default/rest/helpers/oqc/oqc.yml @@ -13,6 +13,8 @@ config: platform-qpu: remote_rest # Tell NVQ++ to generate glue code to set the target backend name gen-target-backend: true + # Add preprocessor defines to compilation + preprocessor-defines: ["-D CUDAQ_QUANTUM_DEVICE"] # Add the rest-qpu library to the link list link-libs: 
["-lcudaq-rest-qpu"] # Define the lowering pipeline diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index afaba5a2c0..06c97b1e6a 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -7,8 +7,16 @@ ******************************************************************************/ // clang-format off -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s -// RUN: nvq++ %cpp_std --target quantinuum --emulate -fkernel-exec-kind=2 %s -o %t && %t | FileCheck %s +// Simulators +// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s + +// Quantum emulators +// RUN: nvq++ %cpp_std --target quantinuum --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s +// 2 different IQM machines for 2 different topologies +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Adonis --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target iqm --iqm-machine Apollo --emulate %s -o %t && %t | FileCheck %s +// RUN: nvq++ %cpp_std --target oqc --emulate %s -o %t && %t | FileCheck %s // clang-format on #include @@ -91,7 +99,10 @@ int main() { std::cout << "Passing large state from another kernel as argument (kernel mode)" << std::endl; - auto largeState = cudaq::get_state(test_init_state{}, 14); + // TODO: State larger than 5 qubits fails on iqm machines with Adonis architecture + // TODO: State larger than 8 qubits fails on oqc and anyon + // Up to 14 qubits work with quantinuum and ionq + auto largeState = cudaq::get_state(test_init_state{}, 5); auto counts = cudaq::sample(test_state_param{}, &largeState); printCounts(counts); } From 51ef054c14df334252e389e2244d24974486661e Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Fri, 18 Oct 2024 15:48:39 -0700 Subject: [PATCH 08/18] Fix failing quantinuum state prep tests Signed-off-by: Anna Gringauze --- 
.../Transforms/StateInitialization.cpp | 68 ++++++++++--------- lib/Optimizer/Transforms/StateValidation.cpp | 7 +- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/StateInitialization.cpp index c46273b747..0ed6867670 100644 --- a/lib/Optimizer/Transforms/StateInitialization.cpp +++ b/lib/Optimizer/Transforms/StateInitialization.cpp @@ -73,39 +73,43 @@ class StateInitPattern : public OpRewritePattern { PatternRewriter &rewriter) const override { auto loc = initState.getLoc(); auto allocaOp = initState.getOperand(0).getDefiningOp(); - auto getStateOp = initState.getOperand(1).getDefiningOp(); - auto numOfQubits = allocaOp->getOperand(0).getDefiningOp(); - - if (isGetStateCall(getStateOp)) { - auto calleeNameOp = getStateOp->getOperand(0); - if (auto cast = - dyn_cast(calleeNameOp.getDefiningOp())) { - calleeNameOp = cast.getOperand(); - - if (auto literal = dyn_cast( - calleeNameOp.getDefiningOp())) { - auto calleeName = literal.getStringLiteral(); - - Value result = - rewriter - .create(loc, initState.getType(), calleeName, - mlir::ValueRange{}) - .getResult(0); - rewriter.replaceAllUsesWith(initState, result); - initState.erase(); - allocaOp->dropAllUses(); - rewriter.eraseOp(allocaOp); - if (isNumberOfQubitsCall(numOfQubits)) { - numOfQubits->dropAllUses(); - rewriter.eraseOp(numOfQubits); + auto stateOp = initState.getOperand(1); + + if (isa(stateOp.getType())) { + auto getStateOp = stateOp.getDefiningOp(); + auto numOfQubits = allocaOp->getOperand(0).getDefiningOp(); + + if (isGetStateCall(getStateOp)) { + auto calleeNameOp = getStateOp->getOperand(0); + if (auto cast = + dyn_cast(calleeNameOp.getDefiningOp())) { + calleeNameOp = cast.getOperand(); + + if (auto literal = dyn_cast( + calleeNameOp.getDefiningOp())) { + auto calleeName = literal.getStringLiteral(); + + Value result = + rewriter + .create(loc, initState.getType(), calleeName, + mlir::ValueRange{}) + 
.getResult(0); + rewriter.replaceAllUsesWith(initState, result); + initState.erase(); + allocaOp->dropAllUses(); + rewriter.eraseOp(allocaOp); + if (isNumberOfQubitsCall(numOfQubits)) { + numOfQubits->dropAllUses(); + rewriter.eraseOp(numOfQubits); + } + getStateOp->dropAllUses(); + rewriter.eraseOp(getStateOp); + cast->dropAllUses(); + rewriter.eraseOp(cast); + literal->dropAllUses(); + rewriter.eraseOp(literal); + return success(); } - getStateOp->dropAllUses(); - rewriter.eraseOp(getStateOp); - cast->dropAllUses(); - rewriter.eraseOp(cast); - literal->dropAllUses(); - rewriter.eraseOp(literal); - return success(); } } } diff --git a/lib/Optimizer/Transforms/StateValidation.cpp b/lib/Optimizer/Transforms/StateValidation.cpp index f0b25cdc10..c9d301740c 100644 --- a/lib/Optimizer/Transforms/StateValidation.cpp +++ b/lib/Optimizer/Transforms/StateValidation.cpp @@ -62,7 +62,8 @@ class ValidateStateCallPattern : public OpRewritePattern { PatternRewriter &rewriter) const override { if (isRuntimeStateCall(callOp)) { auto name = callOp.getCalleeAttr().getValue(); - callOp.emitError("Unsupported call for quantum platform: " + name); + callOp.emitError( + "Synthesis did not remove func call for quantum platform: " + name); } return failure(); } @@ -77,8 +78,8 @@ class ValidateStateInitPattern PatternRewriter &rewriter) const override { auto stateOp = initState.getOperand(1); if (isa(stateOp.getType())) - initState.emitError( - "Synthesis did not remove `quake.init_state ` instruction"); + initState.emitError("Synthesis did not remove `quake.init_state " + "` instruction"); return failure(); } From a7f5387e10c181704ff36c37504fea72ea2e3486 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Mon, 21 Oct 2024 15:11:34 -0700 Subject: [PATCH 09/18] Address CR comments Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Transforms/Passes.td | 17 +-- lib/Optimizer/Transforms/CMakeLists.txt | 3 +- ...ization.cpp => ReplaceStateWithKernel.cpp} | 98 +++++++------- 
lib/Optimizer/Transforms/StateValidation.cpp | 127 ------------------ runtime/common/BaseRemoteRESTQPU.h | 10 +- ...init.qke => replace_state_with_kernel.qke} | 2 +- 6 files changed, 64 insertions(+), 193 deletions(-) rename lib/Optimizer/Transforms/{StateInitialization.cpp => ReplaceStateWithKernel.cpp} (56%) delete mode 100644 lib/Optimizer/Transforms/StateValidation.cpp rename test/Quake/{state_init.qke => replace_state_with_kernel.qke} (96%) diff --git a/include/cudaq/Optimizer/Transforms/Passes.td b/include/cudaq/Optimizer/Transforms/Passes.td index aa8f038c41..ef446a3812 100644 --- a/include/cudaq/Optimizer/Transforms/Passes.td +++ b/include/cudaq/Optimizer/Transforms/Passes.td @@ -779,7 +779,7 @@ def DeleteStates : Pass<"delete-states", "mlir::ModuleOp"> { }]; } -def StateInitialization : Pass<"state-initialization", "mlir::func::FuncOp"> { +def ReplaceStateWithKernel : Pass<"replace-state-with-kernel", "mlir::func::FuncOp"> { let summary = "Replace `quake.init_state` instructions with call to the kernel generating the state"; let description = [{ @@ -794,7 +794,7 @@ def StateInitialization : Pass<"state-initialization", "mlir::func::FuncOp"> { For example: - Before StateInitialization (state-initialization): + Before ReplaceStateWithKernel (replace-state-with-kernel): ``` func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %0 = cc.string_literal "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr> @@ -807,7 +807,7 @@ def StateInitialization : Pass<"state-initialization", "mlir::func::FuncOp"> { } ``` - After StateInitialization (state-initialization): + After ReplaceStateWithKernel (replace-state-with-kernel): ``` func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { %5 = call @__nvqpp__mlirgen__test_init_state.modified_0() : () -> !quake.veq @@ -866,17 +866,6 @@ def StatePreparation : Pass<"state-prep", "mlir::ModuleOp"> { ]; } -def StateValidation : Pass<"state-validation", "mlir::ModuleOp"> { - let 
summary = - "Make sure MLIR is valid after synthesis for quantum devices"; - let description = [{ - Argument synthesis should replace all `quake.init` from state instructions - and calls to state-related runtime functions. - Make sure none of them left, and remove definitions for state-related - runtime functions. - }]; -} - def PromoteRefToVeqAlloc : Pass<"promote-qubit-allocation"> { let summary = "Promote single qubit allocations."; let description = [{ diff --git a/lib/Optimizer/Transforms/CMakeLists.txt b/lib/Optimizer/Transforms/CMakeLists.txt index 7eae39e35f..153e095e1f 100644 --- a/lib/Optimizer/Transforms/CMakeLists.txt +++ b/lib/Optimizer/Transforms/CMakeLists.txt @@ -50,9 +50,8 @@ add_cudaq_library(OptTransforms QuakeSynthesizer.cpp RefToVeqAlloc.cpp RegToMem.cpp - StateInitialization.cpp + ReplaceStateWithKernel.cpp StatePreparation.cpp - StateValidation.cpp UnitarySynthesis.cpp WiresToWiresets.cpp diff --git a/lib/Optimizer/Transforms/StateInitialization.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp similarity index 56% rename from lib/Optimizer/Transforms/StateInitialization.cpp rename to lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index 0ed6867670..d588f09216 100644 --- a/lib/Optimizer/Transforms/StateInitialization.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -20,11 +20,11 @@ #include namespace cudaq::opt { -#define GEN_PASS_DEF_STATEINITIALIZATION +#define GEN_PASS_DEF_REPLACESTATEWITHKERNEL #include "cudaq/Optimizer/Transforms/Passes.h.inc" } // namespace cudaq::opt -#define DEBUG_TYPE "state-initialization" +#define DEBUG_TYPE "replace-state-with-kernel" using namespace mlir; @@ -52,7 +52,9 @@ static bool isNumberOfQubitsCall(Operation *op) { } // clang-format off -/// Replace `quake.init_state` by a call to a (modified) kernel that produced the state. +/// Replace `quake.init_state` by a call to a (modified) kernel that produced +/// the state. 
+/// /// ``` /// %0 = cc.string_literal "callee.modified_0" : !cc.ptr> /// %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr @@ -65,50 +67,54 @@ static bool isNumberOfQubitsCall(Operation *op) { /// %5 = call @callee.modified_0() : () -> !quake.veq /// ``` // clang-format on -class StateInitPattern : public OpRewritePattern { +class ReplaceStateWithKernelPattern : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(quake::InitializeStateOp initState, PatternRewriter &rewriter) const override { - auto loc = initState.getLoc(); - auto allocaOp = initState.getOperand(0).getDefiningOp(); + //auto loc = initState.getLoc(); + auto *alloca = initState.getOperand(0).getDefiningOp(); auto stateOp = initState.getOperand(1); - if (isa(stateOp.getType())) { - auto getStateOp = stateOp.getDefiningOp(); - auto numOfQubits = allocaOp->getOperand(0).getDefiningOp(); - - if (isGetStateCall(getStateOp)) { - auto calleeNameOp = getStateOp->getOperand(0); - if (auto cast = - dyn_cast(calleeNameOp.getDefiningOp())) { - calleeNameOp = cast.getOperand(); - - if (auto literal = dyn_cast( - calleeNameOp.getDefiningOp())) { - auto calleeName = literal.getStringLiteral(); - - Value result = - rewriter - .create(loc, initState.getType(), calleeName, - mlir::ValueRange{}) - .getResult(0); - rewriter.replaceAllUsesWith(initState, result); - initState.erase(); - allocaOp->dropAllUses(); - rewriter.eraseOp(allocaOp); - if (isNumberOfQubitsCall(numOfQubits)) { - numOfQubits->dropAllUses(); - rewriter.eraseOp(numOfQubits); + if (auto ptrTy = dyn_cast(stateOp.getType())) { + if (isa(ptrTy.getElementType())) { + auto *getState = stateOp.getDefiningOp(); + auto *numOfQubits = alloca->getOperand(0).getDefiningOp(); + + if (isGetStateCall(getState)) { + auto calleeNameOp = getState->getOperand(0); + if (auto cast = calleeNameOp.getDefiningOp()) { + calleeNameOp = cast.getOperand(); + + if (auto literal = + calleeNameOp.getDefiningOp()) { + auto calleeName = 
literal.getStringLiteral(); + rewriter.replaceOpWithNewOp(initState, initState.getType(), calleeName, + mlir::ValueRange{}); + + if (alloca->getUses().empty()) + rewriter.eraseOp(alloca); + else { + alloca->emitError("Failed to remove `quake.alloca` in state synthesis"); + return failure(); + } + if (isNumberOfQubitsCall(numOfQubits)) { + if (numOfQubits->getUses().empty()) + rewriter.eraseOp(numOfQubits); + else { + numOfQubits->emitError("Failed to remove runtime call to get number of qubits in state synthesis"); + return failure(); + } + } + if (getState->getUses().empty()) + rewriter.eraseOp(getState); + else { + alloca->emitError("Failed to remove runtime call to get state in state synthesis"); + return failure(); + } + return success(); } - getStateOp->dropAllUses(); - rewriter.eraseOp(getStateOp); - cast->dropAllUses(); - rewriter.eraseOp(cast); - literal->dropAllUses(); - rewriter.eraseOp(literal); - return success(); } } } @@ -117,25 +123,25 @@ class StateInitPattern : public OpRewritePattern { } }; -class StateInitializationPass - : public cudaq::opt::impl::StateInitializationBase< - StateInitializationPass> { +class ReplaceStateWithKernelPass + : public cudaq::opt::impl::ReplaceStateWithKernelBase< + ReplaceStateWithKernelPass> { public: - using StateInitializationBase::StateInitializationBase; + using ReplaceStateWithKernelBase::ReplaceStateWithKernelBase; void runOnOperation() override { auto *ctx = &getContext(); auto func = getOperation(); RewritePatternSet patterns(ctx); - patterns.insert(ctx); + patterns.insert(ctx); - LLVM_DEBUG(llvm::dbgs() << "Before state initialization: " << func << '\n'); + LLVM_DEBUG(llvm::dbgs() << "Before replace state with kernel: " << func << '\n'); if (failed(applyPatternsAndFoldGreedily(func.getOperation(), std::move(patterns)))) signalPassFailure(); - LLVM_DEBUG(llvm::dbgs() << "After state initialization: " << func << '\n'); + LLVM_DEBUG(llvm::dbgs() << "After replace state with kerenl: " << func << '\n'); } }; } // 
namespace diff --git a/lib/Optimizer/Transforms/StateValidation.cpp b/lib/Optimizer/Transforms/StateValidation.cpp deleted file mode 100644 index c9d301740c..0000000000 --- a/lib/Optimizer/Transforms/StateValidation.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - -#include "PassDetails.h" -#include "cudaq/Optimizer/Builder/Intrinsics.h" -#include "cudaq/Optimizer/Dialect/CC/CCOps.h" -#include "cudaq/Optimizer/Dialect/Quake/QuakeOps.h" -#include "cudaq/Optimizer/Transforms/Passes.h" -#include "mlir/Dialect/Complex/IR/Complex.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "mlir/Transforms/Passes.h" - -namespace cudaq::opt { -#define GEN_PASS_DEF_STATEVALIDATION -#include "cudaq/Optimizer/Transforms/Passes.h.inc" -} // namespace cudaq::opt - -#define DEBUG_TYPE "state-validation" - -using namespace mlir; - -/// Validate that quantum code does not contain runtime calls and remove runtime -/// function definitions. 
-namespace { - -static bool isRuntimeStateCallName(llvm::StringRef funcName) { - static std::vector names = { - cudaq::getCudaqState, cudaq::createCudaqStateFromDataFP32, - cudaq::createCudaqStateFromDataFP64, cudaq::deleteCudaqState, - cudaq::getNumQubitsFromCudaqState}; - if (std::find(names.begin(), names.end(), funcName) != names.end()) - return true; - return false; -} - -static bool isRuntimeStateCall(Operation *callOp) { - if (callOp) { - if (auto call = dyn_cast(callOp)) { - if (auto calleeAttr = call.getCalleeAttr()) { - auto funcName = calleeAttr.getValue().str(); - if (isRuntimeStateCallName(funcName)) - return true; - } - } - } - return false; -} - -class ValidateStateCallPattern : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(func::CallOp callOp, - PatternRewriter &rewriter) const override { - if (isRuntimeStateCall(callOp)) { - auto name = callOp.getCalleeAttr().getValue(); - callOp.emitError( - "Synthesis did not remove func call for quantum platform: " + name); - } - return failure(); - } -}; - -class ValidateStateInitPattern - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(quake::InitializeStateOp initState, - PatternRewriter &rewriter) const override { - auto stateOp = initState.getOperand(1); - if (isa(stateOp.getType())) - initState.emitError("Synthesis did not remove `quake.init_state " - "` instruction"); - - return failure(); - } -}; - -class StateValidationPass - : public cudaq::opt::impl::StateValidationBase { -protected: -public: - using StateValidationBase::StateValidationBase; - - mlir::ModuleOp getModule() { return getOperation(); } - - void runOnOperation() override final { - auto *ctx = &getContext(); - auto module = getModule(); - SmallVector toErase; - - for (Operation &op : *module.getBody()) { - auto func = dyn_cast(op); - if (!func) - continue; - - RewritePatternSet patterns(ctx); - 
patterns.insert(ctx); - - LLVM_DEBUG(llvm::dbgs() << "Before state validation: " << func << '\n'); - - if (failed(applyPatternsAndFoldGreedily(func.getOperation(), - std::move(patterns)))) - signalPassFailure(); - - // Delete runtime function definitions. - if (func.getBody().empty() && isRuntimeStateCallName(func.getName())) - toErase.push_back(func); - - LLVM_DEBUG(llvm::dbgs() << "After state validation: " << func << '\n'); - } - - for (auto *op : toErase) - op->erase(); - } -}; - -} // namespace diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index c467811a66..a9053411fa 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -437,6 +437,9 @@ class BaseRemoteRESTQPU : public cudaq::QPU { mlir::PassManager pm(&context); if (!rawArgs.empty()) { cudaq::info("Run Argument Synth.\n"); + // For quantum hardware, we collect substitutions for the + // whole call tree of states, which are treated as calls to + // the kernels and their arguments that produced the state. 
opt::ArgumentConverter argCon(kernelName, moduleOp); argCon.gen(rawArgs); auto [kernels, substs] = argCon.collectAllSubstitutions(); @@ -446,10 +449,11 @@ class BaseRemoteRESTQPU : public cudaq::QPU { kernels.end()}, mlir::SmallVector{substs.begin(), substs.end()})); - pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(opt::createDeleteStates()); - pm.addNestedPass(opt::createStateInitialization()); - pm.addPass(opt::createStateValidation()); + pm.addNestedPass( + opt::createReplaceStateWithKernel()); + pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(mlir::createSymbolDCEPass()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); pm.addPass(cudaq::opt::createQuakeSynthesizer(kernelName, updatedArgs)); diff --git a/test/Quake/state_init.qke b/test/Quake/replace_state_with_kernel.qke similarity index 96% rename from test/Quake/state_init.qke rename to test/Quake/replace_state_with_kernel.qke index 9f43a965a4..70b04e3103 100644 --- a/test/Quake/state_init.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -6,7 +6,7 @@ // the terms of the Apache License 2.0 which accompanies this distribution. 
// // ========================================================================== // -// RUN: cudaq-opt -state-initialization -canonicalize %s | FileCheck %s +// RUN: cudaq-opt -replace-state-with-kernel -canonicalize %s | FileCheck %s module { func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { From 9f0937fcb022663cf1e94216e7acb9bd7c429572 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Mon, 21 Oct 2024 15:41:40 -0700 Subject: [PATCH 10/18] Format Signed-off-by: Anna Gringauze --- .../Transforms/ReplaceStateWithKernel.cpp | 37 +++++++++++-------- runtime/common/BaseRemoteRESTQPU.h | 2 +- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index d588f09216..5300f57415 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -67,13 +67,13 @@ static bool isNumberOfQubitsCall(Operation *op) { /// %5 = call @callee.modified_0() : () -> !quake.veq /// ``` // clang-format on -class ReplaceStateWithKernelPattern : public OpRewritePattern { +class ReplaceStateWithKernelPattern + : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(quake::InitializeStateOp initState, PatternRewriter &rewriter) const override { - //auto loc = initState.getLoc(); auto *alloca = initState.getOperand(0).getDefiningOp(); auto stateOp = initState.getOperand(1); @@ -87,30 +87,35 @@ class ReplaceStateWithKernelPattern : public OpRewritePattern()) { calleeNameOp = cast.getOperand(); - if (auto literal = - calleeNameOp.getDefiningOp()) { + if (auto literal = + calleeNameOp + .getDefiningOp()) { auto calleeName = literal.getStringLiteral(); - rewriter.replaceOpWithNewOp(initState, initState.getType(), calleeName, - mlir::ValueRange{}); + rewriter.replaceOpWithNewOp( + initState, initState.getType(), calleeName, + mlir::ValueRange{}); - 
if (alloca->getUses().empty()) + if (alloca->getUses().empty()) rewriter.eraseOp(alloca); - else { - alloca->emitError("Failed to remove `quake.alloca` in state synthesis"); + else { + alloca->emitError( + "Failed to remove `quake.alloca` in state synthesis"); return failure(); } if (isNumberOfQubitsCall(numOfQubits)) { if (numOfQubits->getUses().empty()) rewriter.eraseOp(numOfQubits); - else { - numOfQubits->emitError("Failed to remove runtime call to get number of qubits in state synthesis"); + else { + numOfQubits->emitError("Failed to remove runtime call to get " + "number of qubits in state synthesis"); return failure(); } } if (getState->getUses().empty()) rewriter.eraseOp(getState); - else { - alloca->emitError("Failed to remove runtime call to get state in state synthesis"); + else { + alloca->emitError("Failed to remove runtime call to get state " + "in state synthesis"); return failure(); } return success(); @@ -135,13 +140,15 @@ class ReplaceStateWithKernelPass RewritePatternSet patterns(ctx); patterns.insert(ctx); - LLVM_DEBUG(llvm::dbgs() << "Before replace state with kernel: " << func << '\n'); + LLVM_DEBUG(llvm::dbgs() + << "Before replace state with kernel: " << func << '\n'); if (failed(applyPatternsAndFoldGreedily(func.getOperation(), std::move(patterns)))) signalPassFailure(); - LLVM_DEBUG(llvm::dbgs() << "After replace state with kerenl: " << func << '\n'); + LLVM_DEBUG(llvm::dbgs() + << "After replace state with kerenl: " << func << '\n'); } }; } // namespace diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index cd57a245d6..2253b4a996 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -411,7 +411,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { } // Add any global symbols, including global constant arrays. 
// Global constant arrays can be created during compilation, - // `lift-array-alloc`, `argument-synthesis`, `quake-synthesizer`, + // `lift-array-alloc`, `argument-synthesis`, `quake-synthesizer`, // and `get-concrete-matrix`passes. if (auto globalOp = dyn_cast(op)) moduleOp.push_back(globalOp.clone()); From 2f3a62327293e5c79b49c2249ecdf241467e6d9b Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 09:54:47 -0700 Subject: [PATCH 11/18] Fix failing test Signed-off-by: Anna Gringauze --- targettests/execution/qvector_init_from_state.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index 06c97b1e6a..681e42eee0 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -108,8 +108,8 @@ int main() { } // clang-format off // CHECK: Passing large state from another kernel as argument (kernel mode) -// CHECK: 01111111111111 -// CHECK: 11111111111111 +// CHECK: 01111 +// CHECK: 11111 // clang-format on { From b3813503b148b98f4d7d074075a6a7496b1082c9 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 09:56:28 -0700 Subject: [PATCH 12/18] Format Signed-off-by: Anna Gringauze --- targettests/execution/qvector_init_from_state.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index 681e42eee0..d75a7e30d8 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -109,7 +109,7 @@ int main() { // clang-format off // CHECK: Passing large state from another kernel as argument (kernel mode) // CHECK: 01111 -// CHECK: 11111 +// CHECK: 111111 // clang-format on { From dc87ca4c9b31d7d1037c5f103adc58a353822135 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 
09:57:02 -0700 Subject: [PATCH 13/18] Format Signed-off-by: Anna Gringauze --- targettests/execution/qvector_init_from_state.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/targettests/execution/qvector_init_from_state.cpp b/targettests/execution/qvector_init_from_state.cpp index d75a7e30d8..681e42eee0 100644 --- a/targettests/execution/qvector_init_from_state.cpp +++ b/targettests/execution/qvector_init_from_state.cpp @@ -109,7 +109,7 @@ int main() { // clang-format off // CHECK: Passing large state from another kernel as argument (kernel mode) // CHECK: 01111 -// CHECK: 111111 +// CHECK: 11111 // clang-format on { From 53a34c97759a619a9298523705392412a2fc7974 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 14:46:03 -0700 Subject: [PATCH 14/18] Replaced getState intrinsic by cc.get_state op Signed-off-by: Anna Gringauze --- include/cudaq/Optimizer/Builder/Intrinsics.h | 4 - include/cudaq/Optimizer/Dialect/CC/CCOps.td | 20 +++++ lib/Optimizer/Builder/Intrinsics.cpp | 4 - .../Transforms/ReplaceStateWithKernel.cpp | 77 +++++++------------ runtime/common/ArgumentConversion.cpp | 21 +---- runtime/common/BaseRemoteRESTQPU.h | 1 - runtime/test/test_argument_conversion.cpp | 22 ++---- test/Quake/replace_state_with_kernel.qke | 15 ++-- 8 files changed, 63 insertions(+), 101 deletions(-) diff --git a/include/cudaq/Optimizer/Builder/Intrinsics.h b/include/cudaq/Optimizer/Builder/Intrinsics.h index d545a57602..fa9ce53097 100644 --- a/include/cudaq/Optimizer/Builder/Intrinsics.h +++ b/include/cudaq/Optimizer/Builder/Intrinsics.h @@ -55,10 +55,6 @@ static constexpr const char createCudaqStateFromDataFP32[] = // Delete a state created by the runtime functions above. 
static constexpr const char deleteCudaqState[] = "__nvqpp_cudaq_state_delete"; -// Get state of a kernel (placeholder function, calls are always replaced in -// opts) -static constexpr const char getCudaqState[] = "__nvqpp_cudaq_state_get"; - /// Builder for lowering the clang AST to an IR for CUDA-Q. Lowering includes /// the transformation of both quantum and classical computation. Different /// features of the CUDA-Q programming model are lowered into different dialects diff --git a/include/cudaq/Optimizer/Dialect/CC/CCOps.td b/include/cudaq/Optimizer/Dialect/CC/CCOps.td index a58e3d403d..cda02c7a23 100644 --- a/include/cudaq/Optimizer/Dialect/CC/CCOps.td +++ b/include/cudaq/Optimizer/Dialect/CC/CCOps.td @@ -898,6 +898,26 @@ def cc_AddressOfOp : CCOp<"address_of", [Pure, }]; } +def cc_GetStateOp : CCOp<"get_state", [Pure] > { + let summary = "Get state from kernel with the provided name."; + let description = [{ + This operation is created by argument synthesis of state pointer arguments + for quantum devices. It takes a kernel name as ASCIIZ string literal value + and returns the kernel's quantum state. The operation is replaced by a call + to the kernel with the provided name in ReplaceStateWithKernel pass.
+ + ```mlir + %0 = cc.get_state "callee" : !cc.ptr + ``` + }]; + + let arguments = (ins StrAttr:$calleeName); + let results = (outs cc_PointerType:$result); + let assemblyFormat = [{ + $calleeName `:` qualified(type(results)) attr-dict + }]; +} + def cc_GlobalOp : CCOp<"global", [IsolatedFromAbove, Symbol]> { let summary = "Create a global constant or variable"; let description = [{ diff --git a/lib/Optimizer/Builder/Intrinsics.cpp b/lib/Optimizer/Builder/Intrinsics.cpp index e0ed794264..315743f057 100644 --- a/lib/Optimizer/Builder/Intrinsics.cpp +++ b/lib/Optimizer/Builder/Intrinsics.cpp @@ -269,10 +269,6 @@ static constexpr IntrinsicCode intrinsicTable[] = { {cudaq::deleteCudaqState, {}, R"#( func.func private @__nvqpp_cudaq_state_delete(%p : !cc.ptr) -> () - )#"}, - - {cudaq::getCudaqState, {}, R"#( - func.func private @__nvqpp_cudaq_state_get(%p : !cc.ptr) -> !cc.ptr )#"}, {cudaq::getNumQubitsFromCudaqState, {}, R"#( diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index 5300f57415..80907bfec1 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -43,10 +43,6 @@ static bool isCall(Operation *op, std::vector &&names) { return false; } -static bool isGetStateCall(Operation *op) { - return isCall(op, {cudaq::getCudaqState}); -} - static bool isNumberOfQubitsCall(Operation *op) { return isCall(op, {cudaq::getNumQubitsFromCudaqState}); } @@ -56,12 +52,10 @@ static bool isNumberOfQubitsCall(Operation *op) { /// the state. 
/// /// ``` -/// %0 = cc.string_literal "callee.modified_0" : !cc.ptr> -/// %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr -/// %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr -/// %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 -/// %4 = quake.alloca !quake.veq[%3 : i64] -/// %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr) -> !quake.veq +/// %0 = cc.get_state "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr +/// %1 = call @__nvqpp_cudaq_state_numberOfQubits(%0) : (!cc.ptr) -> i64 +/// %2 = quake.alloca !quake.veq[%1 : i64] +/// %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq /// ─────────────────────────────────────────── /// ... /// %5 = call @callee.modified_0() : () -> !quake.veq @@ -79,49 +73,34 @@ class ReplaceStateWithKernelPattern if (auto ptrTy = dyn_cast(stateOp.getType())) { if (isa(ptrTy.getElementType())) { - auto *getState = stateOp.getDefiningOp(); auto *numOfQubits = alloca->getOperand(0).getDefiningOp(); - - if (isGetStateCall(getState)) { - auto calleeNameOp = getState->getOperand(0); - if (auto cast = calleeNameOp.getDefiningOp()) { - calleeNameOp = cast.getOperand(); - - if (auto literal = - calleeNameOp - .getDefiningOp()) { - auto calleeName = literal.getStringLiteral(); - rewriter.replaceOpWithNewOp( - initState, initState.getType(), calleeName, - mlir::ValueRange{}); - - if (alloca->getUses().empty()) - rewriter.eraseOp(alloca); - else { - alloca->emitError( - "Failed to remove `quake.alloca` in state synthesis"); - return failure(); - } - if (isNumberOfQubitsCall(numOfQubits)) { - if (numOfQubits->getUses().empty()) - rewriter.eraseOp(numOfQubits); - else { - numOfQubits->emitError("Failed to remove runtime call to get " - "number of qubits in state synthesis"); - return failure(); - } - } - if (getState->getUses().empty()) - rewriter.eraseOp(getState); - else { - alloca->emitError("Failed to remove runtime call to get state " - "in state synthesis"); - return failure(); - } - 
return success(); + stateOp.getDefiningOp()->dump(); + + if (auto getState = stateOp.getDefiningOp()) { + auto calleeName = getState.getCalleeName(); + rewriter.replaceOpWithNewOp( + initState, initState.getType(), calleeName, mlir::ValueRange{}); + + if (alloca->getUses().empty()) + rewriter.eraseOp(alloca); + else { + alloca->emitError( + "Failed to remove `quake.alloca` in state synthesis"); + return failure(); + } + if (isNumberOfQubitsCall(numOfQubits)) { + if (numOfQubits->getUses().empty()) + rewriter.eraseOp(numOfQubits); + else { + numOfQubits->emitError("Failed to remove runtime call to get " + "number of qubits in state synthesis"); + return failure(); } } + return success(); } + numOfQubits->emitError( + "Failed to replace `quake.init_state` in state synthesis"); } } return failure(); diff --git a/runtime/common/ArgumentConversion.cpp b/runtime/common/ArgumentConversion.cpp index 42b228dd3b..c548d23523 100644 --- a/runtime/common/ArgumentConversion.cpp +++ b/runtime/common/ArgumentConversion.cpp @@ -243,27 +243,10 @@ static Value genConstant(OpBuilder &builder, const cudaq::state *v, converter.genCallee(modifiedCalleeName, calleeArgs); // Create a subst for state pointer. 
- auto strLitTy = cudaq::cc::PointerType::get( - cudaq::cc::ArrayType::get(builder.getContext(), builder.getI8Type(), - modifiedCalleeKernelName.size() + 1)); - auto callee = builder.create( - loc, strLitTy, builder.getStringAttr(modifiedCalleeKernelName)); - - auto i8PtrTy = cudaq::cc::PointerType::get(builder.getI8Type()); - auto calleeCast = builder.create(loc, i8PtrTy, callee); - - cudaq::IRBuilder irBuilder(ctx); - auto result = irBuilder.loadIntrinsic(substMod, cudaq::getCudaqState); - assert(succeeded(result) && "loading intrinsic should never fail"); - auto statePtrTy = cudaq::cc::PointerType::get(cudaq::cc::StateType::get(ctx)); - auto statePtr = - builder - .create(loc, statePtrTy, cudaq::getCudaqState, - ValueRange{calleeCast}) - .getResult(0); - return builder.create(loc, statePtrTy, statePtr); + return builder.create( + loc, statePtrTy, builder.getStringAttr(modifiedCalleeKernelName)); } TODO("cudaq::state* argument synthesis for quantum hardware for c functions"); diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 2c8654d540..0421cde877 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -458,7 +458,6 @@ class BaseRemoteRESTQPU : public cudaq::QPU { pm.addPass(opt::createDeleteStates()); pm.addNestedPass( opt::createReplaceStateWithKernel()); - pm.addPass(mlir::createCanonicalizerPass()); pm.addPass(mlir::createSymbolDCEPass()); } else if (updatedArgs) { cudaq::info("Run Quake Synth.\n"); diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 9fe3d92f8f..93939125c1 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -380,13 +380,10 @@ void test_state(mlir::MLIRContext *ctx) { // CHECK-LABEL: cc.arg_subst[0] { // CHECK: %[[VAL_0:.*]] = cc.address_of @[[VAL_GC:.*]] : !cc.ptr x 8>> -// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_0]] : !cc.ptr x 8>> -// CHECK: %[[VAL_2:.*]] = 
arith.constant 8 : i64 -// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.array x 8> -// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr x 8>> -// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr x 8>>) -> !cc.ptr -// CHECK: %[[VAL_5:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_4]], %[[VAL_2]]) : (!cc.ptr, i64) -> !cc.ptr -// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr) -> !cc.ptr +// CHECK: %[[VAL_1:.*]] = arith.constant 8 : i64 +// CHECK: %[[VAL_2:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr x 8>>) -> !cc.ptr +// CHECK: %[[VAL_3:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_2]], %[[VAL_1]]) : (!cc.ptr, i64) -> !cc.ptr +// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr) -> !cc.ptr // CHECK: } // CHECK-DAG: cc.global constant @[[VAL_GC]] (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> // CHECK-DAG: func.func private @__nvqpp_cudaq_state_createFromData_fp64(!cc.ptr, i64) -> !cc.ptr @@ -490,13 +487,10 @@ void test_combinations(mlir::MLIRContext *ctx) { // CHECK: } // CHECK-LABEL: cc.arg_subst[1] { // CHECK: %[[VAL_0:.*]] = cc.address_of @[[VAL_GC:.*]] : !cc.ptr x 8>> -// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_0]] : !cc.ptr x 8>> -// CHECK: %[[VAL_2:.*]] = arith.constant 8 : i64 -// CHECK: %[[VAL_3:.*]] = cc.alloca !cc.array x 8> -// CHECK: cc.store %[[VAL_1]], %[[VAL_3]] : !cc.ptr x 8>> -// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr x 8>>) -> !cc.ptr -// CHECK: %[[VAL_5:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_4]], %[[VAL_2]]) : (!cc.ptr, i64) -> !cc.ptr -// CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr) -> !cc.ptr +// CHECK: %[[VAL_1:.*]] = arith.constant 8 : i64 +// CHECK: %[[VAL_2:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr x 8>>) -> !cc.ptr +// 
CHECK: %[[VAL_3:.*]] = func.call @__nvqpp_cudaq_state_createFromData_fp64(%[[VAL_2]], %[[VAL_1]]) : (!cc.ptr, i64) -> !cc.ptr +// CHECK: %[[VAL_4:.*]] = cc.cast %[[VAL_3]] : (!cc.ptr) -> !cc.ptr // CHECK: } // CHECK-DAG: cc.global constant @[[VAL_GC]] (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<8xcomplex>) : !cc.array x 8> // CHECK-DAG: func.func private @__nvqpp_cudaq_state_createFromData_fp64(!cc.ptr, i64) -> !cc.ptr diff --git a/test/Quake/replace_state_with_kernel.qke b/test/Quake/replace_state_with_kernel.qke index 70b04e3103..751e29775a 100644 --- a/test/Quake/replace_state_with_kernel.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -10,18 +10,13 @@ module { func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - %0 = cc.string_literal "callee.modified_0" : !cc.ptr> - %1 = cc.cast %0 : (!cc.ptr>) -> !cc.ptr - %2 = call @__nvqpp_cudaq_state_get(%1) : (!cc.ptr) -> !cc.ptr - %3 = call @__nvqpp_cudaq_state_numberOfQubits(%2) : (!cc.ptr) -> i64 - %4 = quake.alloca !quake.veq[%3 : i64] - %5 = quake.init_state %4, %2 : (!quake.veq, !cc.ptr) -> !quake.veq - return + %0 = cc.get_state "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr + %1 = call @__nvqpp_cudaq_state_numberOfQubits(%0) : (!cc.ptr) -> i64 + %2 = quake.alloca !quake.veq[%1 : i64] + %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq +return } - func.func private @__nvqpp_cudaq_state_numberOfQubits(!cc.ptr) -> i64 - func.func private @__nvqpp_cudaq_state_get(!cc.ptr) -> !cc.ptr - func.func private @callee.modified_0() -> !quake.veq attributes {"cudaq-entrypoint", "cudaq-kernel"} { %cst = arith.constant 1.5707963267948966 : f64 %0 = quake.alloca !quake.veq<2> From fe6d409ec21b0f72016690213dd5a3781d9c53cc Mon Sep 17 00:00:00 2001 From: Anna 
Gringauze Date: Tue, 22 Oct 2024 14:47:59 -0700 Subject: [PATCH 15/18] Remove print Signed-off-by: Anna Gringauze --- lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp index 80907bfec1..bdc1898284 100644 --- a/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp +++ b/lib/Optimizer/Transforms/ReplaceStateWithKernel.cpp @@ -74,7 +74,6 @@ class ReplaceStateWithKernelPattern if (auto ptrTy = dyn_cast(stateOp.getType())) { if (isa(ptrTy.getElementType())) { auto *numOfQubits = alloca->getOperand(0).getDefiningOp(); - stateOp.getDefiningOp()->dump(); if (auto getState = stateOp.getDefiningOp()) { auto calleeName = getState.getCalleeName(); From 48704e3bcb648043ba9c1ccd7ecd056d620e88e6 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 14:50:08 -0700 Subject: [PATCH 16/18] Remove getCudaqState references Signed-off-by: Anna Gringauze --- lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp | 3 +-- runtime/common/BaseRemoteRESTQPU.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp index 04eac5b06f..4de20fd7be 100644 --- a/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp +++ b/lib/Optimizer/CodeGen/VerifyNVQIRCalls.cpp @@ -49,8 +49,7 @@ struct VerifyNVQIRCallOpsPass cudaq::getNumQubitsFromCudaqState, cudaq::createCudaqStateFromDataFP32, cudaq::createCudaqStateFromDataFP64, - cudaq::deleteCudaqState, - cudaq::getCudaqState}; + cudaq::deleteCudaqState}; // It must be either NVQIR extension functions or in the allowed list. 
return std::find(NVQIR_FUNCS.begin(), NVQIR_FUNCS.end(), functionName) != NVQIR_FUNCS.end() || diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 0421cde877..0d9a5ddbc9 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -408,8 +408,7 @@ class BaseRemoteRESTQPU : public cudaq::QPU { (funcOp.getName().equals(cudaq::getNumQubitsFromCudaqState) || funcOp.getName().equals(cudaq::createCudaqStateFromDataFP64) || funcOp.getName().equals(cudaq::createCudaqStateFromDataFP32) || - funcOp.getName().equals(cudaq::deleteCudaqState) || - funcOp.getName().equals(cudaq::getCudaqState))) + funcOp.getName().equals(cudaq::deleteCudaqState))) moduleOp.push_back(funcOp.clone()); } // Add any global symbols, including global constant arrays. From 137f621febc0c607dbea69d25eba70e7bcb696ca Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 15:01:25 -0700 Subject: [PATCH 17/18] Minor updates Signed-off-by: Anna Gringauze --- runtime/common/BaseRemoteRESTQPU.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/runtime/common/BaseRemoteRESTQPU.h b/runtime/common/BaseRemoteRESTQPU.h index 0d9a5ddbc9..5cf89c0332 100644 --- a/runtime/common/BaseRemoteRESTQPU.h +++ b/runtime/common/BaseRemoteRESTQPU.h @@ -403,18 +403,21 @@ class BaseRemoteRESTQPU : public cudaq::QPU { for (auto &op : m_module.getOps()) { if (auto funcOp = dyn_cast(op)) { // Add function definitions for runtime functions that must - // be removed after synthesis in cleanup ops. + // be removed after synthesis in cleanup passes. 
+ static const std::vector stateFuncs = { + cudaq::getNumQubitsFromCudaqState, + cudaq::createCudaqStateFromDataFP32, + cudaq::createCudaqStateFromDataFP64}; + if (funcOp.getBody().empty() && - (funcOp.getName().equals(cudaq::getNumQubitsFromCudaqState) || - funcOp.getName().equals(cudaq::createCudaqStateFromDataFP64) || - funcOp.getName().equals(cudaq::createCudaqStateFromDataFP32) || - funcOp.getName().equals(cudaq::deleteCudaqState))) + std::find(stateFuncs.begin(), stateFuncs.end(), funcOp.getName()) != + stateFuncs.end()) moduleOp.push_back(funcOp.clone()); } // Add any global symbols, including global constant arrays. // Global constant arrays can be created during compilation, // `lift-array-alloc`, `argument-synthesis`, `quake-synthesizer`, - // and `get-concrete-matrix`passes. + // and `get-concrete-matrix` passes. if (auto globalOp = dyn_cast(op)) moduleOp.push_back(globalOp.clone()); } From ad7c6bcd26a521f4401e4b46e97e09795a4f6333 Mon Sep 17 00:00:00 2001 From: Anna Gringauze Date: Tue, 22 Oct 2024 18:05:49 -0700 Subject: [PATCH 18/18] Fix failing quake test Signed-off-by: Anna Gringauze --- test/Quake/replace_state_with_kernel.qke | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Quake/replace_state_with_kernel.qke b/test/Quake/replace_state_with_kernel.qke index 751e29775a..09570c6290 100644 --- a/test/Quake/replace_state_with_kernel.qke +++ b/test/Quake/replace_state_with_kernel.qke @@ -10,7 +10,7 @@ module { func.func @foo() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { - %0 = cc.get_state "__nvqpp__mlirgen__test_init_state.modified_0" : !cc.ptr + %0 = cc.get_state "callee.modified_0" : !cc.ptr %1 = call @__nvqpp_cudaq_state_numberOfQubits(%0) : (!cc.ptr) -> i64 %2 = quake.alloca !quake.veq[%1 : i64] %3 = quake.init_state %2, %0 : (!quake.veq, !cc.ptr) -> !quake.veq