diff --git a/renderdoc/api/replay/data_types.h b/renderdoc/api/replay/data_types.h index 3dbb1f1b7f..2423a486d1 100644 --- a/renderdoc/api/replay/data_types.h +++ b/renderdoc/api/replay/data_types.h @@ -2100,12 +2100,13 @@ struct PixelModification { return eventId == o.eventId && directShaderWrite == o.directShaderWrite && unboundPS == o.unboundPS && fragIndex == o.fragIndex && primitiveID == o.primitiveID && - preMod == o.preMod && shaderOut == o.shaderOut && postMod == o.postMod && - sampleMasked == o.sampleMasked && backfaceCulled == o.backfaceCulled && - depthClipped == o.depthClipped && depthBoundsFailed == o.depthBoundsFailed && - viewClipped == o.viewClipped && scissorClipped == o.scissorClipped && - shaderDiscarded == o.shaderDiscarded && depthTestFailed == o.depthTestFailed && - stencilTestFailed == o.stencilTestFailed; + preMod == o.preMod && shaderOut == o.shaderOut && + shaderOutDualSrc == o.shaderOutDualSrc && blendSrc == o.blendSrc && + blendDst == o.blendDst && postMod == o.postMod && sampleMasked == o.sampleMasked && + backfaceCulled == o.backfaceCulled && depthClipped == o.depthClipped && + depthBoundsFailed == o.depthBoundsFailed && viewClipped == o.viewClipped && + scissorClipped == o.scissorClipped && shaderDiscarded == o.shaderDiscarded && + depthTestFailed == o.depthTestFailed && stencilTestFailed == o.stencilTestFailed; } bool operator<(const PixelModification &o) const { @@ -2123,6 +2124,12 @@ struct PixelModification return preMod < o.preMod; if(!(shaderOut == o.shaderOut)) return shaderOut < o.shaderOut; + if(!(shaderOutDualSrc == o.shaderOutDualSrc)) + return shaderOutDualSrc < o.shaderOutDualSrc; + if(!(blendSrc == o.blendSrc)) + return blendSrc < o.blendSrc; + if(!(blendDst == o.blendDst)) + return blendDst < o.blendDst; if(!(postMod == o.postMod)) return postMod < o.postMod; if(!(sampleMasked == o.sampleMasked)) @@ -2175,6 +2182,21 @@ pixel. 
:type: ModificationValue )"); ModificationValue shaderOut; + DOCUMENT(R"(The value that this fragment wrote from the pixel shader to the second output. + +:type: ModificationValue +)"); + ModificationValue shaderOutDualSrc; + DOCUMENT(R"(The source component in the blend equation for this fragment. + +:type: ModificationValue +)"); + ModificationValue blendSrc; + DOCUMENT(R"(The destination component in the blend equation for this fragment. + +:type: ModificationValue +)"); + ModificationValue blendDst; DOCUMENT(R"(The value of the texture after this fragment ran. :type: ModificationValue diff --git a/renderdoc/driver/d3d11/d3d11_pixelhistory.cpp b/renderdoc/driver/d3d11/d3d11_pixelhistory.cpp index 77f436b73f..efa940f10b 100644 --- a/renderdoc/driver/d3d11/d3d11_pixelhistory.cpp +++ b/renderdoc/driver/d3d11/d3d11_pixelhistory.cpp @@ -1940,6 +1940,9 @@ rdcarray D3D11Replay::PixelHistory(rdcarray event // data[3].x (depth) unused // fragments writing to the pixel in this event with original shader mod.shaderOut.col.intValue[1] = int32_t(data[3].y); + mod.shaderOutDualSrc.SetInvalid(); + mod.blendSrc.SetInvalid(); + mod.blendDst.SetInvalid(); } } @@ -2316,6 +2319,9 @@ rdcarray D3D11Replay::PixelHistory(rdcarray event byte *data = shadoutStoreData + sizeof(Vec4f) * pixstoreStride * offsettedSlot; memcpy(&history[h].shaderOut.col.uintValue[0], data, 4 * sizeof(float)); + history[h].shaderOutDualSrc.SetInvalid(); + history[h].blendSrc.SetInvalid(); + history[h].blendDst.SetInvalid(); // primitive ID is in the next slot after that memcpy(&history[h].primitiveID, data + sizeof(Vec4f), sizeof(uint32_t)); diff --git a/renderdoc/driver/gl/gl_pixelhistory.cpp b/renderdoc/driver/gl/gl_pixelhistory.cpp index 515608be3e..648f81805a 100644 --- a/renderdoc/driver/gl/gl_pixelhistory.cpp +++ b/renderdoc/driver/gl/gl_pixelhistory.cpp @@ -1303,6 +1303,9 @@ std::map QueryNumFragmentsByEvent( numFragments = history[i].shaderOut.stencil; history[i].shaderOut.stencil = 
history[i].postMod.stencil; } + history[i].shaderOutDualSrc.SetInvalid(); + history[i].blendSrc.SetInvalid(); + history[i].blendDst.SetInvalid(); eventFragments.emplace(modEvents[i].eventId, numFragments); @@ -1587,6 +1590,9 @@ void QueryShaderOutPerFragment(WrappedOpenGL *driver, GLReplay *replay, int(historyIndex - history.begin())); historyIndex->shaderOut.stencil = oldStencil; } + historyIndex->shaderOutDualSrc.SetInvalid(); + historyIndex->blendSrc.SetInvalid(); + historyIndex->blendDst.SetInvalid(); historyIndex++; } diff --git a/renderdoc/driver/vulkan/vk_debug.h b/renderdoc/driver/vulkan/vk_debug.h index 84c5b2f076..2e0e4cf3a9 100644 --- a/renderdoc/driver/vulkan/vk_debug.h +++ b/renderdoc/driver/vulkan/vk_debug.h @@ -49,8 +49,6 @@ struct VKMeshDisplayPipelines uint32_t secondaryStridePadding = 0; }; -struct VkCopyPixelParams; - struct PixelHistoryResources; class VulkanResourceManager; @@ -97,10 +95,10 @@ class VulkanDebugManager bool PixelHistorySetupResources(PixelHistoryResources &resources, VkImage targetImage, VkExtent3D extent, VkFormat format, VkSampleCountFlagBits samples, const Subresource &sub, uint32_t numEvents); + bool PixelHistorySetupPerFragResources(PixelHistoryResources &resources, uint32_t numEvents, + uint32_t numFragments); bool PixelHistoryDestroyResources(const PixelHistoryResources &resources); - void PixelHistoryCopyPixel(VkCommandBuffer cmd, VkCopyPixelParams &p, size_t offset); - VkImageLayout GetImageLayout(ResourceId image, VkImageAspectFlagBits aspect, uint32_t mip, uint32_t slice); diff --git a/renderdoc/driver/vulkan/vk_pixelhistory.cpp b/renderdoc/driver/vulkan/vk_pixelhistory.cpp index 500ef7b28a..edde480857 100644 --- a/renderdoc/driver/vulkan/vk_pixelhistory.cpp +++ b/renderdoc/driver/vulkan/vk_pixelhistory.cpp @@ -63,11 +63,17 @@ * * - Fourth callback: Per fragment callback (VulkanPixelHistoryPerFragmentCallback) * This callback is used to get per fragment data for each event and fragment (primitive ID, - * shader output 
value, post event value for each fragment). - * For each fragment the draw is replayed 3 times: + * shader output value, dual source shader output value, blending source/destination components, + * and post event value for each fragment). + * For each fragment the draw is replayed 8 times: * 1) with a fragment shader that outputs primitive ID only * 2) with blending OFF, to get shader output value - * 3) with blending ON, to get post modification value + * 3) with blending OFF and a modified shader, to get the output at index 1 (dual source) + * 4) with blending ON, but with the stencil configured to only draw previous fragments + * 5) with blending ON but the destination factor set to 0, to get the source blend component + * 6) with blending ON, but with the stencil configured to only draw previous fragments + * 7) with blending ON but the source factor set to 0, to get the destination blend component + * 8) with blending ON, to get post modification value * For each such replay we set the stencil reference to the fragment number and set the * stencil compare to equal, so it passes for that particular fragment only. * @@ -81,6 +87,25 @@ * * We slot the per frament data correctly accounting for the fragments that were discarded. * + * Dual Source: + * + * To capture the output for both output indices when dual source blending is in use, we need to + * patch the shader so that output index 1 is directed to output index 0. This might seem as simple + * as iterating through all of the outputs, and then for each Index decoration, swapping 0 with 1. + * Unfortunately, this does not work because index 0 is implicitly used if there is no Index + * decoration, meaning index 1 would get swapped into index 0, but index 0 would not get swapped to + * index 1, so multiple outputs end up at index 0. + * + * Another possible implementation would be to swap the target of any Index decorations which use + * index 1 to those which use index 0. 
However, there's no guarantee that there are the same number + * of outputs decorated with index 1 as with index 0, because the Component decoration means that + * e.g. index 0 could be written separately as ra and gb while index 1 is written as one output. + * + * The current implementation first computes what index values are currently in use for each output, + * and then NOPing out all Index decorations and adding new ones with the correctly swapped ones. + * Care is also taken to ensure that built-in outputs (those with the BuiltIn decoration) aren't + * given an Index decoration. + * * Current Limitations: * * - Multiple subpasses @@ -100,6 +125,9 @@ */ #include +#include +#include +#include #include "driver/shaders/spirv/spirv_editor.h" #include "driver/shaders/spirv/spirv_op_helpers.h" #include "maths/formatpacking.h" @@ -228,6 +256,9 @@ struct PerFragmentInfo int32_t primitiveID; uint32_t padding[3]; PixelHistoryValue shaderOut; + PixelHistoryValue shaderOutDualSrc; + PixelHistoryValue blendSrc; + PixelHistoryValue blendDst; PixelHistoryValue postMod; }; @@ -314,14 +345,31 @@ struct PixelHistoryShaderCache if(it != m_ShaderReplacements.end()) return it->second; - VkShaderModule shaderModule = CreateShaderReplacement(shaderId, entryPoint, stage); + VkShaderModule shaderModule = CreateShaderReplacement(shaderId, entryPoint, stage, false); m_ShaderReplacements.insert(std::make_pair(shaderKey, shaderModule)); return shaderModule; } + // Returns a shader that is equivalent to the given shader, except that index 0 outputs + // are directed to index 1 and index 1 outputs are directed to index 0 (as long as both exist). + // The shader is also stripped of side effects as is done by GetShaderWithoutSideEffects. 
+ VkShaderModule GetDualSrcSwappedShader(ResourceId shaderId, const rdcstr &entryPoint, + ShaderStage stage) + { + ShaderKey shaderKey = make_rdcpair(shaderId, entryPoint); + auto it = m_ShaderReplacementsDualSrc.find(shaderKey); + // Check if we processed this shader before. + if(it != m_ShaderReplacementsDualSrc.end()) + return it->second; + + VkShaderModule shaderModule = CreateShaderReplacement(shaderId, entryPoint, stage, true); + m_ShaderReplacementsDualSrc.insert(std::make_pair(shaderKey, shaderModule)); + return shaderModule; + } + private: VkShaderModule CreateShaderReplacement(ResourceId shaderId, const rdcstr &entryName, - ShaderStage stage) + ShaderStage stage, bool swapDualSrc) { const VulkanCreationInfo::ShaderModule &moduleInfo = m_pDriver->GetDebugManager()->GetShaderInfo(shaderId); @@ -343,6 +391,8 @@ struct PixelHistoryShaderCache // just insert VK_NULL_HANDLE to indicate that this shader has been processed. found = true; modified = StripShaderSideEffects(editor, entry.id); + if(swapDualSrc) + modified |= SwapOutputIndex(editor, entry.usedIds, 1); break; } } @@ -589,6 +639,185 @@ struct PixelHistoryShaderCache return modified; } + bool SwapOutputIndex(rdcspv::Editor &editor, const rdcarray usedIds, + uint32_t index_to_swap_with_zero) + { + if(index_to_swap_with_zero == 0) + { + // Swapping 0 with 0 does nothing interesting + return false; + } + + bool modified = false; + + std::set outputs; + for(const rdcspv::Variable var : editor.GetGlobals()) + { + if(var.storage == rdcspv::StorageClass::Output && usedIds.contains(var.id)) + { + outputs.insert(var.id); + } + } + + // First pass: build up maps for location and index. 
+ // Also NOP out any existing index declarations (but don't flag the shader as modified + // while doing so, as we might still decide that it doesn't need to be modified + // afterwards) + std::map id_to_location; + std::unordered_multimap location_to_id; + std::map id_to_index; + + std::set builtins; + + for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations), + end = editor.End(rdcspv::Section::Annotations); + it < end; ++it) + { + if(it.opcode() == rdcspv::Op::Decorate) + { + rdcspv::OpDecorate dec(it); + const rdcspv::Id decorated_id = dec.target; + if(dec.decoration == rdcspv::Decoration::Location && outputs.count(decorated_id) > 0) + { + const uint32_t decorated_location = dec.decoration.location; + + const auto insert_result = + id_to_location.emplace(std::make_pair(decorated_id, decorated_location)); + if(!insert_result.second) + { + // Note: by definition insert_result.first->first == decorated_id + const uint32_t existing_location = insert_result.first->second; + RDCWARN("Variable %u is decorated with multiple Locations: was %u, is %u", + decorated_id.value(), existing_location, decorated_location); + } + else + { + location_to_id.emplace(std::make_pair(decorated_location, decorated_id)); + } + } + if(dec.decoration == rdcspv::Decoration::Index && outputs.count(decorated_id) > 0) + { + const uint32_t decorated_index = dec.decoration.index; + + const auto insert_result = + id_to_index.emplace(std::make_pair(decorated_id, decorated_index)); + if(!insert_result.second) + { + // Note: by definition insert_result.first->first == decorated_id + const uint32_t existing_index = insert_result.first->second; + RDCWARN("Variable %u is decorated with multiple Indexes: was %u, is %u", + decorated_id.value(), existing_index, decorated_index); + } + + it.nopRemove(); + } + if(dec.decoration == rdcspv::Decoration::BuiltIn && outputs.count(decorated_id) > 0) + { + builtins.insert(decorated_id); + } + } + } + + std::unordered_map> location_to_indexes; + for(const 
auto entry : location_to_id) + { + const uint32_t location = entry.first; + const rdcspv::Id id = entry.second; + + const auto index_itr = id_to_index.find(id); + // Index is allowed to be absent, in which case it is implicitly zero + const uint32_t index = (index_itr != id_to_index.end()) ? index_itr->second : 0; + + location_to_indexes[location].insert(index); + } + + std::set locations_to_swap; + for(const auto &entry : location_to_indexes) + { + // https://registry.khronos.org/OpenGL/specs/gl/GLSLangSpec.4.60.html#output-layout-qualifiers + // The GLSL spec says that "Compile-time errors may also be given if at compile time it is + // known the link will fail. [...] It is also a compile-time error if a fragment shader sets a + // layout index to less than 0 or greater than 1." This is enforced by glslang. However, the + // Vulkan spec does not make this requirement (nor does the SPIRV spec). + // Note also that glslang will still compile a shader that uses index 1 but not index 0. + const uint32_t location = entry.first; + const std::set &indexes = entry.second; + + if(indexes.empty() || indexes.count(0) == 0) + { + RDCWARN("Output location %u does not output anything to index 0", location); + continue; + } + + const uint32_t highest_index = *indexes.rbegin(); + if(highest_index != (indexes.size() - 1)) + { + RDCWARN( + "Indexes for output location %u has a gap in the used indexes (highest used: %u, but " + "should be %zu based on size)", + location, highest_index, indexes.size() - 1); + continue; + } + + if(index_to_swap_with_zero <= highest_index) + { + // This location contains index 0 and the index we want to swap, + // so we can safely do the swap. 
+ locations_to_swap.insert(location); + } + } + + for(const rdcspv::Id output : outputs) + { + if(builtins.count(output) > 0) + { + // VUID-StandaloneSpirv-Location-04915: "The Location or Component decorations must not be + // used with BuiltIn" + const auto location_itr = id_to_location.find(output); + if(location_itr != id_to_location.end()) + { + RDCWARN("Variable %u is a BuiltIn but is decorated with Location %u", output.value(), + location_itr->second); + continue; + } + } + else + { + // VUID-StandaloneSpirv-Location-04916: "The Location decorations must be used on + // user-defined variables" (Sec 15.1.2: "The non-built-in variables listed by OpEntryPoint + // with the Input or Output storage class form the user-defined variable interface.") + const auto location_itr = id_to_location.find(output); + if(location_itr == id_to_location.end()) + { + RDCWARN("Variable %u is missing a Location decoration", output.value()); + continue; + } + + const uint32_t location = location_itr->second; + + const auto index_itr = id_to_index.find(output); + // Index is allowed to be absent, in which case it is implicitly zero + const uint32_t existing_index = (index_itr != id_to_index.end()) ? index_itr->second : 0; + + uint32_t new_index = existing_index; + + if(locations_to_swap.count(location) != 0) + { + if(existing_index == 0) + new_index = index_to_swap_with_zero; + else if(existing_index == index_to_swap_with_zero) + new_index = 0; + modified = true; + } + + editor.AddDecoration(rdcspv::OpDecorate( + output, rdcspv::DecorationParam(new_index))); + } + } + + return modified; + } + WrappedVulkan *m_pDriver; std::map m_FixedColFS; std::map m_PrimIDFS; @@ -596,6 +825,7 @@ struct PixelHistoryShaderCache // ShaderKey consists of original shader module ID and entry point name. 
typedef rdcpair ShaderKey; std::map m_ShaderReplacements; + std::map m_ShaderReplacementsDualSrc; GPUBuffer dummybuf; }; @@ -1378,7 +1608,7 @@ struct VulkanPixelHistoryCallback : public VulkanActionCallback return (rpInfo.subpasses.size() > 1); } - // Returns teh color attachment index that corresponds to the target image for + // Returns the color attachment index that corresponds to the target image for // pixel history. uint32_t GetColorAttachmentIndex(const VulkanRenderState &renderstate, uint32_t *framebufferIndex = NULL) @@ -2654,8 +2884,17 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback VkPipeline primitiveIdPipe; // Turn off blending. VkPipeline shaderOutPipe; + // Turn off blending, and swaps output indexes to get the dual source output. + VkPipeline shaderOutDualSrcPipe; + // Enable blending but zero out the destination factor + VkPipeline blendSrcPipe; + // Enable blending but zero out the source factor + VkPipeline blendDstPipe; // Enable blending to get post event values. VkPipeline postModPipe; + // Enable blending to get post event values, and configure stencil to only draw up to + // immediately before the current event. 
+ VkPipeline redrawToBeforePipe; }; void PreDraw(uint32_t eid, VkCommandBuffer cmd) @@ -2719,9 +2958,14 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback state.scissors[i].extent = {2, 2}; } - VkPipeline pipesIter[2]; + VkPipeline pipesIter[3] = {}; + uint32_t outputOffsetIter[3] = {}; pipesIter[0] = pipes.primitiveIdPipe; + outputOffsetIter[0] = offsetof(struct PerFragmentInfo, primitiveID); pipesIter[1] = pipes.shaderOutPipe; + outputOffsetIter[1] = offsetof(struct PerFragmentInfo, shaderOut); + pipesIter[2] = pipes.shaderOutDualSrcPipe; + outputOffsetIter[2] = offsetof(struct PerFragmentInfo, shaderOutDualSrc); VkCopyPixelParams colourCopyParams = {}; colourCopyParams.srcImage = m_CallbackInfo.subImage; @@ -2750,12 +2994,13 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback // Get primitive ID and shader output value for each fragment. for(uint32_t f = 0; f < numFragmentsInEvent; f++) { - for(uint32_t i = 0; i < 2; i++) - { - uint32_t storeOffset = (fragsProcessed + f) * sizeof(PerFragmentInfo); + const uint32_t fragStoreOffset = (fragsProcessed + f) * sizeof(PerFragmentInfo); - VkMarkerRegion region(cmd, StringFormat::Fmt("Getting %s for %u", - i == 0 ? "primitive ID" : "shader output", eid)); + for(uint32_t i = 0; i < (m_HasDualSrc ? 3u : 2u); i++) + { + const uint32_t storeOffset = fragStoreOffset + outputOffsetIter[i]; + const char *type = i == 0 ? "primitive ID" : i == 1 ? "shader output" : "shader output 2"; + VkMarkerRegion region(cmd, StringFormat::Fmt("Getting %s for %u", type, eid)); if(i == 0 && !m_pDriver->GetDeviceEnabledFeatures().geometryShader) { @@ -2772,7 +3017,7 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback // without one of the pipelines (e.g. if there was a geometry shader in use and we can't // read primitive ID in the fragment shader) we can't continue. // technically we can if the geometry shader outs a primitive ID, but that is unlikely. 
- VkMarkerRegion::Set("Can't get primitive ID with geometry shader in use", cmd); + VkMarkerRegion::Set(StringFormat::Fmt("No replacement shader in pipesIter[%d]", i), cmd); ObjDisp(cmd)->CmdFillBuffer(Unwrap(cmd), Unwrap(m_CallbackInfo.dstBuffer), storeOffset, 16, ~0U); @@ -2838,9 +3083,9 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback m_pDriver->ReplayDraw(cmd, *action); state.EndRenderPass(cmd); - if(i == 1) + CopyImagePixel(cmd, colourCopyParams, storeOffset); + if(i != 0) { - storeOffset += offsetof(struct PerFragmentInfo, shaderOut); if(depthEnabled) { VkCopyPixelParams depthCopyParams = colourCopyParams; @@ -2851,7 +3096,6 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback storeOffset + offsetof(struct PixelHistoryValue, depth)); } } - CopyImagePixel(cmd, colourCopyParams, storeOffset); } } @@ -2894,6 +3138,99 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback } } + // Get the blend source and destination components + const ModificationValue &premod = m_EventPremods[eid]; + if(m_HasBlend) + { + for(uint32_t f = 0; f < numFragmentsInEvent; f++) + { + for(uint32_t i = 0; i < 2; i++) + { + const uint32_t offset = (i == 0) ? offsetof(struct PerFragmentInfo, blendSrc) + : offsetof(struct PerFragmentInfo, blendDst); + VkMarkerRegion::Begin(StringFormat::Fmt("Prepare getting blend %s for fragment %u in %u", + i == 0 ? "source" : "destination", f, eid)); + + // Apply all draws BEFORE this point + // It's obvious why this is needed for the destination component; but for the source + // component, it is needed if the source factor uses the destination color or alpha. + state.graphics.pipeline = GetResID(pipes.redrawToBeforePipe); + state.BeginRenderPassAndApplyState(m_pDriver, cmd, VulkanRenderState::BindGraphics, false); + + // Have to reset stencil. 
+ VkClearAttachment stencilAtt = {}; + stencilAtt.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; + VkClearRect rect = {}; + rect.rect.offset.x = m_CallbackInfo.x; + rect.rect.offset.y = m_CallbackInfo.y; + rect.rect.extent.width = 1; + rect.rect.extent.height = 1; + rect.baseArrayLayer = 0; + rect.layerCount = 1; + ObjDisp(cmd)->CmdClearAttachments(Unwrap(cmd), 1, &stencilAtt, 1, &rect); + + // Before starting the draw, initialize the pixel to the premodification value + // for this event, for both color and depth. + VkClearAttachment clearAtts[2] = {}; + + clearAtts[0].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + clearAtts[0].colorAttachment = colorOutputIndex; + memcpy(clearAtts[0].clearValue.color.float32, premod.col.floatValue.data(), + sizeof(clearAtts[0].clearValue.color)); + + clearAtts[1].aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + clearAtts[1].clearValue.depthStencil.depth = premod.depth; + + if(IsDepthOrStencilFormat(m_CallbackInfo.targetImageFormat)) + ObjDisp(cmd)->CmdClearAttachments(Unwrap(cmd), 1, clearAtts + 1, 1, &rect); + else + ObjDisp(cmd)->CmdClearAttachments(Unwrap(cmd), 2, clearAtts, 1, &rect); + + // Note: pipes.redrawToBeforePipe has the stencil compare op set to VK_COMPARE_OP_GREATER, + // so this reference is not inclusive + ObjDisp(cmd)->CmdSetStencilCompareMask(Unwrap(cmd), VK_STENCIL_FACE_FRONT_AND_BACK, 0xff); + ObjDisp(cmd)->CmdSetStencilWriteMask(Unwrap(cmd), VK_STENCIL_FACE_FRONT_AND_BACK, 0xff); + ObjDisp(cmd)->CmdSetStencilReference(Unwrap(cmd), VK_STENCIL_FACE_FRONT_AND_BACK, f); + + const ActionDescription *action = m_pDriver->GetAction(eid); + m_pDriver->ReplayDraw(cmd, *action); + + // Now actually draw the current one. Stencil op is set to VK_COMPARE_OP_EQUAL. + VkMarkerRegion::Set(StringFormat::Fmt("Getting blend %s for fragment %u in %u", + i == 0 ? "source" : "destination", f, eid), + cmd); + + state.graphics.pipeline = GetResID(i == 0 ? 
pipes.blendSrcPipe : pipes.blendDstPipe); + state.BindPipeline(m_pDriver, cmd, VulkanRenderState::BindGraphics, false); + + ObjDisp(cmd)->CmdClearAttachments(Unwrap(cmd), 1, &stencilAtt, 1, &rect); + ObjDisp(cmd)->CmdSetStencilCompareMask(Unwrap(cmd), VK_STENCIL_FACE_FRONT_AND_BACK, 0xff); + ObjDisp(cmd)->CmdSetStencilWriteMask(Unwrap(cmd), VK_STENCIL_FACE_FRONT_AND_BACK, 0xff); + ObjDisp(cmd)->CmdSetStencilReference(Unwrap(cmd), VK_STENCIL_FACE_FRONT_AND_BACK, f); + + m_pDriver->ReplayDraw(cmd, *action); + state.EndRenderPass(cmd); + + CopyImagePixel(cmd, colourCopyParams, + (fragsProcessed + f) * sizeof(PerFragmentInfo) + offset); + + // TODO: is this useful? + if(depthImage != VK_NULL_HANDLE) + { + VkCopyPixelParams depthCopyParams = colourCopyParams; + depthCopyParams.srcImage = depthImage; + depthCopyParams.srcImageLayout = depthLayout; + depthCopyParams.srcImageFormat = depthFormat; + CopyImagePixel(cmd, depthCopyParams, (fragsProcessed + f) * sizeof(PerFragmentInfo) + + offset + + offsetof(struct PixelHistoryValue, depth)); + } + + VkMarkerRegion::End(); + } + } + } + // use the original renderpass and framebuffer attachment, but ensure we have depth and stencil state.SetRenderPass(prevState.GetRenderPass()); state.SetFramebuffer(prevState.GetFramebuffer(), prevState.GetFramebufferAttachments()); @@ -2912,7 +3249,6 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback GetResID(m_CallbackInfo.targetImage), aspect, m_CallbackInfo.targetSubresource.mip, m_CallbackInfo.targetSubresource.slice); - const ModificationValue &premod = m_EventPremods[eid]; // For every fragment except the last one, retrieve post-modification // value. 
for(uint32_t f = 0; f < numFragmentsInEvent - 1; f++) @@ -3055,14 +3391,24 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback EventFlags eventFlags = m_pDriver->GetEventFlags(eid); VkShaderModule replacementShaders[5] = {}; + VkShaderModule replacementDualSourceFragmentShader = VK_NULL_HANDLE; // Clean shaders uint32_t numberOfStages = 5; for(size_t i = 0; i < numberOfStages; i++) { - if((eventFlags & PipeStageRWEventFlags(StageFromIndex(i))) != EventFlags::NoFlags) + ShaderStage stage = StageFromIndex(i); + if(m_pDriver->GetDeviceEnabledFeatures().dualSrcBlend && stage == ShaderStage::Fragment) + { + replacementDualSourceFragmentShader = m_ShaderCache->GetDualSrcSwappedShader( + p.shaders[i].module, p.shaders[i].entryPoint, p.shaders[i].stage); + } + bool rwInStage = (eventFlags & PipeStageRWEventFlags(stage)) != EventFlags::NoFlags; + if(rwInStage) + { replacementShaders[i] = m_ShaderCache->GetShaderWithoutSideEffects( p.shaders[i].module, p.shaders[i].entryPoint, p.shaders[i].stage); + } } for(uint32_t i = 0; i < pipeCreateInfo.stageCount; i++) { @@ -3082,19 +3428,32 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback pipeCreateInfo.renderPass = rp; + ds->front.compareOp = VK_COMPARE_OP_GREATER; + ds->back.compareOp = VK_COMPARE_OP_GREATER; + + // Note: this uses the modified renderpass, as we switch to a different pipeline with the + // modified renderpass + vkr = m_pDriver->vkCreateGraphicsPipelines(m_pDriver->GetDev(), VK_NULL_HANDLE, 1, + &pipeCreateInfo, NULL, &pipes.redrawToBeforePipe); + m_pDriver->CheckVkResult(vkr); + m_PipesToDestroy.push_back(pipes.redrawToBeforePipe); + + // Revert to the previous stencil op + ds->front.compareOp = VK_COMPARE_OP_EQUAL; + ds->back.compareOp = VK_COMPARE_OP_EQUAL; + VkPipelineColorBlendStateCreateInfo *cbs = (VkPipelineColorBlendStateCreateInfo *)pipeCreateInfo.pColorBlendState; - // Turn off blending so that we can get shader output values. 
- VkPipelineColorBlendAttachmentState *atts = - (VkPipelineColorBlendAttachmentState *)cbs->pAttachments; + rdcarray newAtts; + newAtts.resize(colorOutputIndex == cbs->attachmentCount ? cbs->attachmentCount + 1 + : cbs->attachmentCount); + memcpy(newAtts.data(), cbs->pAttachments, + cbs->attachmentCount * sizeof(VkPipelineColorBlendAttachmentState)); // Check if we need to add a new color attachment. if(colorOutputIndex == cbs->attachmentCount) { - newAtts.resize(cbs->attachmentCount + 1); - memcpy(newAtts.data(), cbs->pAttachments, - cbs->attachmentCount * sizeof(VkPipelineColorBlendAttachmentState)); VkPipelineColorBlendAttachmentState newAtt = {}; if(cbs->attachmentCount > 0) { @@ -3105,22 +3464,28 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback else { newAtt.blendEnable = VK_FALSE; - newAtt.srcColorBlendFactor = VK_BLEND_FACTOR_DST_COLOR; } - newAtts[cbs->attachmentCount] = newAtt; - cbs->attachmentCount = (uint32_t)newAtts.size(); - cbs->pAttachments = newAtts.data(); - - atts = newAtts.data(); + newAtts[colorOutputIndex] = newAtt; + cbs->logicOpEnable = VK_FALSE; } + cbs->attachmentCount = (uint32_t)newAtts.size(); + cbs->pAttachments = newAtts.data(); + + // Turn off blending so that we can get shader output values. 
for(uint32_t i = 0; i < cbs->attachmentCount; i++) { - atts[i].blendEnable = 0; - atts[i].colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + if(newAtts[i].blendEnable) + m_HasBlend = true; + newAtts[i].blendEnable = VK_FALSE; + newAtts[i].colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; } + // Logic ops treat it as if blendEnable were false + if(cbs->logicOpEnable) + m_HasBlend = false; + { ds->depthBoundsTestEnable = VK_FALSE; ds->depthWriteEnable = VK_TRUE; @@ -3133,6 +3498,73 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback m_PipesToDestroy.push_back(pipes.shaderOutPipe); + if(replacementDualSourceFragmentShader != VK_NULL_HANDLE) + { + m_HasDualSrc = true; + rdcarray dual_src_stages = stages; + for(uint32_t i = 0; i < pipeCreateInfo.stageCount; i++) + { + if(dual_src_stages[i].stage == VK_SHADER_STAGE_FRAGMENT_BIT) + dual_src_stages[i].module = replacementDualSourceFragmentShader; + } + + pipeCreateInfo.pStages = dual_src_stages.data(); + + vkr = m_pDriver->vkCreateGraphicsPipelines(m_pDriver->GetDev(), VK_NULL_HANDLE, 1, + &pipeCreateInfo, NULL, &pipes.shaderOutDualSrcPipe); + m_pDriver->CheckVkResult(vkr); + + m_PipesToDestroy.push_back(pipes.shaderOutDualSrcPipe); + + pipeCreateInfo.pStages = stages.data(); + } + + if(m_HasBlend) + { + rdcarray blendModifiedAtts; + blendModifiedAtts.resize(newAtts.size()); + cbs->pAttachments = blendModifiedAtts.data(); + + // Get the source component (shader out * source factor) of the blend equation + for(uint32_t i = 0; i < cbs->attachmentCount; i++) + { + blendModifiedAtts[i] = newAtts[i]; + blendModifiedAtts[i].blendEnable = VK_TRUE; + // Without changing the op, VK_BLEND_OP_REVERSE_SUBTRACT would cause problems + blendModifiedAtts[i].colorBlendOp = VK_BLEND_OP_ADD; + blendModifiedAtts[i].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; + 
blendModifiedAtts[i].alphaBlendOp = VK_BLEND_OP_ADD; + blendModifiedAtts[i].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + } + + vkr = m_pDriver->vkCreateGraphicsPipelines(m_pDriver->GetDev(), VK_NULL_HANDLE, 1, + &pipeCreateInfo, NULL, &pipes.blendSrcPipe); + m_pDriver->CheckVkResult(vkr); + + m_PipesToDestroy.push_back(pipes.blendSrcPipe); + + // Get the destination component (pre mod * destination factor) of the blend equation + for(uint32_t i = 0; i < cbs->attachmentCount; i++) + { + blendModifiedAtts[i] = newAtts[i]; + blendModifiedAtts[i].blendEnable = VK_TRUE; + // Without changing the op, VK_BLEND_OP_SUBTRACT would cause problems + blendModifiedAtts[i].colorBlendOp = VK_BLEND_OP_ADD; + blendModifiedAtts[i].srcColorBlendFactor = VK_BLEND_FACTOR_ZERO; + blendModifiedAtts[i].alphaBlendOp = VK_BLEND_OP_ADD; + blendModifiedAtts[i].srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; + } + + vkr = m_pDriver->vkCreateGraphicsPipelines(m_pDriver->GetDev(), VK_NULL_HANDLE, 1, + &pipeCreateInfo, NULL, &pipes.blendDstPipe); + m_pDriver->CheckVkResult(vkr); + + m_PipesToDestroy.push_back(pipes.blendDstPipe); + + // Return to blending being disabled + cbs->pAttachments = newAtts.data(); + } + { ds->depthTestEnable = VK_FALSE; ds->depthWriteEnable = VK_FALSE; @@ -3212,6 +3644,9 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback return it->second; } + bool m_HasBlend = false; + bool m_HasDualSrc = false; + private: // For each event, specifies where the occlusion query results start. 
std::map m_EventIndices; @@ -3361,9 +3796,9 @@ bool VulkanDebugManager::PixelHistorySetupResources(PixelHistoryResources &resou VkFormat format, VkSampleCountFlagBits samples, const Subresource &sub, uint32_t numEvents) { - VkMarkerRegion region(StringFormat::Fmt("PixelHistorySetupResources %ux%ux%u %s %ux MSAA", - extent.width, extent.height, extent.depth, - ToStr(format).c_str(), samples)); + VkMarkerRegion region( + StringFormat::Fmt("PixelHistorySetupResources %ux%ux%u %s %ux MSAA, %u events", extent.width, + extent.height, extent.depth, ToStr(format).c_str(), samples, numEvents)); VulkanCreationInfo::Image targetImageInfo = GetImageInfo(GetResID(targetImage)); VkImage colorImage; @@ -3473,8 +3908,6 @@ bool VulkanDebugManager::PixelHistorySetupResources(PixelHistoryResources &resou CheckVkResult(vkr); VkBufferCreateInfo bufferInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - // TODO: the size for memory is calculated to fit pre and post modification values and - // stencil values. But we might run out of space when getting per fragment data. bufferInfo.size = AlignUp((uint32_t)(numEvents * sizeof(EventInfo)), 4096U); bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; @@ -3530,6 +3963,99 @@ bool VulkanDebugManager::PixelHistorySetupResources(PixelHistoryResources &resou return true; }
+// Makes sure resources.dstBuffer is big enough to hold one PerFragmentInfo for each of numFrags
+// fragments.
+//
+// The size of the existing buffer is taken to be AlignUp(numEvents * sizeof(EventInfo), 4096).
+// If that already covers the aligned size needed for numFrags entries the buffer is left
+// untouched. Otherwise the old buffer and its memory are destroyed, a new buffer of the required
+// size is created with freshly allocated memory bound to it, and the whole buffer is filled with
+// zeroes.
+//
+// Returns true if the existing buffer was kept or a new one was set up. Returns false if creating
+// the buffer, allocating or binding its memory fails, or if no command buffer could be obtained;
+// resources.dstBuffer / resources.bufferMemory may then be only partially set up.
+//
+// NOTE(review): both sizes are truncated to uint32_t before being aligned - confirm that
+// numFrags * sizeof(PerFragmentInfo) can never reach 4GiB.
+bool VulkanDebugManager::PixelHistorySetupPerFragResources(PixelHistoryResources &resources,
+                                                           uint32_t numEvents, uint32_t numFrags)
+{
+  const uint32_t existingBufferSize = AlignUp((uint32_t)(numEvents * sizeof(EventInfo)), 4096U);
+  const uint32_t requiredBufferSize = AlignUp((uint32_t)(numFrags * sizeof(PerFragmentInfo)), 4096U);
+
+  // If the existing buffer is big enough for all of the fragments, we can re-use it.
+  const bool canReuseBuffer = existingBufferSize >= requiredBufferSize;
+
+  VkMarkerRegion region(StringFormat::Fmt(
+      "PixelHistorySetupPerFragResources %u events %u frags, buffer size %u -> %u, %s old buffer",
+      numEvents, numFrags, existingBufferSize, requiredBufferSize,
+      canReuseBuffer ? "reusing" : "NOT reusing"));
+
+  if(canReuseBuffer)
+    return true;
+
+  // Otherwise, destroy it and create a new one that's big enough in its place.
+  VkDevice dev = m_pDriver->GetDev();
+
+  if(resources.dstBuffer != VK_NULL_HANDLE)
+    m_pDriver->vkDestroyBuffer(dev, resources.dstBuffer, NULL);
+  if(resources.bufferMemory != VK_NULL_HANDLE)
+    m_pDriver->vkFreeMemory(dev, resources.bufferMemory, NULL);
+  resources.dstBuffer = VK_NULL_HANDLE;
+  resources.bufferMemory = VK_NULL_HANDLE;
+
+  VkBufferCreateInfo bufferInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
+  bufferInfo.size = requiredBufferSize;
+  bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+
+  VkResult vkr = m_pDriver->vkCreateBuffer(m_Device, &bufferInfo, NULL, &resources.dstBuffer);
+  CheckVkResult(vkr);
+
+  // Don't query memory requirements for, or bind memory to, a buffer that was never created.
+  if(vkr != VK_SUCCESS)
+    return false;
+
+  // Allocate memory
+  VkMemoryRequirements mrq = {};
+  m_pDriver->vkGetBufferMemoryRequirements(m_Device, resources.dstBuffer, &mrq);
+  VkMemoryAllocateInfo allocInfo = {
+      VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, NULL, mrq.size,
+      m_pDriver->GetReadbackMemoryIndex(mrq.memoryTypeBits),
+  };
+  vkr = m_pDriver->vkAllocateMemory(m_Device, &allocInfo, NULL, &resources.bufferMemory);
+  CheckVkResult(vkr);
+
+  if(vkr != VK_SUCCESS)
+    return false;
+
+  vkr = m_pDriver->vkBindBufferMemory(m_Device, resources.dstBuffer, resources.bufferMemory, 0);
+  CheckVkResult(vkr);
+
+  // The buffer is unusable without memory bound to it.
+  if(vkr != VK_SUCCESS)
+    return false;
+
+  VkCommandBuffer cmd = m_pDriver->GetNextCmd();
+  VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
+                                        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
+
+  if(cmd == VK_NULL_HANDLE)
+    return false;
+
+  // Fill the whole new buffer with zeroes, then submit and flush the queue.
+  vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
+  CheckVkResult(vkr);
+  ObjDisp(cmd)->CmdFillBuffer(Unwrap(cmd), Unwrap(resources.dstBuffer), 0, VK_WHOLE_SIZE, 0);
+
+  vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd));
+  CheckVkResult(vkr);
+  m_pDriver->SubmitCmds();
+  m_pDriver->FlushQ();
+
+  return true;
+}
+
 VkDescriptorSet VulkanReplay::GetPixelHistoryDescriptor() { VkDescriptorSet descSet; @@ -3972,6 +4474,9 @@ rdcarray VulkanReplay::PixelHistory(rdcarray even mod.preMod.SetInvalid(); mod.postMod.SetInvalid(); mod.shaderOut.SetInvalid(); + mod.shaderOutDualSrc.SetInvalid(); + mod.blendSrc.SetInvalid(); + mod.blendDst.SetInvalid(); h++; continue; } @@ -3999,6 +4504,9 @@ rdcarray VulkanReplay::PixelHistory(rdcarray even int32_t fragsClipped = int32_t(ei.dsWithShaderDiscard[4]); mod.shaderOut.col.intValue[0] = frags; mod.shaderOut.col.intValue[1] = fragsClipped; + mod.shaderOutDualSrc.SetInvalid(); + mod.blendSrc.SetInvalid(); + mod.blendDst.SetInvalid(); bool someFragsClipped = (fragsClipped < frags); mod.primitiveID = someFragsClipped; // Draws in secondary command buffers will fail this check, @@ -4029,6 +4537,17 @@ rdcarray VulkanReplay::PixelHistory(rdcarray even if(eventsWithFrags.size() > 0) { + uint32_t numFrags = 0; + for(auto &item : eventsWithFrags) + { + numFrags += item.second; + } + + GetDebugManager()->PixelHistorySetupPerFragResources(resources, (uint32_t)events.size(), + numFrags); + + callbackInfo.dstBuffer = resources.dstBuffer; + // Replay to get shader output value, post modification value and primitive ID for every // fragment.
VulkanPixelHistoryPerFragmentCallback perFragmentCB(m_pDriver, shaderCache, callbackInfo, @@ -4145,6 +4664,22 @@ rdcarray VulkanReplay::PixelHistory(rdcarray even { history[h].preMod = history[h - 1].postMod; } + + if(perFragmentCB.m_HasDualSrc) + FillInColor(shaderOutFormat, bp[offset].shaderOutDualSrc, history[h].shaderOutDualSrc); + else + history[h].shaderOutDualSrc.SetInvalid(); + + if(perFragmentCB.m_HasBlend) + { + FillInColor(shaderOutFormat, bp[offset].blendSrc, history[h].blendSrc); + FillInColor(shaderOutFormat, bp[offset].blendDst, history[h].blendDst); + } + else + { + history[h].blendSrc.SetInvalid(); + history[h].blendDst.SetInvalid(); + } } // check the depth value between premod/shaderout against the known test if we have valid @@ -4194,6 +4729,8 @@ rdcarray VulkanReplay::PixelHistory(rdcarray even history[h].depthBoundsFailed = true; } } + + m_pDriver->vkUnmapMemory(dev, resources.bufferMemory); } SAFE_DELETE(tfCb); diff --git a/renderdoc/replay/renderdoc_serialise.inl b/renderdoc/replay/renderdoc_serialise.inl index 058149e3bb..4fd593c975 100644 --- a/renderdoc/replay/renderdoc_serialise.inl +++ b/renderdoc/replay/renderdoc_serialise.inl @@ -893,6 +893,9 @@ void DoSerialise(SerialiserType &ser, PixelModification &el) SERIALISE_MEMBER(preMod); SERIALISE_MEMBER(shaderOut); + SERIALISE_MEMBER(shaderOutDualSrc); + SERIALISE_MEMBER(blendSrc); + SERIALISE_MEMBER(blendDst); SERIALISE_MEMBER(postMod); SERIALISE_MEMBER(sampleMasked); @@ -906,7 +909,7 @@ void DoSerialise(SerialiserType &ser, PixelModification &el) SERIALISE_MEMBER(stencilTestFailed); SERIALISE_MEMBER(predicationSkipped); - SIZE_CHECK(100); + SIZE_CHECK(172); } template