diff options
author | CrystalP <crystalp@kodi.tv> | 2023-07-22 00:54:08 -0400 |
---|---|---|
committer | CrystalP <crystalp@kodi.tv> | 2023-07-29 14:14:17 -0400 |
commit | 46dd53e0a4be248338dbc5d8994f93cf8228a53c (patch) | |
tree | 7387b7963529076d2f045a25c8336c3345bd3b0a | |
parent | bb4fd820267b69aba8e6ba546a78ae7d755d8589 (diff) |
[dxva] Add synchronization for the separate decoder device / shared buffers path
This fixes flashes or wrong pictures when skipping in videos on AMD (all render methods)
and nVidia (software renderer). Intel is not affected.
The issue is caused by the lack of synchronization between the main d3d device and the device
of the decoder (when they are separate): video decoding can complete after the surface has
already been rendered, or even presented.
ID3D11Fence, which fixes the issue on both AMD and nVidia, is only available on Windows 10 Creators Update (version 1703) and above.
ID3D11Query doesn't help on AMD; its effect on nVidia is unknown.
The GPU fence is signaled right after the picture decode is queued, and a GPU wait instruction is placed
in the queue of the device used for rendering.
-rw-r--r-- | xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp | 129 | ||||
-rw-r--r-- | xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h | 16 | ||||
-rw-r--r-- | xbmc/rendering/dx/DeviceResources.cpp | 8 | ||||
-rw-r--r-- | xbmc/rendering/dx/DeviceResources.h | 2 |
4 files changed, 150 insertions, 5 deletions
diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp index 5ba6cc3ca1..ffaa06604e 100644 --- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp +++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp @@ -32,7 +32,6 @@ #include <mutex> #include <Windows.h> -#include <d3d11_4.h> #include <dxva.h> #include <initguid.h> #include <sdkddkver.h> @@ -783,6 +782,12 @@ void DXVA::CVideoBuffer::Unref() av_frame_unref(m_pFrame); } +CVideoBufferShared::~CVideoBufferShared() +{ + if (m_handleFence != INVALID_HANDLE_VALUE) + CloseHandle(m_handleFence); +} + HRESULT CVideoBufferShared::GetResource(ID3D11Resource** ppResource) { HRESULT hr = S_OK; @@ -793,13 +798,49 @@ HRESULT CVideoBufferShared::GetResource(ID3D11Resource** ppResource) { // open resource on app device ComPtr<ID3D11Device> pD3DDevice = DX::DeviceResources::Get()->GetD3DDevice(); - hr = pD3DDevice->OpenSharedResource(handle, __uuidof(ID3D11Resource), &m_sharedRes); + if (FAILED(hr = pD3DDevice->OpenSharedResource(handle, __uuidof(ID3D11Resource), &m_sharedRes))) + { + CLog::LogF(LOGDEBUG, "unable to open the shared resource, error description: {}", + DX::GetErrorDescription(hr)); + return hr; + } + + // open fence on app device. 
Errors if any are not blocking, log only + if (m_handleFence != INVALID_HANDLE_VALUE) + { + ComPtr<ID3D11DeviceContext1> context1 = DX::DeviceResources::Get()->GetImmediateContext(); + ComPtr<ID3D11Device5> device5; + if (FAILED(hr = context1.As(&m_appContext4))) + { + CLog::LogF(LOGDEBUG, "ID3D11DeviceContext4 is not available, error description: {}", + DX::GetErrorDescription(hr)); + } + else if (FAILED(hr = pD3DDevice.As(&device5))) + { + CLog::LogF(LOGDEBUG, "ID3D11Device5 is not available, error description: {}", + DX::GetErrorDescription(hr)); + m_appContext4 = nullptr; + } + else if (FAILED(hr = device5->OpenSharedFence(m_handleFence, IID_PPV_ARGS(&m_appFence)))) + { + CLog::LogF(LOGDEBUG, "unable to open the shared fence, error description: {}", + DX::GetErrorDescription(hr)); + m_appContext4 = nullptr; + } + } } - if (SUCCEEDED(hr)) - hr = m_sharedRes.CopyTo(ppResource); + if (m_appFence) + { + // Make the GPU wait for the fence value that produced the picture + if (FAILED(hr = m_appContext4->Wait(m_appFence.Get(), m_fenceValue))) + { + CLog::LogF(LOGDEBUG, "error waiting for the fence value, error description: {}", + DX::GetErrorDescription(hr)); + } + } - return hr; + return m_sharedRes.CopyTo(ppResource); } void CVideoBufferShared::Initialize(CDecoder* decoder) @@ -807,7 +848,85 @@ void CVideoBufferShared::Initialize(CDecoder* decoder) CVideoBuffer::Initialize(decoder); if (handle == INVALID_HANDLE_VALUE) + { handle = decoder->m_sharedHandle; + if (DX::DeviceResources::Get()->UseFence()) + InitializeFence(decoder); + } + // Set the fence to wait until this picture is ready + SetFence(); +} + +void CVideoBufferShared::InitializeFence(CDecoder* decoder) +{ + if (!decoder) + { + CLog::LogF(LOGERROR, "NULL decoder"); + return; + } + + CLog::LogF(LOGDEBUG, "activating fence synchronization."); + + ComPtr<ID3D11Device> device; + decoder->m_pD3D11Context->GetDevice(&device); + ComPtr<ID3D11DeviceContext> immediateContext; + 
device->GetImmediateContext(&immediateContext); + ComPtr<ID3D11Device5> d3ddev5; + + HRESULT hr; + if (FAILED(hr = immediateContext.As(&m_deviceContext4))) + { + CLog::LogF(LOGDEBUG, "ID3D11DeviceContext4 is not available, error description: {}", + DX::GetErrorDescription(hr)); + goto error; + } + + if (FAILED(hr = device.As(&d3ddev5))) + { + CLog::LogF(LOGDEBUG, "ID3D11Device5 is not available, error description: {}", + DX::GetErrorDescription(hr)); + goto error; + } + + if (FAILED(hr = d3ddev5->CreateFence(0, D3D11_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_fence)))) + { + CLog::LogF(LOGDEBUG, "unable to create ID3D11Fence, error description: {}", + DX::GetErrorDescription(hr)); + goto error; + } + + if (FAILED(hr = m_fence->CreateSharedHandle(NULL, GENERIC_ALL, NULL, &m_handleFence))) + { + CLog::LogF(LOGDEBUG, "unable to create the shared handle of the fence, error description: {}", + DX::GetErrorDescription(hr)); + goto error; + } + + CLog::LogF(LOGINFO, "fence synchronization activated."); + + return; + +error: + CLog::LogF(LOGWARNING, "The dxva decoder will run without fence synchronization of the shared " + "surfaces with the main device."); + + m_deviceContext4 = nullptr; + m_fence = nullptr; + m_handleFence = INVALID_HANDLE_VALUE; +} + +void CVideoBufferShared::SetFence() +{ + if (m_fence) + { + static UINT64 fenceValue = 0; + // Not called from multiple threads, no synchronization needed to increment + fenceValue++; + + m_fenceValue = fenceValue; + + m_deviceContext4->Signal(m_fence.Get(), m_fenceValue); + } } void CVideoBufferCopy::Initialize(CDecoder* decoder) diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h index 969069cb5d..f633eead60 100644 --- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h +++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h @@ -16,6 +16,7 @@ #include <mutex> #include <vector> +#include <d3d11_4.h> #include <wrl/client.h> extern "C" { @@ -62,13 +63,28 @@ class CVideoBufferShared : 
public CVideoBuffer public: HRESULT GetResource(ID3D11Resource** ppResource) override; void Initialize(CDecoder* decoder) override; + virtual ~CVideoBufferShared(); protected: explicit CVideoBufferShared(int id) : CVideoBuffer(id) {} + void InitializeFence(CDecoder* decoder); + void SetFence(); HANDLE handle = INVALID_HANDLE_VALUE; Microsoft::WRL::ComPtr<ID3D11Resource> m_sharedRes; + + /*! \brief decoder-side fence object */ + Microsoft::WRL::ComPtr<ID3D11Fence> m_fence; + /*! \brief decoder-side context */ + Microsoft::WRL::ComPtr<ID3D11DeviceContext4> m_deviceContext4; + /*! \brief fence shared handle that allows opening the fence on a different device */ + HANDLE m_handleFence{INVALID_HANDLE_VALUE}; + UINT64 m_fenceValue{0}; + /*! \brief app-side fence object */ + Microsoft::WRL::ComPtr<ID3D11Fence> m_appFence; + /*! \brief app-side context */ + Microsoft::WRL::ComPtr<ID3D11DeviceContext4> m_appContext4; }; class CVideoBufferCopy : public CVideoBufferShared diff --git a/xbmc/rendering/dx/DeviceResources.cpp b/xbmc/rendering/dx/DeviceResources.cpp index 851642f270..fca6770345 100644 --- a/xbmc/rendering/dx/DeviceResources.cpp +++ b/xbmc/rendering/dx/DeviceResources.cpp @@ -1163,6 +1163,14 @@ void DX::DeviceResources::CheckDXVA2SharedDecoderSurfaces() CLog::LogF(LOGINFO, "DXVA2 shared decoder surfaces is{}supported", m_DXVA2SharedDecoderSurfaces ? " " : " NOT "); + m_DXVA2UseFence = m_DXVA2SharedDecoderSurfaces && + (ad.VendorId == PCIV_NVIDIA || ad.VendorId == PCIV_AMD) && + CSysInfo::IsWindowsVersionAtLeast(CSysInfo::WindowsVersionWin10_1703); + + if (m_DXVA2SharedDecoderSurfaces) + CLog::LogF(LOGINFO, "DXVA2 shared decoder surfaces {} fence synchronization.", + m_DXVA2UseFence ? 
"WITH" : "WITHOUT"); + m_DXVASuperResolutionSupport = m_d3dFeatureLevel >= D3D_FEATURE_LEVEL_12_1 && ((ad.VendorId == PCIV_Intel && driver.valid && driver.majorVersion >= 31) || diff --git a/xbmc/rendering/dx/DeviceResources.h b/xbmc/rendering/dx/DeviceResources.h index 123517c29a..d72ea9f9f6 100644 --- a/xbmc/rendering/dx/DeviceResources.h +++ b/xbmc/rendering/dx/DeviceResources.h @@ -115,6 +115,7 @@ namespace DX bool IsNV12SharedTexturesSupported() const { return m_NV12SharedTexturesSupport; } bool IsDXVA2SharedDecoderSurfaces() const { return m_DXVA2SharedDecoderSurfaces; } bool IsSuperResolutionSupported() const { return m_DXVASuperResolutionSupport; } + bool UseFence() const { return m_DXVA2UseFence; } // Gets debug info from swapchain DEBUG_INFO_RENDER GetDebugInfo() const; @@ -188,5 +189,6 @@ namespace DX bool m_DXVA2SharedDecoderSurfaces{false}; bool m_DXVASuperResolutionSupport{false}; bool m_usedSwapChain{false}; + bool m_DXVA2UseFence{false}; }; } |