最近看了一下微软的Media Foundation,发现其中的EVR使用的是比较旧的Direct3D API,于是想直接使用Direct3D 11 Video API来播放视频,这样既可以在视频上叠加图片和文字,又不必使用Media Foundation的pipeline。视频帧可以通过Media Foundation的Source Reader采集,也可以使用FFmpeg获取,然后直接通过Direct3D 11/12 Video API来渲染。这里只给出拿到视频采样(sample)之后的渲染实现部分:
// Renders one video sample into the client area of mHWnd.
//
// The sample's media buffer is expected to expose IMFDXGIBuffer (i.e. to wrap a
// Direct3D 11 texture); the texture and its subresource index are handed to
// ProcessFrameWithD3D11 together with the current client rectangle.
//
// Parameters:
//   pType            - current media type; only MF_MT_INTERLACE_MODE is read.
//   punInterlaceMode - receives the interlace mode used for this sample: the
//                      media type's mode, or, for mixed content, the mode
//                      derived from the per-sample attributes. Must not be null.
//   pSample          - sample to render. Must not be null.
//
// Returns E_POINTER for null arguments, the first failing HRESULT otherwise,
// and S_OK both on success and when the client rectangle is empty (nothing to
// draw, e.g. a minimized window).
HRESULT CDevice::ProcessSample(ComPtr<IMFMediaType>& pType, UINT32* punInterlaceMode, IMFSample *pSample)
{
    if (pSample == nullptr || punInterlaceMode == nullptr || pType.Get() == nullptr)
    {
        return E_POINTER;
    }

    HRESULT hr = S_OK;
    do
    {
        DWORD cCounts = 0;
        hr = pSample->GetBufferCount(&cCounts);
        if (FAILED(hr)) break;

        // Always end up with a valid buffer or a failure code. Previously a
        // sample with zero or several buffers left mfMB null, and the
        // mfMB.As() call below dereferenced it.
        ComPtr<IMFMediaBuffer> mfMB;
        if (cCounts == 1)
        {
            hr = pSample->GetBufferByIndex(0, &mfMB);
        }
        else
        {
            hr = pSample->ConvertToContiguousBuffer(&mfMB);
        }
        if (FAILED(hr)) break;

        const MFVideoInterlaceMode typeInterlaceMode = static_cast<MFVideoInterlaceMode>(
            MFGetAttributeUINT32(pType.Get(), MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive));
        if (MFVideoInterlace_MixedInterlaceOrProgressive == typeInterlaceMode)
        {
            // Mixed content: the per-sample attributes decide.
            const BOOL fInterlaced = MFGetAttributeUINT32(pSample, MFSampleExtension_Interlaced, FALSE);
            if (!fInterlaced)
            {
                *punInterlaceMode = MFVideoInterlace_Progressive;
            }
            else
            {
                const BOOL fBottomFirst = MFGetAttributeUINT32(pSample, MFSampleExtension_BottomFieldFirst, FALSE);
                *punInterlaceMode = fBottomFirst ? MFVideoInterlace_FieldInterleavedLowerFirst
                                                 : MFVideoInterlace_FieldInterleavedUpperFirst;
            }
        }
        else
        {
            // Non-mixed content: use the media type's mode so the value read
            // below is never whatever the caller happened to leave in place.
            *punInterlaceMode = typeInterlaceMode;
        }

        // A buffer that does not wrap a DXGI surface fails here with the
        // QueryInterface error instead of crashing.
        ComPtr<IMFDXGIBuffer> dxgiBuffer;
        hr = mfMB.As(&dxgiBuffer);
        if (FAILED(hr)) break;

        ComPtr<ID3D11Texture2D> d3d11Texture;
        hr = dxgiBuffer->GetResource(IID_PPV_ARGS(&d3d11Texture));
        if (FAILED(hr)) break;

        UINT dwViewIndex = 0;
        hr = dxgiBuffer->GetSubresourceIndex(&dwViewIndex);
        if (FAILED(hr)) break;

        // rcDest stays empty if GetClientRect fails, so both cases skip the frame.
        RECT rcDest = {};
        GetClientRect(mHWnd, &rcDest);
        if (IsRectEmpty(&rcDest)) break;

        // Propagate the rendering result instead of discarding it.
        hr = ProcessFrameWithD3D11(d3d11Texture.Get(), dwViewIndex, rcDest, *punInterlaceMode, nullptr);
    } while (FALSE);
    return hr;
}
// Converts/scales one input texture into back buffer 0 of mDXGISC1 with the
// Direct3D 11 video processor and presents it.
//
// Parameters:
//   pInTexture2D    - texture holding the decoded frame. Must not be null.
//   dwInViewIndex   - array slice of pInTexture2D that holds the frame.
//   rcDest          - destination rectangle; stored in mDstRect and used as the
//                     swap-chain size (its right/bottom members).
//   unInterlaceMode - forwarded to SetVideoContextParameters.
//   ppVideoOutFrame - optional. The frame is presented directly by this
//                     function, so no output sample is produced; when non-null
//                     it is set to nullptr.
//
// Returns E_POINTER for a null input texture, E_UNEXPECTED when the device,
// immediate context or swap chain has not been created, MF_E_UNSUPPORTED_D3D_TYPE
// when the video processor cannot output B8G8R8A8, otherwise the first failing
// HRESULT or the result of Present.
HRESULT CDevice::ProcessFrameWithD3D11(ID3D11Texture2D *pInTexture2D, UINT dwInViewIndex, RECT rcDest, UINT32 unInterlaceMode, IMFSample **ppVideoOutFrame)
{
    // Give the out-parameter a defined value on every path.
    if (ppVideoOutFrame != nullptr)
    {
        *ppVideoOutFrame = nullptr;
    }
    if (pInTexture2D == nullptr)
    {
        return E_POINTER;
    }
    // These members are dereferenced unconditionally below.
    if (!mD3D11Device || !mD3D11DC || !mDXGISC1)
    {
        return E_UNEXPECTED;
    }

    const UINT kBackBufferCount = 4;
    const DXGI_FORMAT kBackBufferFormat = DXGI_FORMAT_B8G8R8A8_UNORM;

    HRESULT hr = S_OK;
    // Kept so mDstRect can be rolled back if the swap chain never reaches the
    // new size; otherwise the next call would see "no change" and skip the resize.
    const RECT previousDstRect = mDstRect;
    bool dstRectCommitted = false;
    do
    {
        if (!mD3D11VD)
        {
            hr = mD3D11Device.As(&mD3D11VD);
            if (FAILED(hr)) break;
        }
        ComPtr<ID3D11VideoContext> d3d11VC;
        hr = mD3D11DC.As(&d3d11VC);
        if (FAILED(hr)) break;

        // Remember the rectangles in effect before this frame.
        RECT TRectOld = mDstRect;
        RECT SRectOld = mSrcRect; // was mDstRect: copy-paste slip
        UpdateRectangles(&TRectOld, &SRectOld);

        // Update destination rect with the current client rect.
        mDstRect = rcDest;

        D3D11_TEXTURE2D_DESC surfaceDesc;
        pInTexture2D->GetDesc(&surfaceDesc);

        const bool processorMissing = !mD3D11VPE || !mD3D11VP;
        const bool frameSizeChanged = m_imageHeightInPixels != surfaceDesc.Height ||
                                      m_imageWidthInPixels != surfaceDesc.Width;
        if (processorMissing || frameSizeChanged)
        {
            // Drop both objects first so a failure below cannot leave a stale
            // processor paired with a new enumerator; the null check above
            // then forces a retry on the next frame.
            mD3D11VP.Reset();
            mD3D11VPE.Reset();

            m_imageWidthInPixels = surfaceDesc.Width;
            m_imageHeightInPixels = surfaceDesc.Height;

            D3D11_VIDEO_PROCESSOR_CONTENT_DESC contentDesc;
            ZeroMemory(&contentDesc, sizeof(contentDesc));
            contentDesc.InputFrameFormat = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST;
            contentDesc.InputWidth = surfaceDesc.Width;
            contentDesc.InputHeight = surfaceDesc.Height;
            contentDesc.OutputWidth = surfaceDesc.Width;
            contentDesc.OutputHeight = surfaceDesc.Height;
            contentDesc.Usage = D3D11_VIDEO_USAGE_PLAYBACK_NORMAL;
            hr = mD3D11VD->CreateVideoProcessorEnumerator(&contentDesc, &mD3D11VPE);
            if (FAILED(hr)) break;

            // The processor must be able to write the swap-chain format.
            UINT uiFlags = 0;
            hr = mD3D11VPE->CheckVideoProcessorFormat(kBackBufferFormat, &uiFlags);
            if (FAILED(hr) || 0 == (uiFlags & D3D11_VIDEO_PROCESSOR_FORMAT_SUPPORT_OUTPUT))
            {
                hr = MF_E_UNSUPPORTED_D3D_TYPE;
                break;
            }

            mSrcRect.left = 0;
            mSrcRect.top = 0;
            mSrcRect.right = m_uiRealDisplayWidth;
            mSrcRect.bottom = m_uiRealDisplayHeight;

            DWORD index = 0;
            hr = FindBOBProcessorIndex(&index);
            if (FAILED(hr)) break;

            hr = mD3D11VD->CreateVideoProcessor(mD3D11VPE.Get(), index, &mD3D11VP);
            if (FAILED(hr)) break;
        }

        RECT TRect = mDstRect;
        RECT SRect = mSrcRect;
        UpdateRectangles(&TRect, &SRect);

        // Resize the swap chain only when the effective target rect changed.
        // No back-buffer reference is held at this point (all are function
        // locals acquired further down).
        if (!EqualRect(&TRect, &TRectOld))
        {
            hr = mDXGISC1->ResizeBuffers(kBackBufferCount, mDstRect.right, mDstRect.bottom, kBackBufferFormat, 0);
            if (FAILED(hr)) break;
        }
        dstRectCommitted = true;

        ComPtr<ID3D11Texture2D> backBuffer;
        hr = mDXGISC1->GetBuffer(0, IID_PPV_ARGS(&backBuffer));
        if (FAILED(hr)) break;

        // Output view on the back buffer (structure is zeroed, so mip slice 0).
        D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC outputViewDesc;
        ZeroMemory(&outputViewDesc, sizeof(outputViewDesc));
        outputViewDesc.ViewDimension = D3D11_VPOV_DIMENSION_TEXTURE2D;
        outputViewDesc.Texture2D.MipSlice = 0;
        ComPtr<ID3D11VideoProcessorOutputView> pOutputView;
        hr = mD3D11VD->CreateVideoProcessorOutputView(backBuffer.Get(), mD3D11VPE.Get(), &outputViewDesc, &pOutputView);
        if (FAILED(hr)) break;

        // Input view on the requested array slice of the source texture.
        D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC inputViewDesc;
        ZeroMemory(&inputViewDesc, sizeof(inputViewDesc));
        inputViewDesc.ViewDimension = D3D11_VPIV_DIMENSION_TEXTURE2D;
        inputViewDesc.FourCC = 0;
        inputViewDesc.Texture2D.ArraySlice = dwInViewIndex;
        inputViewDesc.Texture2D.MipSlice = 0;
        ComPtr<ID3D11VideoProcessorInputView> pInputView;
        hr = mD3D11VD->CreateVideoProcessorInputView(pInTexture2D, mD3D11VPE.Get(), &inputViewDesc, &pInputView);
        if (FAILED(hr)) break;

        SetVideoContextParameters(d3d11VC.Get(), &SRect, &TRect, unInterlaceMode);

        // Single enabled stream, no past/future reference frames; every field
        // not set here stays zero/null from ZeroMemory.
        D3D11_VIDEO_PROCESSOR_STREAM streamData;
        ZeroMemory(&streamData, sizeof(streamData));
        streamData.Enable = TRUE;
        streamData.OutputIndex = 0;
        streamData.InputFrameOrField = 0;
        streamData.pInputSurface = pInputView.Get();
        hr = d3d11VC->VideoProcessorBlt(mD3D11VP.Get(), pOutputView.Get(), 0, 1, &streamData);
        if (FAILED(hr)) break;

        // The video frame is in the back buffer now; custom images and text
        // can be drawn on top of it here, before presenting.
        hr = mDXGISC1->Present(0, 0);
    } while (FALSE);

    if (!dstRectCommitted)
    {
        mDstRect = previousDstRect;
    }
    return hr;
}
使用Direct3D Video API时,不需要单独处理窗口大小变化的消息,视频就可以自动缩放。视频帧处理完之后,在代码中注释标出的位置再画上自己的图片和文字即可。