1
0
mirror of https://github.com/jojo61/vdr-plugin-softhdcuvid.git synced 2023-10-10 13:37:41 +02:00

fixed PCIe memory usage

This commit is contained in:
jojo61 2018-10-10 20:11:43 +02:00
parent 391ce04657
commit 1640642664

60
video.c
View File

@ -1597,13 +1597,12 @@ typedef struct _cuvid_decoder_
CUcontext cuda_ctx;
cudaStream_t stream; // make my own cuda stream
CUdeviceptr dpBackBuffer;
CUgraphicsResource cuResource;
int SurfaceWrite; ///< write pointer
int SurfaceRead; ///< read pointer
atomic_t SurfacesFilled; ///< how many of the buffer is used
CUarray cu_array[CODEC_SURFACES_MAX][2];
CUarray cu_array[CODEC_SURFACES_MAX][2];
CUgraphicsResource cu_res[CODEC_SURFACES_MAX][2];
GLuint gl_textures[CODEC_SURFACES_MAX*2]; // where we will copy the CUDA result
@ -1735,7 +1734,7 @@ static void CuvidCreateSurfaces(CuvidDecoder * decoder, int width, int height,en
decoder->SurfacesNeeded = VIDEO_SURFACES_MAX;
}
#endif
Debug(3, "video/cuvid: %s: %dx%d * %d cuda_ctx %p\n", __FUNCTION__, width, height, decoder->SurfacesNeeded,decoder->cuda_ctx);
Debug(3, "video/cuvid: %s: %dx%d * %d \n", __FUNCTION__, width, height, decoder->SurfacesNeeded);
// allocate only the number of needed surfaces
decoder->SurfaceFreeN = decoder->SurfacesNeeded;
@ -1765,15 +1764,13 @@ static void CuvidDestroySurfaces(CuvidDecoder * decoder)
glXMakeCurrent(XlibDisplay, VideoWindow, GlxContext);
GlxCheck();
if (decoder->cuda_ctx) {
checkCudaErrors(cuCtxPushCurrent(decoder->cuda_ctx));
for (i=0;i<decoder->SurfacesNeeded;i++) {
for (j=0;j<2;j++) {
checkCudaErrors(cuGraphicsUnregisterResource(decoder->cu_res[i][j]));
}
for (i=0;i<decoder->SurfacesNeeded;i++) {
for (j=0;j<2;j++) {
checkCudaErrors(cuGraphicsUnregisterResource(decoder->cu_res[i][j]));
}
checkCudaErrors(cuCtxPopCurrent(NULL));
}
glDeleteTextures(CODEC_SURFACES_MAX*2,(GLuint*)&decoder->gl_textures);
GlxCheck();
@ -1985,7 +1982,7 @@ Debug(3,"Cuvid Clean up\n");
static void CuvidDelHwDecoder(CuvidDecoder * decoder)
{
int i,n;
Debug(3,"cuvid del hw decoder cuda_ctx %p\n",decoder->cuda_ctx);
Debug(3,"cuvid del hw decoder \n");
if (decoder == CuvidDecoders[0])
pthread_mutex_lock(&VideoLockMutex);
@ -1997,12 +1994,6 @@ Debug(3,"cuvid del hw decoder cuda_ctx %p\n",decoder->cuda_ctx);
if (decoder == CuvidDecoders[0])
pthread_mutex_unlock(&VideoLockMutex);
#if 0
if (decoder->cuda_ctx) {
cuCtxDestroy (decoder->cuda_ctx);
decoder->cuda_ctx = NULL;
}
#endif
glXMakeCurrent(XlibDisplay, None, NULL);
for (i = 0; i < CuvidDecoderN; ++i) {
if (CuvidDecoders[i] == decoder) {
@ -2086,18 +2077,15 @@ void
createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned int size_y, enum AVPixelFormat PixFmt)
{
int n,i;
int n,i,size;
CUcontext dummy;
// glXMakeCurrent(XlibDisplay, VideoWindow, GlxSharedContext);
glXMakeCurrent(XlibDisplay, VideoWindow, GlxContext);
GlxCheck();
glGenBuffers(1,&vao_buffer);
GlxCheck();
if (decoder->cuda_ctx)
checkCudaErrors(cuCtxPushCurrent(decoder->cuda_ctx));
Debug(3,"video/vdpau: create %d Textures Format %s w %d h %d \n",anz,PixFmt==AV_PIX_FMT_NV12?"NV12":"P010",size_x,size_y);
// create texture planes
@ -2119,17 +2107,16 @@ createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned i
glTexImage2D(GL_TEXTURE_2D, 0,n==0?GL_R16:GL_RG16 ,n==0?size_x:size_x/2, n==0?size_y:size_y/2, 0, n==0?GL_RED:GL_RG , GL_UNSIGNED_SHORT, NULL);
SDK_CHECK_ERROR_GL();
// register this texture with CUDA
if (decoder->cuda_ctx) {
checkCudaErrors(cuGraphicsGLRegisterImage(&decoder->cu_res[i][n], decoder->gl_textures[i*2+n],GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
checkCudaErrors(cuGraphicsMapResources(1, &decoder->cu_res[i][n], 0));
checkCudaErrors(cuGraphicsSubResourceGetMappedArray(&decoder->cu_array[i][n], decoder->cu_res[i][n],0, 0));
checkCudaErrors(cuGraphicsUnmapResources(1, &decoder->cu_res[i][n], 0));
}
checkCudaErrors(cuGraphicsGLRegisterImage(&decoder->cu_res[i][n], decoder->gl_textures[i*2+n],GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
checkCudaErrors(cuGraphicsMapResources(1, &decoder->cu_res[i][n], 0));
checkCudaErrors(cuGraphicsSubResourceGetMappedArray(&decoder->cu_array[i][n], decoder->cu_res[i][n],0, 0));
checkCudaErrors(cuGraphicsUnmapResources(1, &decoder->cu_res[i][n], 0));
}
}
glBindTexture(GL_TEXTURE_2D, 0);
if (decoder->cuda_ctx)
checkCudaErrors(cuCtxPopCurrent(&dummy));
}
// copy image and process using CUDA
@ -2138,27 +2125,21 @@ void generateCUDAImage(CuvidDecoder * decoder,int index, const AVFrame *frame,in
int n,version;
CUcontext dummy=NULL;
if (decoder->cuda_ctx)
checkCudaErrors(cuCtxPushCurrent(decoder->cuda_ctx));
for (n = 0; n < 2; n++) { //
// widthInBytes must account for the chroma plane
// elements being two samples wide.
CUDA_MEMCPY2D cpy = {
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
.dstMemoryType = CU_MEMORYTYPE_ARRAY,
.dstMemoryType = CU_MEMORYTYPE_ARRAY,
.srcDevice = (CUdeviceptr)frame->data[n],
.srcPitch = frame->linesize[n],
.srcY = 0,
.dstArray = decoder->cu_array[index][n],
.dstArray = decoder->cu_array[index][n],
.WidthInBytes = image_width * bytes,
.Height = n==0?image_height:image_height/2 ,
};
checkCudaErrors(cuMemcpy2D(&cpy));
}
if (decoder->cuda_ctx)
checkCudaErrors(cuCtxPopCurrent(&dummy));
}
@ -2300,6 +2281,7 @@ static enum AVPixelFormat Cuvid_get_format(CuvidDecoder * decoder,
Fatal(_("CUVID Init failed\n"));
}
#endif
CuvidMessage(2,"CUVID Init ok %dx%d\n",video_ctx->width,video_ctx->height);
ist->active_hwaccel_id = HWACCEL_CUVID;
ist->hwaccel_pix_fmt = AV_PIX_FMT_CUDA;
@ -2861,7 +2843,7 @@ static void *CuvidGetHwAccelContext(CuvidDecoder * decoder)
CUdevice device;
Debug(3, "Initializing cuvid hwaccel thread ID:%ld\n",(long int)syscall(186));
//turn NULL;
if (decoder->cuda_ctx) {
Debug(3,"schon passiert\n");
return NULL;