mirror of
https://github.com/jojo61/vdr-plugin-softhdcuvid.git
synced 2023-10-10 13:37:41 +02:00
fixed PCIe memory usage
This commit is contained in:
parent
391ce04657
commit
1640642664
60
video.c
60
video.c
@ -1597,13 +1597,12 @@ typedef struct _cuvid_decoder_
|
|||||||
CUcontext cuda_ctx;
|
CUcontext cuda_ctx;
|
||||||
|
|
||||||
cudaStream_t stream; // make my own cuda stream
|
cudaStream_t stream; // make my own cuda stream
|
||||||
CUdeviceptr dpBackBuffer;
|
|
||||||
CUgraphicsResource cuResource;
|
CUgraphicsResource cuResource;
|
||||||
int SurfaceWrite; ///< write pointer
|
int SurfaceWrite; ///< write pointer
|
||||||
int SurfaceRead; ///< read pointer
|
int SurfaceRead; ///< read pointer
|
||||||
atomic_t SurfacesFilled; ///< how many of the buffer is used
|
atomic_t SurfacesFilled; ///< how many of the buffer is used
|
||||||
|
|
||||||
CUarray cu_array[CODEC_SURFACES_MAX][2];
|
CUarray cu_array[CODEC_SURFACES_MAX][2];
|
||||||
CUgraphicsResource cu_res[CODEC_SURFACES_MAX][2];
|
CUgraphicsResource cu_res[CODEC_SURFACES_MAX][2];
|
||||||
GLuint gl_textures[CODEC_SURFACES_MAX*2]; // where we will copy the CUDA result
|
GLuint gl_textures[CODEC_SURFACES_MAX*2]; // where we will copy the CUDA result
|
||||||
|
|
||||||
@ -1735,7 +1734,7 @@ static void CuvidCreateSurfaces(CuvidDecoder * decoder, int width, int height,en
|
|||||||
decoder->SurfacesNeeded = VIDEO_SURFACES_MAX;
|
decoder->SurfacesNeeded = VIDEO_SURFACES_MAX;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
Debug(3, "video/cuvid: %s: %dx%d * %d cuda_ctx %p\n", __FUNCTION__, width, height, decoder->SurfacesNeeded,decoder->cuda_ctx);
|
Debug(3, "video/cuvid: %s: %dx%d * %d \n", __FUNCTION__, width, height, decoder->SurfacesNeeded);
|
||||||
|
|
||||||
// allocate only the number of needed surfaces
|
// allocate only the number of needed surfaces
|
||||||
decoder->SurfaceFreeN = decoder->SurfacesNeeded;
|
decoder->SurfaceFreeN = decoder->SurfacesNeeded;
|
||||||
@ -1765,15 +1764,13 @@ static void CuvidDestroySurfaces(CuvidDecoder * decoder)
|
|||||||
glXMakeCurrent(XlibDisplay, VideoWindow, GlxContext);
|
glXMakeCurrent(XlibDisplay, VideoWindow, GlxContext);
|
||||||
GlxCheck();
|
GlxCheck();
|
||||||
|
|
||||||
if (decoder->cuda_ctx) {
|
|
||||||
checkCudaErrors(cuCtxPushCurrent(decoder->cuda_ctx));
|
for (i=0;i<decoder->SurfacesNeeded;i++) {
|
||||||
for (i=0;i<decoder->SurfacesNeeded;i++) {
|
for (j=0;j<2;j++) {
|
||||||
for (j=0;j<2;j++) {
|
checkCudaErrors(cuGraphicsUnregisterResource(decoder->cu_res[i][j]));
|
||||||
checkCudaErrors(cuGraphicsUnregisterResource(decoder->cu_res[i][j]));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
checkCudaErrors(cuCtxPopCurrent(NULL));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
glDeleteTextures(CODEC_SURFACES_MAX*2,(GLuint*)&decoder->gl_textures);
|
glDeleteTextures(CODEC_SURFACES_MAX*2,(GLuint*)&decoder->gl_textures);
|
||||||
GlxCheck();
|
GlxCheck();
|
||||||
@ -1985,7 +1982,7 @@ Debug(3,"Cuvid Clean up\n");
|
|||||||
static void CuvidDelHwDecoder(CuvidDecoder * decoder)
|
static void CuvidDelHwDecoder(CuvidDecoder * decoder)
|
||||||
{
|
{
|
||||||
int i,n;
|
int i,n;
|
||||||
Debug(3,"cuvid del hw decoder cuda_ctx %p\n",decoder->cuda_ctx);
|
Debug(3,"cuvid del hw decoder \n");
|
||||||
if (decoder == CuvidDecoders[0])
|
if (decoder == CuvidDecoders[0])
|
||||||
pthread_mutex_lock(&VideoLockMutex);
|
pthread_mutex_lock(&VideoLockMutex);
|
||||||
|
|
||||||
@ -1997,12 +1994,6 @@ Debug(3,"cuvid del hw decoder cuda_ctx %p\n",decoder->cuda_ctx);
|
|||||||
if (decoder == CuvidDecoders[0])
|
if (decoder == CuvidDecoders[0])
|
||||||
pthread_mutex_unlock(&VideoLockMutex);
|
pthread_mutex_unlock(&VideoLockMutex);
|
||||||
|
|
||||||
#if 0
|
|
||||||
if (decoder->cuda_ctx) {
|
|
||||||
cuCtxDestroy (decoder->cuda_ctx);
|
|
||||||
decoder->cuda_ctx = NULL;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
glXMakeCurrent(XlibDisplay, None, NULL);
|
glXMakeCurrent(XlibDisplay, None, NULL);
|
||||||
for (i = 0; i < CuvidDecoderN; ++i) {
|
for (i = 0; i < CuvidDecoderN; ++i) {
|
||||||
if (CuvidDecoders[i] == decoder) {
|
if (CuvidDecoders[i] == decoder) {
|
||||||
@ -2086,18 +2077,15 @@ void
|
|||||||
createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned int size_y, enum AVPixelFormat PixFmt)
|
createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned int size_y, enum AVPixelFormat PixFmt)
|
||||||
{
|
{
|
||||||
|
|
||||||
int n,i;
|
int n,i,size;
|
||||||
CUcontext dummy;
|
CUcontext dummy;
|
||||||
// glXMakeCurrent(XlibDisplay, VideoWindow, GlxSharedContext);
|
|
||||||
glXMakeCurrent(XlibDisplay, VideoWindow, GlxContext);
|
glXMakeCurrent(XlibDisplay, VideoWindow, GlxContext);
|
||||||
GlxCheck();
|
GlxCheck();
|
||||||
|
|
||||||
glGenBuffers(1,&vao_buffer);
|
glGenBuffers(1,&vao_buffer);
|
||||||
GlxCheck();
|
GlxCheck();
|
||||||
|
|
||||||
if (decoder->cuda_ctx)
|
|
||||||
checkCudaErrors(cuCtxPushCurrent(decoder->cuda_ctx));
|
|
||||||
|
|
||||||
Debug(3,"video/vdpau: create %d Textures Format %s w %d h %d \n",anz,PixFmt==AV_PIX_FMT_NV12?"NV12":"P010",size_x,size_y);
|
Debug(3,"video/vdpau: create %d Textures Format %s w %d h %d \n",anz,PixFmt==AV_PIX_FMT_NV12?"NV12":"P010",size_x,size_y);
|
||||||
|
|
||||||
// create texture planes
|
// create texture planes
|
||||||
@ -2119,17 +2107,16 @@ createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned i
|
|||||||
glTexImage2D(GL_TEXTURE_2D, 0,n==0?GL_R16:GL_RG16 ,n==0?size_x:size_x/2, n==0?size_y:size_y/2, 0, n==0?GL_RED:GL_RG , GL_UNSIGNED_SHORT, NULL);
|
glTexImage2D(GL_TEXTURE_2D, 0,n==0?GL_R16:GL_RG16 ,n==0?size_x:size_x/2, n==0?size_y:size_y/2, 0, n==0?GL_RED:GL_RG , GL_UNSIGNED_SHORT, NULL);
|
||||||
SDK_CHECK_ERROR_GL();
|
SDK_CHECK_ERROR_GL();
|
||||||
// register this texture with CUDA
|
// register this texture with CUDA
|
||||||
if (decoder->cuda_ctx) {
|
|
||||||
checkCudaErrors(cuGraphicsGLRegisterImage(&decoder->cu_res[i][n], decoder->gl_textures[i*2+n],GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
|
checkCudaErrors(cuGraphicsGLRegisterImage(&decoder->cu_res[i][n], decoder->gl_textures[i*2+n],GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
|
||||||
checkCudaErrors(cuGraphicsMapResources(1, &decoder->cu_res[i][n], 0));
|
checkCudaErrors(cuGraphicsMapResources(1, &decoder->cu_res[i][n], 0));
|
||||||
checkCudaErrors(cuGraphicsSubResourceGetMappedArray(&decoder->cu_array[i][n], decoder->cu_res[i][n],0, 0));
|
checkCudaErrors(cuGraphicsSubResourceGetMappedArray(&decoder->cu_array[i][n], decoder->cu_res[i][n],0, 0));
|
||||||
checkCudaErrors(cuGraphicsUnmapResources(1, &decoder->cu_res[i][n], 0));
|
checkCudaErrors(cuGraphicsUnmapResources(1, &decoder->cu_res[i][n], 0));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
glBindTexture(GL_TEXTURE_2D, 0);
|
glBindTexture(GL_TEXTURE_2D, 0);
|
||||||
if (decoder->cuda_ctx)
|
|
||||||
checkCudaErrors(cuCtxPopCurrent(&dummy));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy image and process using CUDA
|
// copy image and process using CUDA
|
||||||
@ -2138,27 +2125,21 @@ void generateCUDAImage(CuvidDecoder * decoder,int index, const AVFrame *frame,in
|
|||||||
int n,version;
|
int n,version;
|
||||||
CUcontext dummy=NULL;
|
CUcontext dummy=NULL;
|
||||||
|
|
||||||
if (decoder->cuda_ctx)
|
|
||||||
checkCudaErrors(cuCtxPushCurrent(decoder->cuda_ctx));
|
|
||||||
|
|
||||||
for (n = 0; n < 2; n++) { //
|
for (n = 0; n < 2; n++) { //
|
||||||
// widthInBytes must account for the chroma plane
|
// widthInBytes must account for the chroma plane
|
||||||
// elements being two samples wide.
|
// elements being two samples wide.
|
||||||
CUDA_MEMCPY2D cpy = {
|
CUDA_MEMCPY2D cpy = {
|
||||||
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
|
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
|
||||||
.dstMemoryType = CU_MEMORYTYPE_ARRAY,
|
.dstMemoryType = CU_MEMORYTYPE_ARRAY,
|
||||||
.srcDevice = (CUdeviceptr)frame->data[n],
|
.srcDevice = (CUdeviceptr)frame->data[n],
|
||||||
.srcPitch = frame->linesize[n],
|
.srcPitch = frame->linesize[n],
|
||||||
.srcY = 0,
|
.srcY = 0,
|
||||||
.dstArray = decoder->cu_array[index][n],
|
.dstArray = decoder->cu_array[index][n],
|
||||||
.WidthInBytes = image_width * bytes,
|
.WidthInBytes = image_width * bytes,
|
||||||
.Height = n==0?image_height:image_height/2 ,
|
.Height = n==0?image_height:image_height/2 ,
|
||||||
};
|
};
|
||||||
checkCudaErrors(cuMemcpy2D(&cpy));
|
checkCudaErrors(cuMemcpy2D(&cpy));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (decoder->cuda_ctx)
|
|
||||||
checkCudaErrors(cuCtxPopCurrent(&dummy));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2300,6 +2281,7 @@ static enum AVPixelFormat Cuvid_get_format(CuvidDecoder * decoder,
|
|||||||
Fatal(_("CUVID Init failed\n"));
|
Fatal(_("CUVID Init failed\n"));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
CuvidMessage(2,"CUVID Init ok %dx%d\n",video_ctx->width,video_ctx->height);
|
CuvidMessage(2,"CUVID Init ok %dx%d\n",video_ctx->width,video_ctx->height);
|
||||||
ist->active_hwaccel_id = HWACCEL_CUVID;
|
ist->active_hwaccel_id = HWACCEL_CUVID;
|
||||||
ist->hwaccel_pix_fmt = AV_PIX_FMT_CUDA;
|
ist->hwaccel_pix_fmt = AV_PIX_FMT_CUDA;
|
||||||
@ -2861,7 +2843,7 @@ static void *CuvidGetHwAccelContext(CuvidDecoder * decoder)
|
|||||||
CUdevice device;
|
CUdevice device;
|
||||||
|
|
||||||
Debug(3, "Initializing cuvid hwaccel thread ID:%ld\n",(long int)syscall(186));
|
Debug(3, "Initializing cuvid hwaccel thread ID:%ld\n",(long int)syscall(186));
|
||||||
|
//turn NULL;
|
||||||
if (decoder->cuda_ctx) {
|
if (decoder->cuda_ctx) {
|
||||||
Debug(3,"schon passiert\n");
|
Debug(3,"schon passiert\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user