diff --git a/Makefile b/Makefile index dc314b3..0feac71 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,10 @@ OSS ?= 1 # support OPENGLOSD OPENGLOSD=1 - # use DPMS + # use Libplacebo +LIBPLACEBO=0 + + # use DPMS SCREENSAVER=1 OPENGL=1 @@ -68,7 +71,6 @@ TMPDIR ?= /tmp export CFLAGS = $(call PKGCFG,cflags) export CXXFLAGS = $(call PKGCFG,cxxflags) - ifeq ($(CFLAGS),) $(warning CFLAGS not set) endif @@ -119,6 +121,9 @@ LIBS += $(shell pkg-config --libs glew) _CFLAGS += $(shell pkg-config --cflags freetype2) LIBS += $(shell pkg-config --libs freetype2) endif +ifeq ($(LIBPLACEBO),1) +CONFIG += -DPLACEBO +endif # # Test that libswresample is available @@ -190,6 +195,10 @@ _CFLAGS += -I./opengl -I./ LIBS += -L/usr/lib64/opengl/nvidia/lib LIBS += -L/usr/local/cuda/lib64 +ifeq ($(LIBPLACEBO),1) +LIBS += -lplacebo +endif + LIBS += -lGLEW -lGLX -ldl -lcuda -L/usr/local/cuda/targets/x86_64-linux/lib -lcudart -lnvcuvid ### Includes and Defines (add further entries here): diff --git a/README.md b/README.md index 4018fe9..82a189c 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ To compile you must have the 'requires' installed. This is a fork of johns original softhddevice work and I reworked ist to support HEVC with CUDA and opengl output. -Currently I have tested it with a GTX 1050 from NVIDIA. HD and UHD ist working, but SD ist instable. +Currently I have tested it with a GTX 1050 from NVIDIA. SD, HD and UHD are working. Current Status NVIDA: The CUDA driver supports HEVC with 8 Bit and 10 Bit up to UHD resolution. Opengl is able to output also 10 Bit, but NVIDIA does not support to output 10 Bit via HDMI. @@ -49,10 +49,11 @@ Only via DisplayPort you can get 10 Bit output to a ompatible screen. This is a You have to adapt the Makefile to your needs. I use FFMPEG 4.0 -The Makefile expects the CUDA SDK in /usr/local/cuda. Currently it is tested with CUDA 9.1 +The Makefile expects the CUDA SDK in /usr/local/cuda. 
Currently it is tested with CUDA 10 -Unfortunatly FFMEG has a bug with deinterlacing cuda frames. So you have to patch the file in libavcodec/cuviddec.c +Unfortunately, older FFmpeg versions have a bug with deinterlacing cuda frames. It is best to get the latest FFmpeg version. +Otherwise you have to patch the file in libavcodec/cuviddec.c Somewhere near line 860 and 1066 depending on your release: old: ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame)); @@ -60,6 +61,10 @@ old: new: ctx->frame_queue = av_fifo_alloc((ctx->nb_surfaces + 2 ) * sizeof(CuvidParsedFrame)); +This version supports building with libplacebo. https://github.com/haasn/libplacebo +You have to enable it in the Makefile and install libplacebo yourself. +At the moment this is work in progress and the upscaler used is hardcoded. This will be changed to be configurable. +It also needs the NVIDIA driver 410.48 or newer as well as CUDA 10. @@ -84,7 +89,7 @@ Install: You can edit Makefile to enable/disable Alsa / OSS support. The default is to autodetect as much as possible. - You have to start vdr with -P 'softhdcuvid -d :0.0 .... -v cuvid' + You have to start vdr with -P 'softhdcuvid -d :0.0 .... 
' Setup: environment diff --git a/drvapi_error_string.h b/drvapi_error_string.h index 0786ea5..18bfb1c 100644 --- a/drvapi_error_string.h +++ b/drvapi_error_string.h @@ -446,7 +446,7 @@ inline const char *getCudaDrvErrorString(CUresult error_id) int index = 0; while (sCudaDrvErrorString[index].error_id != error_id && - sCudaDrvErrorString[index].error_id != -1) + (int)sCudaDrvErrorString[index].error_id != -1) { index++; } diff --git a/video.c b/video.c index e324ebc..c8650d0 100644 --- a/video.c +++ b/video.c @@ -133,6 +133,7 @@ typedef enum #include #include // For GL_COLOR_BUFFER_BIT #include // For GL_COLOR_BUFFER_BIT +//#include #include // only for gluErrorString #include @@ -1621,8 +1622,8 @@ typedef struct _cuvid_decoder_ int SurfacesRb[VIDEO_SURFACES_MAX]; // CUcontext cuda_ctx; - cudaStream_t stream; // make my own cuda stream - CUgraphicsResource cuResource; +// cudaStream_t stream; // make my own cuda stream +// CUgraphicsResource cuResource; int SurfaceWrite; ///< write pointer int SurfaceRead; ///< read pointer atomic_t SurfacesFilled; ///< how many of the buffer is used @@ -1632,9 +1633,9 @@ typedef struct _cuvid_decoder_ GLuint gl_textures[(CODEC_SURFACES_MAX+1)*2]; // where we will copy the CUDA result #ifdef PLACEBO - const struct pl_image pl_images[CODEC_SURFACES_MAX+1]; // images für Placebo chain + struct pl_image pl_images[CODEC_SURFACES_MAX+1]; // images für Placebo chain const struct pl_tex *pl_tex_in[CODEC_SURFACES_MAX+1][2]; // Textures in image - struct pl_buf *pl_buf_Y,*pl_buf_UV; // buffer for Texture upload + const struct pl_buf *pl_buf_Y,*pl_buf_UV; // buffer for Texture upload struct ext_buf ebuf[2]; // for managing vk buffer #endif @@ -1657,6 +1658,7 @@ typedef struct _cuvid_decoder_ int FrameCounter; ///< number of frames decoded int FramesDisplayed; ///< number of frames displayed float Frameproc; /// Time to process frame + int newchannel; } CuvidDecoder; static CuvidDecoder *CuvidDecoders[2]; ///< open decoder streams @@ -1676,7 
+1678,7 @@ struct priv { }; struct priv *p; #endif -//CUcontext cuda_ctx; + GLuint vao_buffer; // //GLuint vao_vao[4]; // GLuint gl_shader=0,gl_prog = 0,gl_fbo=0; // shader programm @@ -2149,10 +2151,12 @@ void SDK_CHECK_ERROR_GL() { void createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned int size_y, enum AVPixelFormat PixFmt) { - int n,i,size; + int n,i,size=1; const struct pl_fmt *fmt; struct pl_tex *tex; - + struct pl_image *img; + struct pl_plane *pl; + glXMakeCurrent(XlibDisplay, VideoWindow, GlxContext); GlxCheck(); //printf("Create textures and planes %d %d\n",size_x,size_y); @@ -2179,7 +2183,7 @@ createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned i .address_mode = PL_TEX_ADDRESS_CLAMP, }); // make planes for image - struct pl_plane *pl = &decoder->pl_images[i].planes[n]; + pl = &decoder->pl_images[i].planes[n]; pl->texture = decoder->pl_tex_in[i][n]; pl->components = n==0?1:2; pl->shift_x = 0.0f; @@ -2200,7 +2204,7 @@ createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned i } } // make image - struct pl_image *img = &decoder->pl_images[i]; + img = &decoder->pl_images[i]; img->signature = i; img->num_planes = 2; img->repr.sys = PL_COLOR_SYSTEM_BT_709; // overwritten later @@ -2224,7 +2228,7 @@ createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned i .memory_type = PL_BUF_MEM_DEVICE, .ext_handles = PL_HANDLE_FD, }); - decoder->pl_buf_Y->handles.fd = dup(decoder->pl_buf_Y->handles.fd); // dup fd + decoder->ebuf[0].fd = dup(decoder->pl_buf_Y->handles.fd); // dup fd // printf("Y Offset %d Size %d FD %d\n",decoder->pl_buf_Y->handle_offset,decoder->pl_buf_Y->handles.size,decoder->pl_buf_Y->handles.fd); decoder->pl_buf_UV = pl_buf_create(p->gpu, &(struct pl_buf_params) { // buffer für UV texture upload @@ -2235,13 +2239,13 @@ createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned i .memory_type = PL_BUF_MEM_DEVICE, .ext_handles = 
PL_HANDLE_FD, }); -// decoder->pl_buf_UV->handles.fd = dup(decoder->pl_buf_UV->handles.fd); // dup fd not need use the first FD - decoder->pl_buf_UV->handles.fd = -1; + decoder->ebuf[1].fd = dup(decoder->pl_buf_UV->handles.fd); // dup fd + // printf("UV Offset %d Size %d FD %d\n",decoder->pl_buf_UV->handle_offset,decoder->pl_buf_UV->handles.size,decoder->pl_buf_UV->handles.fd); CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, - .handle.fd = decoder->pl_buf_Y->handles.fd, + .handle.fd = decoder->ebuf[0].fd, .size = decoder->pl_buf_Y->handles.size, // image_width * image_height * bytes, .flags = 0, }; @@ -2254,20 +2258,20 @@ createTextureDst(CuvidDecoder * decoder,int anz, unsigned int size_x, unsigned i }; checkCudaErrors(cuExternalMemoryGetMappedBuffer(&decoder->ebuf[0].buf, decoder->ebuf[0].mem, &buf_desc)); // get Pointer -// CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc1 = { -// .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, -// .handle.fd = decoder->pl_buf_UV->handles.fd, -// .size = decoder->pl_buf_UV->handles.size, // image_width * image_height * bytes / 2, -// .flags = 0, -// }; -// checkCudaErrors(cuImportExternalMemory(&decoder->ebuf[1].mem, &ext_desc1)); // Import Memory Segment Use the first FD + CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc1 = { + .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, + .handle.fd = decoder->ebuf[1].fd, + .size = decoder->pl_buf_UV->handles.size, // image_width * image_height * bytes / 2, + .flags = 0, + }; + checkCudaErrors(cuImportExternalMemory(&decoder->ebuf[1].mem, &ext_desc1)); // Import Memory Segment CUDA_EXTERNAL_MEMORY_BUFFER_DESC buf_desc1 = { .offset = decoder->pl_buf_UV->handle_offset, .size = size_x * size_y * size / 2, .flags = 0, }; - checkCudaErrors(cuExternalMemoryGetMappedBuffer(&decoder->ebuf[1].buf, decoder->ebuf[0].mem, &buf_desc1)); // get pointer + checkCudaErrors(cuExternalMemoryGetMappedBuffer(&decoder->ebuf[1].buf, decoder->ebuf[1].mem, &buf_desc1)); // get 
pointer //printf("generate textures %d %d\n",size_x,size_y); } @@ -2288,7 +2292,6 @@ void generateCUDAImage(CuvidDecoder * decoder,int index, const AVFrame *frame,in // elements being two samples wide. CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, - .dstMemoryType = CU_MEMORYTYPE_ARRAY, .srcDevice = (CUdeviceptr)frame->data[n], .srcPitch = frame->linesize[n], .srcY = 0, @@ -3109,7 +3112,7 @@ static void CuvidRenderFrame(CuvidDecoder * decoder, { int surface; VideoDecoder *ist = video_ctx->opaque; - + enum AVColorSpace color; // update aspect ratio changes #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(53,60,100) @@ -3131,21 +3134,27 @@ printf("new aspect %d:%d\n",frame->sample_aspect_ratio.num,frame->sample_aspect_ } #endif - + color = frame->colorspace; + if (color == AVCOL_SPC_UNSPECIFIED) // if unknown + color = AVCOL_SPC_BT709; + // // Check image, format, size // if ( // decoder->PixFmt != video_ctx->pix_fmt video_ctx->width != decoder->InputWidth - || decoder->ColorSpace != frame->colorspace + || decoder->ColorSpace != color || video_ctx->height != decoder->InputHeight) { -Debug(3,"fmt %02d:%02d width %d:%d hight %d:%d\n",decoder->PixFmt,video_ctx->pix_fmt ,video_ctx->width, decoder->InputWidth,video_ctx->height, decoder->InputHeight); +Debug(3,"fmt %02d:%02d width %d:%d hight %d:%d\n",decoder->ColorSpace,frame->colorspace ,video_ctx->width, decoder->InputWidth,video_ctx->height, decoder->InputHeight); decoder->InputWidth = video_ctx->width; decoder->InputHeight = video_ctx->height; CuvidCleanup(decoder); decoder->SurfacesNeeded = VIDEO_SURFACES_MAX + 1; CuvidSetupOutput(decoder); +#ifdef PLACEBO // dont show first frame + decoder->newchannel = 1; +#endif } // // Copy data from frame to image @@ -3155,7 +3164,7 @@ Debug(3,"fmt %02d:%02d width %d:%d hight %d:%d\n",decoder->PixFmt,video_ctx->pi int w = decoder->InputWidth; int h = decoder->InputHeight; - decoder->ColorSpace = frame->colorspace; // save colorspace + decoder->ColorSpace = color; // 
save colorspace decoder->trc = frame->color_trc; decoder->color_primaries = frame->color_primaries; @@ -3304,6 +3313,7 @@ static void CuvidMixVideo(CuvidDecoder * decoder, int level) glUseProgram(0); glActiveTexture(GL_TEXTURE0); + #else img = &decoder->pl_images[current]; @@ -3377,7 +3387,12 @@ static void CuvidMixVideo(CuvidDecoder * decoder, int level) pl_gpu_finish(p->gpu); //printf("Finish renderer on frame %d with IN: %d-%d on image %d-%d\n",current,w,h,decoder->pl_images[current].width,decoder->pl_images[current].height); vkp = target.fbo->priv; - glDrawVkImageNV((GLuint64)(VkImage)vkp->img, 0, dst_video_rect.x0,dst_video_rect.y0,dst_video_rect.x1, dst_video_rect.y1, 0,0,1,1,0); + if (decoder->newchannel == 1 && current == 0) + ; + else + glDrawVkImageNV((GLuint64)(VkImage)vkp->img, 0, dst_video_rect.x0,dst_video_rect.y0,dst_video_rect.x1, dst_video_rect.y1, 0,0,1,1,0); + if (decoder->newchannel && current) + decoder->newchannel = 0; pl_tex_destroy(p->gpu,&target.fbo); @@ -4606,6 +4621,11 @@ static void *VideoDisplayHandlerThread(void *dummy) unsigned int device_count,version; CUdevice device; +#ifdef PLACEBO + struct pl_vulkan_params params; +#endif + + checkCudaErrors(cuInit(0)); // checkCudaErrors(cuGLGetDevices(&device_count, &device, 1, CU_GL_DEVICE_LIST_ALL)); @@ -4631,8 +4651,8 @@ static void *VideoDisplayHandlerThread(void *dummy) if (!p->ctx) { Fatal(_("Failed initializing libplacebo\n")); } - - struct pl_vulkan_params params = pl_vulkan_default_params; + + memcpy (&params,&pl_vulkan_default_params, sizeof(params)); params.async_transfer = true, params.async_compute = true, params.queue_count = 8, @@ -4677,6 +4697,7 @@ static void *VideoDisplayHandlerThread(void *dummy) pl_renderer_destroy(&p->renderer); pl_vulkan_destroy(&p->vk); pl_context_destroy(&p->ctx); + free(p); #endif cuCtxDestroy (cuda_ctx); return dummy;