From 749b0d672142c685556fa9f3788bff732920460b Mon Sep 17 00:00:00 2001 From: Johns Date: Sat, 10 Dec 2011 00:15:38 +0100 Subject: [PATCH] Video enhancements. Pass PTS to video codec. Flush audio buffer on channel change. Sync audio + video stream. Add workaround for Intel VA-API driver that forgets OSD position/size. --- ChangeLog | 6 +- Todo | 5 +- audio.c | 73 +++++++--- audio.h | 3 +- codec.c | 4 + softhddev.c | 31 ++-- softhddevice.cpp | 2 +- video.c | 368 +++++++++++++++++++++++------------------------ 8 files changed, 271 insertions(+), 221 deletions(-) diff --git a/ChangeLog b/ChangeLog index c9c01f9..1f75198 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,10 @@ User johns -Date: +Date: Sat Dec 10 00:06:46 CET 2011 + Release Version 0.0.9 + Pass audio/video PTS to codec. + Fix libva-driver-intel OSD problems. + Add audio resample support. Reduce program exit crashes. Add libva-driver-vdpau autodetection. Add workaround for bugs in libva-driver-vdpau. diff --git a/Todo b/Todo index 9671086..c6c5dbc 100644 --- a/Todo +++ b/Todo @@ -1,12 +1,11 @@ libva-intel-driver: - intel still has hangups + intel still has hangups mostly with 1080i osd has sometimes wrong size (workaround written) - can show defect surfaces (white bars on top of frame) only interlace - defect shown upto driver surface swap? 
libva-vdpau-driver: G210 osd update too slow (needs hardware problem workaround) + OSD update is too slow x11: support resize of x11 window diff --git a/audio.c b/audio.c index f983787..09faed5 100644 --- a/audio.c +++ b/audio.c @@ -152,7 +152,9 @@ static int AlsaPlayRingbuffer(void) // happens with broken alsa drivers Error(_("audio/alsa: broken driver %d\n"), avail); } - //break; + Debug(4, "audio/alsa: break state %s\n", + snd_pcm_state_name(snd_pcm_state(AlsaPCMHandle))); + break; } n = RingBufferGetReadPointer(AlsaRingBuffer, &p); @@ -320,8 +322,7 @@ void AudioEnqueue(const void *samples, int count) } #endif state = snd_pcm_state(AlsaPCMHandle); - Debug(3, "audio/alsa: state %d - %s\n", state, - snd_pcm_state_name(state)); + Debug(3, "audio/alsa: state %s\n", snd_pcm_state_name(state)); Debug(3, "audio/alsa: unpaused\n"); AudioPaused = 0; } @@ -381,10 +382,15 @@ static void *AudioPlayHandlerThread(void *dummy) Debug(3, "audio/alsa: flushing buffers\n"); RingBufferReadAdvance(AlsaRingBuffer, RingBufferUsedBytes(AlsaRingBuffer)); - if ((err = snd_pcm_drain(AlsaPCMHandle))) { - Error(_("audio: snd_pcm_drain(): %s\n"), +#if 1 + if ((err = snd_pcm_drop(AlsaPCMHandle))) { + Error(_("audio: snd_pcm_drop(): %s\n"), snd_strerror(err)); + } + if ((err = snd_pcm_prepare(AlsaPCMHandle))) { + Error(_("audio: snd_pcm_prepare(): %s\n"), snd_strerror(err)); } +#endif AlsaFlushBuffer = 0; break; } @@ -504,10 +510,11 @@ static void AlsaInitPCM(void) SND_PCM_NONBLOCK)) < 0) { Fatal(_("audio/alsa: playback open '%s' error: %s\n"), device, snd_strerror(err)); + // FIXME: no fatal error for plugins! 
} AlsaPCMHandle = handle; - if ((err = snd_pcm_nonblock(handle, SND_PCM_NONBLOCK)) < 0) { + if ((err = snd_pcm_nonblock(handle, 0)) < 0) { Error(_("audio/alsa: can't set block mode: %s\n"), snd_strerror(err)); } @@ -610,13 +617,27 @@ static void AlsaInitMixer(void) void AudioSetClock(int64_t pts) { if (AudioPTS != pts) { - Debug(3, "audio: set clock to %#012" PRIx64 " %#012" PRIx64 " pts\n", + Debug(4, "audio: set clock to %#012" PRIx64 " %#012" PRIx64 " pts\n", AudioPTS, pts); AudioPTS = pts; } } +/** +** Get current audio clock. +*/ +int64_t AudioGetClock(void) +{ + int64_t delay; + + delay = AudioGetDelay(); + if (delay) { + return AudioPTS - delay; + } + return INT64_C(0x8000000000000000); +} + /** ** Get audio delay in time stamps. */ @@ -626,17 +647,19 @@ uint64_t AudioGetDelay(void) snd_pcm_sframes_t delay; uint64_t pts; + // delay in frames in alsa + kernel buffers if ((err = snd_pcm_delay(AlsaPCMHandle, &delay)) < 0) { //Debug(3, "audio/alsa: no hw delay\n"); delay = 0UL; } else if (snd_pcm_state(AlsaPCMHandle) != SND_PCM_STATE_RUNNING) { - //Debug(3, "audio/alsa: %ld delay ok, but not running\n", delay); + //Debug(3, "audio/alsa: %ld frames delay ok, but not running\n", delay); } - - pts = ((uint64_t) delay * 90000) / AudioSampleRate; - pts += ((uint64_t) RingBufferUsedBytes(AlsaRingBuffer) * 90000) - / (AudioSampleRate * AudioChannels); - //Debug(3, "audio/alsa: hw+sw delay %"PRId64" ms\n", pts / 90); + //Debug(3, "audio/alsa: %ld frames hw delay\n", delay); + pts = ((uint64_t) delay * 90 * 1000) / AudioSampleRate; + pts += ((uint64_t) RingBufferUsedBytes(AlsaRingBuffer) * 90 * 1000) + / (AudioSampleRate * AudioChannels * 2); + Debug(4, "audio/alsa: hw+sw delay %zd %" PRId64 " ms\n", + RingBufferUsedBytes(AlsaRingBuffer), pts / 90); return pts; } @@ -676,13 +699,18 @@ int AudioSetup(int *freq, int *channels) // flush any buffered data #ifdef USE_AUDIO_THREAD if (AudioRunning) { - AlsaFlushBuffer = 1; + while (AudioRunning) { + AlsaFlushBuffer = 1; + 
usleep(1 * 1000); + } + AlsaFlushBuffer = 0; } else #endif { RingBufferReadAdvance(AlsaRingBuffer, RingBufferUsedBytes(AlsaRingBuffer)); } + AudioPTS = INT64_C(0x8000000000000000); ret = 0; try_again: @@ -692,6 +720,15 @@ int AudioSetup(int *freq, int *channels) SND_PCM_ACCESS_RW_INTERLEAVED, *channels, *freq, 1, 125 * 1000))) { Error(_("audio/alsa: set params error: %s\n"), snd_strerror(err)); + + /* + if ( err == -EBADFD ) { + snd_pcm_close(AlsaPCMHandle); + AlsaPCMHandle = NULL; + goto try_again; + } + */ + switch (*channels) { case 1: // FIXME: enable channel upmix @@ -795,10 +832,10 @@ int AudioSetup(int *freq, int *channels) Debug(3, "audio/alsa: state %s\n", snd_pcm_state_name(snd_pcm_state(AlsaPCMHandle))); - AlsaStartThreshold = snd_pcm_frames_to_bytes(AlsaPCMHandle, buffer_size); - // min 500ms - if (AlsaStartThreshold < (*freq * *channels * 2U) / 2) { - AlsaStartThreshold = (*freq * *channels * 2U) / 2; + AlsaStartThreshold = snd_pcm_frames_to_bytes(AlsaPCMHandle, period_size); + // min 333ms + if (AlsaStartThreshold < (*freq * *channels * 2U) / 3) { + AlsaStartThreshold = (*freq * *channels * 2U) / 3; } Debug(3, "audio/alsa: delay %u ms\n", (AlsaStartThreshold * 1000) / (AudioSampleRate * AudioChannels * 2)); diff --git a/audio.h b/audio.h index 257aa83..fbd41ed 100644 --- a/audio.h +++ b/audio.h @@ -32,9 +32,10 @@ extern void AudioEnqueue(const void *, int); ///< buffer audio samples //extern int AudioFreeBytes(void); ///< free bytes in audio output //extern int AudioUsedBytes(void); ///< used bytes in audio output extern void AudioSetClock(int64_t); ///< set audio clock base +extern int64_t AudioGetClock(); ///< get current audio clock -//extern int64_t AudioGetClock(); ///< get current audio clock extern uint64_t AudioGetDelay(void); ///< get current audio delay + extern int AudioSetup(int *, int *); ///< setup audio output //extern void AudioPlay(void); ///< play audio diff --git a/codec.c b/codec.c index c5269b4..52e79bd 100644 --- a/codec.c +++ 
b/codec.c @@ -520,6 +520,10 @@ void CodecAudioOpen(AudioDecoder * audio_decoder, const char *name, av_parser_init(audio_decoder->AudioCtx->codec_id))) { Fatal(_("codec: can't init audio parser\n")); } + audio_decoder->SampleRate = 0; + audio_decoder->Channels = 0; + audio_decoder->HwSampleRate = 0; + audio_decoder->HwChannels = 0; } /** diff --git a/softhddev.c b/softhddev.c index 067b8f5..60187db 100644 --- a/softhddev.c +++ b/softhddev.c @@ -90,7 +90,7 @@ void PlayAudio(const uint8_t * data, int size, uint8_t id) avpkt->pts = (int64_t) (data[9] & 0x0E) << 29 | data[10] << 22 | (data[11] & 0xFE) << 14 | data[12] << 7 | (data[13] & 0xFE) >> 1; - // Debug(3, "audio: pts %#012" PRIx64 "\n", avpkt->pts); + //Debug(3, "audio: pts %#012" PRIx64 "\n", avpkt->pts); } if (0) { // dts is unused if (data[7] & 0x40) { @@ -234,13 +234,17 @@ static void VideoPacketInit(void) /** ** Place video data in packet ringbuffer. */ -static void VideoEnqueue(const void *data, int size) +static void VideoEnqueue(int64_t pts, const void *data, int size) { AVPacket *avpkt; // Debug(3, "video: enqueue %d\n", size); avpkt = &VideoPacketRb[VideoPacketWrite]; + if (!avpkt->stream_index) { // add pts only for first added + avpkt->pts = pts; + avpkt->dts = pts; + } if (avpkt->stream_index + size + FF_INPUT_BUFFER_PADDING_SIZE >= avpkt->size) { @@ -255,9 +259,9 @@ static void VideoEnqueue(const void *data, int size) abort(); } } -#ifdef DEBUG +#ifdef xxDEBUG if (!avpkt->stream_index) { // debug save time of first packet - avpkt->dts = GetMsTicks(); + avpkt->pos = GetMsTicks(); } #endif if (!VideoStartTick) { // tick of first valid packet @@ -329,6 +333,14 @@ int VideoDecode(void) // Debug(3, "video: decode no packets buffered\n"); return -1; } +#if 0 + // FIXME: flush buffers, if close is in the queue + while (filled) { + avpkt = &VideoPacketRb[VideoPacketRead]; + if ((int)(size_t) avpkt->priv == CODEC_ID_NONE) { + } + } +#endif avpkt = &VideoPacketRb[VideoPacketRead]; // @@ -430,9 +442,9 @@ void 
VideoWakeup(void) while (filled) { avpkt = &VideoPacketRb[VideoPacketRead]; now = GetMsTicks(); - if (avpkt->dts + 500 > now) { + if (avpkt->pos + 500 > now) { Debug(3, "video: %d packets %u delayed\n", filled, - (unsigned)(now - avpkt->dts)); + (unsigned)(now - avpkt->pos)); return; } filled = atomic_read(&VideoPacketsFilled); @@ -474,7 +486,7 @@ static void StartVideo(void) void PlayVideo(const uint8_t * data, int size) { const uint8_t *check; - uint64_t pts; + int64_t pts; int n; if (BrokenThreadsAndPlugins) { @@ -513,7 +525,7 @@ void PlayVideo(const uint8_t * data, int size) pts = (int64_t) (data[9] & 0x0E) << 29 | data[10] << 22 | (data[11] & 0xFE) << 14 | data[12] << 7 | (data[13] & 0xFE) >> 1; - // Debug(3, "video: pts %#012" PRIx64 "\n", pts); + //Debug(3, "video: pts %#012" PRIx64 "\n", pts); } // FIXME: no valid mpeg2/h264 detection yet @@ -527,6 +539,7 @@ void PlayVideo(const uint8_t * data, int size) if (VideoCodecID == CODEC_ID_MPEG2VIDEO) { VideoNextPacket(CODEC_ID_MPEG2VIDEO); } else { + Debug(3, "video: mpeg2 detected\n"); VideoCodecID = CODEC_ID_MPEG2VIDEO; } // Access Unit Delimiter @@ -552,7 +565,7 @@ void PlayVideo(const uint8_t * data, int size) // SKIP PES header size -= 9 + n; - VideoEnqueue(check, size); + VideoEnqueue(pts, check, size); } ////////////////////////////////////////////////////////////////////////////// diff --git a/softhddevice.cpp b/softhddevice.cpp index 9e011ed..d017c0c 100644 --- a/softhddevice.cpp +++ b/softhddevice.cpp @@ -36,7 +36,7 @@ ////////////////////////////////////////////////////////////////////////////// -static const char *const VERSION = "0.0.8"; +static const char *const VERSION = "0.0.9"; static const char *const DESCRIPTION = trNOOP("A software and GPU emulated HD device"); diff --git a/video.c b/video.c index f8dce63..3a65e74 100644 --- a/video.c +++ b/video.c @@ -37,6 +37,7 @@ /// #define DEBUG +#define Fix60Hz 0 #define USE_XLIB_XCB #define noUSE_GLX #define noUSE_DOUBLEBUFFER @@ -687,6 +688,7 @@ static 
void GlxExit(void) static int VideoVaapiEnabled = 1; ///< use VA-API decoder static int VaapiBuggyVdpau; ///< fix libva-driver-vdpau bugs +static int VaapiBuggyIntel; ///< fix libva-driver-intel bugs static VADisplay *VaDisplay; ///< VA-API display @@ -694,7 +696,7 @@ static VAImage VaOsdImage = { .image_id = VA_INVALID_ID }; ///< osd VA-API image -static VASubpictureID VaOsdSubpicture; ///< osd VA-API subpicture +static VASubpictureID VaOsdSubpicture = VA_INVALID_ID; ///< osd VA-API subpicture static char VaapiUnscaledOsd; ///< unscaled osd supported /// VA-API decoder typedef @@ -753,6 +755,7 @@ struct _vaapi_decoder_ int SurfaceField; ///< current displayed field struct timespec FrameTime; ///< time of last display struct timespec StartTime; ///< decoder start time + int64_t PTS; ///< video PTS clock int FramesDuped; ///< frames duplicated int FramesDropped; ///< frames dropped @@ -800,6 +803,20 @@ static void VaapiCreateSurfaces(VaapiDecoder * decoder, int width, int height) Warning(_("video/vaapi: no osd subpicture yet\n")); return; } +#if 0 + // FIXME: try to fix intel osd bugs + if (vaDestroySubpicture(VaDisplay, VaOsdSubpicture) + != VA_STATUS_SUCCESS) { + Error(_("video/vaapi: can't destroy subpicture\n")); + } + VaOsdSubpicture = VA_INVALID_ID; + + if (vaCreateSubpicture(VaDisplay, VaOsdImage.image_id, + &VaOsdSubpicture) != VA_STATUS_SUCCESS) { + Error(_("video/vaapi: can't create subpicture\n")); + return; + } +#endif if (VaapiUnscaledOsd) { if (vaAssociateSubpicture(VaDisplay, VaOsdSubpicture, @@ -845,6 +862,7 @@ static void VaapiDestroySurfaces(VaapiDecoder * decoder) Error(_("video/vaapi: can't deassociate %d surfaces\n"), decoder->SurfaceUsedN); } + } if (vaDestroySurfaces(decoder->VaDisplay, decoder->SurfacesFree, @@ -1014,6 +1032,8 @@ static VaapiDecoder *VaapiNewDecoder(void) decoder->VaapiContext->context_id = VA_INVALID_ID; #ifdef USE_GLX + decoder->GlxSurface[0] = VA_INVALID_ID; + decoder->GlxSurface[1] = VA_INVALID_ID; if (GlxEnabled) { // 
FIXME: create GLX context here } @@ -1051,12 +1071,22 @@ static void VaapiCleanup(VaapiDecoder * decoder) atomic_dec(&decoder->SurfacesFilled); surface = decoder->SurfacesRb[decoder->SurfaceRead]; - if (vaSyncSurface(decoder->VaDisplay, surface) + if (surface == VA_INVALID_ID) { + printf(_("video/vaapi: invalid surface in ringbuffer\n")); + Error(_("video/vaapi: invalid surface in ringbuffer\n")); + continue; + } + // can crash and hang + if (0 && vaSyncSurface(decoder->VaDisplay, surface) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } } + if (decoder->SurfaceRead != decoder->SurfaceWrite) { + abort(); + } + decoder->WrongInterlacedWarned = 0; // cleanup image @@ -1091,6 +1121,7 @@ static void VaapiCleanup(VaapiDecoder * decoder) VaapiDestroySurfaces(decoder); } + decoder->PTS = AV_NOPTS_VALUE; clock_gettime(CLOCK_REALTIME, &decoder->StartTime); } @@ -1103,7 +1134,7 @@ static void VaapiDelDecoder(VaapiDecoder * decoder) { VaapiCleanup(decoder); - if (decoder->BlackSurface) { + if (decoder->BlackSurface != VA_INVALID_ID) { if (vaDestroySurfaces(decoder->VaDisplay, &decoder->BlackSurface, 1) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy a surface\n")); @@ -1111,13 +1142,13 @@ static void VaapiDelDecoder(VaapiDecoder * decoder) } // FIXME: decoder->DeintImages #ifdef USE_GLX - if (decoder->GlxSurface[0]) { + if (decoder->GlxSurface[0] != VA_INVALID_ID) { if (vaDestroySurfaceGLX(VaDisplay, decoder->GlxSurface[0]) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy glx surface!\n")); } } - if (decoder->GlxSurface[1]) { + if (decoder->GlxSurface[1] != VA_INVALID_ID) { if (vaDestroySurfaceGLX(VaDisplay, decoder->GlxSurface[1]) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy glx surface!\n")); @@ -1161,14 +1192,18 @@ static void VideoVaapiInit(const char *display_name) VaDisplay = vaGetDisplay(XlibDisplay); } if (!VaDisplay) { - Fatal(_("video/vaapi: Can't connect VA-API to X11 server on '%s'"), + 
Error(_("video/vaapi: Can't connect VA-API to X11 server on '%s'"), display_name); // FIXME: no fatal for plugin + return; } if (vaInitialize(VaDisplay, &major, &minor) != VA_STATUS_SUCCESS) { - Fatal(_("video/vaapi: Can't inititialize VA-API on '%s'"), + Error(_("video/vaapi: Can't inititialize VA-API on '%s'"), display_name); + vaTerminate(VaDisplay); + VaDisplay = NULL; + return; } s = vaQueryVendorString(VaDisplay); Info(_("video/vaapi: libva %d.%d (%s) initialized\n"), major, minor, s); @@ -1181,6 +1216,9 @@ static void VideoVaapiInit(const char *display_name) setenv("VDPAU_VIDEO_PUTSURFACE_FAST", "0", 0); VaapiBuggyVdpau = 1; } + if (strstr(s, "Intel i965")) { + VaapiBuggyIntel = 1; + } // // check if driver makes a copy of the VA surface for display. // @@ -1210,6 +1248,15 @@ static void VideoVaapiExit(void) int i; // FIXME: more VA-API cleanups... + // FIXME: can hang with vdpau in pthread_rwlock_wrlock + + for (i = 0; i < VaapiDecoderN; ++i) { + if (VaapiDecoders[i]) { + VaapiDelDecoder(VaapiDecoders[i]); + VaapiDecoders[i] = NULL; + } + } + VaapiDecoderN = 0; if (VaOsdImage.image_id != VA_INVALID_ID) { if (vaDestroyImage(VaDisplay, @@ -1220,6 +1267,7 @@ static void VideoVaapiExit(void) } if (VaOsdSubpicture != VA_INVALID_ID) { + // still has 35 surfaces associated to it if (vaDestroySubpicture(VaDisplay, VaOsdSubpicture) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy subpicture\n")); @@ -1227,14 +1275,6 @@ static void VideoVaapiExit(void) VaOsdSubpicture = VA_INVALID_ID; } - for (i = 0; i < VaapiDecoderN; ++i) { - if (VaapiDecoders[i]) { - VaapiDelDecoder(VaapiDecoders[i]); - VaapiDecoders[i] = NULL; - } - } - VaapiDecoderN = 0; - if (!VaDisplay) { vaTerminate(VaDisplay); VaDisplay = NULL; @@ -1351,6 +1391,7 @@ static enum PixelFormat Vaapi_get_format(VaapiDecoder * decoder, VAConfigAttrib attrib; Debug(3, "video: new stream format %d\n", GetMsTicks() - VideoSwitch); + // create initial black surface and display VaapiBlackSurface(decoder); 
VaapiCleanup(decoder); @@ -1534,10 +1575,6 @@ static void VaapiPutSurfaceX11(VaapiDecoder * decoder, VASurfaceID surface, unsigned type; VAStatus status; - // fixes: [drm:i915_hangcheck_elapsed] *ERROR* Hangcheck - // timer elapsed... GPU hung - usleep(1 * 1000); - // deinterlace if (interlaced && VideoDeinterlace != VideoDeinterlaceWeave) { if (top_field_first) { @@ -1557,6 +1594,7 @@ static void VaapiPutSurfaceX11(VaapiDecoder * decoder, VASurfaceID surface, type = VA_FRAME_PICTURE; } + xcb_flush(Connection); if ((status = vaPutSurface(decoder->VaDisplay, surface, decoder->Window, // decoder src decoder->InputX, decoder->InputY, decoder->InputWidth, @@ -1585,10 +1623,6 @@ static void VaapiPutSurfaceX11(VaapiDecoder * decoder, VASurfaceID surface, surface, status); return; } - if (vaSyncSurface(decoder->VaDisplay, surface) != VA_STATUS_SUCCESS) { - Error(_("video: vaSyncSurface failed\n")); - } - } if (0) { @@ -1606,6 +1640,7 @@ static void VaapiPutSurfaceX11(VaapiDecoder * decoder, VASurfaceID surface, usleep(1 * 1000); } } + } #ifdef USE_GLX @@ -1866,10 +1901,10 @@ static void VaapiQueueSurface(VaapiDecoder * decoder, VASurfaceID surface, if ((old = decoder->SurfacesRb[decoder->SurfaceWrite]) != VA_INVALID_ID) { - if (vaSyncSurface(decoder->VaDisplay, old) != VA_STATUS_SUCCESS) { +#if 0 + if (0 && vaSyncSurface(decoder->VaDisplay, old) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } -#if 0 VASurfaceStatus status; if (vaQuerySurfaceStatus(decoder->VaDisplay, old, &status) @@ -1881,7 +1916,9 @@ static void VaapiQueueSurface(VaapiDecoder * decoder, VASurfaceID surface, Warning(_ ("video/vaapi: surface %#x not ready: still displayed %d\n"), old, status); - if (vaSyncSurface(decoder->VaDisplay, old) != VA_STATUS_SUCCESS) { + if (0 + && vaSyncSurface(decoder->VaDisplay, + old) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } } @@ -1892,14 +1929,29 @@ static void VaapiQueueSurface(VaapiDecoder * decoder, VASurfaceID 
surface, VaapiReleaseSurface(decoder, old); } } -#if 0 - // - // associate the OSD with surface - // - if (vaAssociateSubpicture(VaDisplay, VaOsdSubpicture, &surface, 1, 0, 0, - VaOsdImage.width, VaOsdImage.height, 0, 0, decoder->InputWidth, - decoder->InputHeight, 0) != VA_STATUS_SUCCESS) { - Error(_("video/vaapi: can't associate subpicture\n")); +#if 1 + // FIXME: intel seems to forget this, nvidia GT 210 has speed problems here + if (VaapiBuggyIntel && VaOsdSubpicture != VA_INVALID_ID) { + + // + // associate the OSD with surface + // + if (VaapiUnscaledOsd) { + if (vaAssociateSubpicture(VaDisplay, VaOsdSubpicture, &surface, 1, + 0, 0, VaOsdImage.width, VaOsdImage.height, 0, 0, + VideoWindowWidth, VideoWindowHeight, + VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD) + != VA_STATUS_SUCCESS) { + Error(_("video/vaapi: can't associate subpicture\n")); + } + } else { + if (vaAssociateSubpicture(VaDisplay, VaOsdSubpicture, &surface, 1, + 0, 0, VaOsdImage.width, VaOsdImage.height, 0, 0, + decoder->InputWidth, decoder->InputHeight, 0) + != VA_STATUS_SUCCESS) { + Error(_("video/vaapi: can't associate subpicture\n")); + } + } } #endif @@ -1940,6 +1992,9 @@ static void FilterLine(const uint8_t * past, const uint8_t * cur, static void VaapiBlackSurface(VaapiDecoder * decoder) { VAStatus status; + uint32_t start; + uint32_t sync; + uint32_t put1; // wait until we have osd subpicture if (VaOsdSubpicture == VA_INVALID_ID) { @@ -1955,7 +2010,9 @@ static void VaapiBlackSurface(VaapiDecoder * decoder) return; } } + // FIXME: no need to re associate + // full sized surface, no difference unscaled/scaled osd if (vaAssociateSubpicture(decoder->VaDisplay, VaOsdSubpicture, &decoder->BlackSurface, 1, 0, 0, VaOsdImage.width, VaOsdImage.height, 0, 0, VideoWindowWidth, VideoWindowHeight, @@ -1963,6 +2020,7 @@ static void VaapiBlackSurface(VaapiDecoder * decoder) Error(_("video/vaapi: can't associate subpicture\n")); } + start = GetMsTicks(); if (vaSyncSurface(decoder->VaDisplay, 
decoder->BlackSurface) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); @@ -1970,6 +2028,8 @@ static void VaapiBlackSurface(VaapiDecoder * decoder) Debug(4, "video/vaapi: yy black video surface %#x displayed\n", decoder->BlackSurface); + sync = GetMsTicks(); + xcb_flush(Connection); if ((status = vaPutSurface(decoder->VaDisplay, decoder->BlackSurface, decoder->Window, @@ -1982,12 +2042,16 @@ static void VaapiBlackSurface(VaapiDecoder * decoder) } clock_gettime(CLOCK_REALTIME, &decoder->FrameTime); - if (vaSyncSurface(decoder->VaDisplay, + put1 = GetMsTicks(); + Debug(4, "video/vaapi: sync %2u put1 %2u\n", sync - start, put1 - sync); + + if (0 + && vaSyncSurface(decoder->VaDisplay, decoder->BlackSurface) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } - usleep(1 * 1000); + usleep(500); } /// @@ -2140,7 +2204,7 @@ static void VaapiCpuDeinterlace(VaapiDecoder * decoder, VASurfaceID surface) img1->num_planes); } - if (vaSyncSurface(decoder->VaDisplay, surface) != VA_STATUS_SUCCESS) { + if (0 && vaSyncSurface(decoder->VaDisplay, surface) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } @@ -2163,7 +2227,7 @@ static void VaapiCpuDeinterlace(VaapiDecoder * decoder, VASurfaceID surface) Fatal("video/vaapi: can't put image!\n"); } VaapiQueueSurface(decoder, out1, 1); - if (vaSyncSurface(decoder->VaDisplay, out1) != VA_STATUS_SUCCESS) { + if (0 && vaSyncSurface(decoder->VaDisplay, out1) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } // get a free surface and upload the image @@ -2174,7 +2238,7 @@ static void VaapiCpuDeinterlace(VaapiDecoder * decoder, VASurfaceID surface) Fatal("video/vaapi: can't put image!\n"); } VaapiQueueSurface(decoder, out2, 1); - if (vaSyncSurface(decoder->VaDisplay, out2) != VA_STATUS_SUCCESS) { + if (0 && vaSyncSurface(decoder->VaDisplay, out2) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } // FIXME: must release 
software input surface @@ -2233,12 +2297,12 @@ static void VaapiRenderFrame(VaapiDecoder * decoder, VaapiUpdateOutput(decoder); } #else - if (av_cmp_q(decoder->InputAspect, frame->sample_aspect_ratio)) { + if (av_cmp_q(decoder->InputAspect, video_ctx->sample_aspect_ratio)) { Debug(3, "video/vaapi: aspect ratio changed\n"); //decoder->InputWidth = video_ctx->width; //decoder->InputHeight = video_ctx->height; - decoder->InputAspect = frame->sample_aspect_ratio; + decoder->InputAspect = video_ctx->sample_aspect_ratio; VaapiUpdateOutput(decoder); } #endif @@ -2371,10 +2435,13 @@ void VaapiDisplayFrame(void) atomic_dec(&decoder->SurfacesFilled); } - start = GetMsTicks(); surface = decoder->SurfacesRb[decoder->SurfaceRead]; + if (surface == VA_INVALID_ID) { + printf(_("video/vaapi: invalid surface in ringbuffer\n")); + } Debug(4, "video/vaapi: yy video surface %#x displayed\n", surface); + start = GetMsTicks(); if (vaSyncSurface(decoder->VaDisplay, surface) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); @@ -2387,22 +2454,33 @@ void VaapiDisplayFrame(void) put2 = put1; // deinterlace and full frame rate if (decoder->Interlaced) { + usleep(500); VaapiPutSurfaceX11(decoder, surface, decoder->Interlaced, decoder->TopFieldFirst, 1); + + if (0 && vaSyncSurface(decoder->VaDisplay, surface) + != VA_STATUS_SUCCESS) { + Error(_("video/vaapi: vaSyncSurface failed\n")); + } // FIXME: buggy libva-driver-vdpau. if (VaapiBuggyVdpau && VideoDeinterlace != VideoDeinterlaceWeave) { + usleep(500); VaapiPutSurfaceX11(decoder, surface, decoder->Interlaced, decoder->TopFieldFirst, 0); + usleep(500); VaapiPutSurfaceX11(decoder, surface, decoder->Interlaced, decoder->TopFieldFirst, 1); } put2 = GetMsTicks(); } - xcb_flush(Connection); + clock_gettime(CLOCK_REALTIME, &decoder->FrameTime); + + // fixes: [drm:i915_hangcheck_elapsed] *ERROR* Hangcheck + // timer elapsed... 
GPU hung + //usleep(1 * 1000); Debug(4, "video/vaapi: sync %2u put1 %2u put2 %2u\n", sync - start, put1 - sync, put2 - put1); - clock_gettime(CLOCK_REALTIME, &decoder->FrameTime); } else { Debug(3, "video/vaapi: no video surface ready\n"); } @@ -2503,6 +2581,8 @@ static void VaapiOsdInit(int width, int height) Debug(3, "video/vaapi: va-api not setup\n"); return; } + /*FIXME:return; */ + // // look through subpicture formats // @@ -2544,9 +2624,8 @@ static void VaapiOsdInit(int width, int height) Info(_("video/vaapi: vaapi supports unscaled osd\n")); VaapiUnscaledOsd = 1; } - // FIXME: - VaapiUnscaledOsd = 0; - Info(_("video/vaapi: unscaled osd disabled\n")); + //VaapiUnscaledOsd = 0; + //Info(_("video/vaapi: unscaled osd disabled\n")); if (vaCreateImage(VaDisplay, &formats[u], width, height, &VaOsdImage) != VA_STATUS_SUCCESS) { @@ -2556,6 +2635,13 @@ static void VaapiOsdInit(int width, int height) if (vaCreateSubpicture(VaDisplay, VaOsdImage.image_id, &VaOsdSubpicture) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't create subpicture\n")); + + if (vaDestroyImage(VaDisplay, + VaOsdImage.image_id) != VA_STATUS_SUCCESS) { + Error(_("video/vaapi: can't destroy image!\n")); + } + VaOsdImage.image_id = VA_INVALID_ID; + return; } // FIXME: must store format, to convert ARGB to it. @@ -2950,29 +3036,36 @@ static void *VideoDisplayHandlerThread(void *dummy) struct timespec abstime; VaapiDecoder *decoder; uint64_t delay; + int64_t audio_clock; + int64_t video_clock; decoder = VaapiDecoders[0]; VideoPollEvent(); - // initial delay - delay = AudioGetDelay(); - if (delay < 100 * 90) { // no audio delay known - delay = 760 * 1000 * 1000; - } else { - delay = (delay * 1000 * 1000) / 90 + 60 * 1000 * 1000; + audio_clock = AudioGetClock(); + video_clock = audio_clock; + if ((uint64_t) audio_clock != AV_NOPTS_VALUE + && (uint64_t) decoder->PTS != AV_NOPTS_VALUE) { + video_clock = decoder->PTS - (decoder->Interlaced ? 
40 : 20) * 90; } - clock_gettime(CLOCK_REALTIME, &nowtime); - if (!atomic_read(&decoder->SurfacesFilled) - || (uint64_t) ((nowtime.tv_sec - decoder->StartTime.tv_sec) - * 1000 * 1000 * 1000 + (nowtime.tv_nsec - - decoder->StartTime.tv_nsec)) > delay) { - if ((nowtime.tv_sec - decoder->StartTime.tv_sec) - * 1000 * 1000 * 1000 + (nowtime.tv_nsec - - decoder->StartTime.tv_nsec) - < 2000 * 1000 * 1000) { - Debug(3, "video: audio delay %lu ms\n", delay / (1000 * 1000)); + delay = 4 * 500L * 1000 * 1000; + clock_gettime(CLOCK_REALTIME, &nowtime); + + // wait until we got any surface + if (!atomic_read(&decoder->SurfacesFilled) + || video_clock < audio_clock + || ((uint64_t) ((nowtime.tv_sec - decoder->StartTime.tv_sec) + * 1000 * 1000 * 1000 + (nowtime.tv_nsec - + decoder->StartTime.tv_nsec)) > delay)) { + + if (!(decoder->FrameCounter % (50 * 10))) { + Debug(3, + "video: %09" PRIx64 "-%09" PRIx64 " pts %+dms %" PRId64 + "\n", audio_clock, video_clock, + (int)(audio_clock - video_clock) / 90, + AudioGetDelay() / 90); } // FIXME: hot polling pthread_mutex_lock(&VideoLockMutex); @@ -2988,9 +3081,13 @@ static void *VideoDisplayHandlerThread(void *dummy) ((nowtime.tv_sec - decoder->StartTime.tv_sec) * 1000 * 1000 * 1000 + (nowtime.tv_nsec - decoder->StartTime.tv_nsec)) / (1000 * 1000)); + Debug(3, + "video: %#012" PRIx64 "-%#012" PRIx64 " pts %+d ms %" PRId64 + "\n", audio_clock, video_clock, + (int)(audio_clock - video_clock) / 90, AudioGetDelay() / 90); abstime = nowtime; - abstime.tv_nsec += 18 * 1000 * 1000; + abstime.tv_nsec += 10 * 1000 * 1000; if (abstime.tv_nsec >= 1000 * 1000 * 1000) { // avoid overflow abstime.tv_sec++; @@ -3007,129 +3104,21 @@ static void *VideoDisplayHandlerThread(void *dummy) pthread_mutex_unlock(&VideoLockMutex); } + filled = atomic_read(&decoder->SurfacesFilled); clock_gettime(CLOCK_REALTIME, &nowtime); // time for one frame over, buggy for vaapi-vdpau - if ((nowtime.tv_sec - decoder->FrameTime.tv_sec) * 1000 * 1000 * 1000 + - (nowtime.tv_nsec 
- decoder->FrameTime.tv_nsec) < - (decoder->Interlaced ? 17 : 17) * 1000 * 1000) { + if (filled <= 1 && (nowtime.tv_sec - decoder->FrameTime.tv_sec) + * 1000 * 1000 * 1000 + (nowtime.tv_nsec - + decoder->FrameTime.tv_nsec) < + (decoder->Interlaced ? 15 : 15) * 1000 * 1000) { continue; } - filled = atomic_read(&decoder->SurfacesFilled); if (!filled) { pthread_mutex_lock(&VideoLockMutex); VaapiBlackSurface(decoder); pthread_mutex_unlock(&VideoLockMutex); - } else if (filled == 1) { - decoder->FramesDuped++; - ++decoder->FrameCounter; - if (!(decoder->FrameCounter % 333)) { - Warning(_ - ("video: display buffer empty, duping frame (%d/%d)\n"), - decoder->FramesDuped, decoder->FrameCounter); - VaapiPrintFrames(decoder); - } - } - - if (filled) { - pthread_mutex_lock(&VideoLockMutex); - VideoDisplayFrame(); - pthread_mutex_unlock(&VideoLockMutex); - } - } -#if 0 - for (;;) { - int err; - int filled; - struct timespec nowtime; - struct timespec abstime; - VaapiDecoder *decoder; - - clock_gettime(CLOCK_REALTIME, &abstime); - - VideoPollEvent(); - - // fill surface buffer - for (;;) { - static int max_filled; - uint32_t delay; - - clock_gettime(CLOCK_REALTIME, &nowtime); - // time to receive and decode over - if ((nowtime.tv_sec - abstime.tv_sec) * 1000 * 1000 * 1000 + - (nowtime.tv_nsec - abstime.tv_nsec) > - (decoder->Interlaced + 1) * 15 * 1000 * 1000) { - break; - } - - delay = 700 * 1000 * 1000; - // initial delay get decode only 1 frame - if ((nowtime.tv_sec - decoder->StartTime.tv_sec) - * 1000 * 1000 * 1000 + (nowtime.tv_nsec - - decoder->StartTime.tv_nsec) < delay) { - Debug(3, "video/vaapi: waiting %9lu ms\n", - ((nowtime.tv_sec - decoder->StartTime.tv_sec) - * 1000 * 1000 * 1000 + (nowtime.tv_nsec - - decoder->StartTime.tv_nsec)) / (1000 * 1000)); - - if (atomic_read(&decoder->SurfacesFilled)) { - break; - } - } - - if (atomic_read(&decoder->SurfacesFilled) >= 3) { - break; - } - // FIXME: hot polling - pthread_mutex_lock(&VideoLockMutex); - err = VideoDecode(); 
- pthread_mutex_unlock(&VideoLockMutex); - if (atomic_read(&decoder->SurfacesFilled) > 3) { - Debug(3, "video: %d filled\n", - atomic_read(&decoder->SurfacesFilled)); - if (atomic_read(&decoder->SurfacesFilled) > max_filled) { - max_filled = atomic_read(&decoder->SurfacesFilled); - } - } - if (err) { - usleep(1 * 1000); // nothing buffered - } - - } - - // wait up to 20ms - // FIXME: 50hz video frame rate hardcoded - abstime.tv_nsec += (decoder->Interlaced + 1) * 16 * 1000 * 1000; - if (abstime.tv_nsec >= 1000 * 1000 * 1000) { - // avoid overflow - abstime.tv_sec++; - abstime.tv_nsec -= 1000 * 1000 * 1000; - } - pthread_mutex_lock(&VideoMutex); - while ((err = - pthread_cond_timedwait(&VideoWakeupCond, &VideoMutex, - &abstime)) != ETIMEDOUT) { - Debug(3, "video/vaapi: pthread_cond_timedwait timeout\n"); - } - pthread_mutex_unlock(&VideoMutex); - if (err != ETIMEDOUT) { - Debug(3, "video/vaapi: pthread_cond_timedwait failed: %d\n", err); - } -#ifdef USE_GLX - //printf("video %p <-> %p\n", glXGetCurrentContext(), GlxThreadContext); - if (!glXMakeCurrent(XlibDisplay, VideoWindow, GlxThreadContext)) { - GlxCheck(); - Error(_("video/glx: can't make glx context current\n")); - return NULL; - } -#endif - - filled = atomic_read(&decoder->SurfacesFilled); - if (!filled) { - pthread_mutex_lock(&VideoLockMutex); - VaapiBlackSurface(decoder); - pthread_mutex_unlock(&VideoLockMutex); - } else if (filled == 1) { + } else if (filled == 1 || (Fix60Hz && !(decoder->FrameCounter % 6))) { decoder->FramesDuped++; ++decoder->FrameCounter; Warning(_("video: display buffer empty, duping frame (%d/%d)\n"), @@ -3144,15 +3133,7 @@ static void *VideoDisplayHandlerThread(void *dummy) VideoDisplayFrame(); pthread_mutex_unlock(&VideoLockMutex); } - - if (0) { - clock_gettime(CLOCK_REALTIME, &nowtime); - Debug(3, "video/vaapi: ticks %9lu ms\n", - ((nowtime.tv_sec - abstime.tv_sec) * 1000 * 1000 * 1000 + - (nowtime.tv_nsec - abstime.tv_nsec)) / (1000 * 1000)); - } } -#endif return dummy; } @@ 
-3194,7 +3175,7 @@ static void VideoThreadExit(void) if (VideoThread) { if (pthread_cancel(VideoThread)) { - Error(_("video: can't cancel video display thread\n")); + Error(_("video: can't queue cancel video display thread\n")); } if (pthread_join(VideoThread, &retval) || retval != PTHREAD_CANCELED) { Error(_("video: can't cancel video display thread\n")); @@ -3202,6 +3183,7 @@ static void VideoThreadExit(void) pthread_cond_destroy(&VideoWakeupCond); pthread_mutex_destroy(&VideoLockMutex); pthread_mutex_destroy(&VideoMutex); + VideoThread = 0; } } @@ -3386,6 +3368,16 @@ void VaapiTest(void) void VideoRenderFrame(VideoHwDecoder * decoder, AVCodecContext * video_ctx, AVFrame * frame) { + decoder->Vaapi.PTS += (decoder->Vaapi.Interlaced ? 40 : 20) * 90; + + // libav: sets only pkt_dts + if ((uint64_t) frame->pkt_dts != AV_NOPTS_VALUE) { + if (decoder->Vaapi.PTS != frame->pkt_dts) { + Debug(4, "video: %#012" PRIx64 "- %#012" PRIx64 " pts\n", + decoder->Vaapi.PTS, frame->pkt_dts); + decoder->Vaapi.PTS = frame->pkt_dts; + } + } if (!atomic_read(&decoder->Vaapi.SurfacesFilled)) { Debug(3, "video: new stream frame %d\n", GetMsTicks() - VideoSwitch); } @@ -3394,7 +3386,7 @@ void VideoRenderFrame(VideoHwDecoder * decoder, AVCodecContext * video_ctx, struct timespec abstime; abstime = decoder->Vaapi.FrameTime; - abstime.tv_nsec += 16 * 1000 * 1000; + abstime.tv_nsec += 10 * 1000 * 1000; if (abstime.tv_nsec >= 1000 * 1000 * 1000) { // avoid overflow abstime.tv_sec++;