Slightly improved the speed of Intel VA-API deinterlacing.

This commit is contained in:
Johns 2012-01-17 18:53:53 +01:00
parent 9f668c4750
commit 19d4eeed82
1 changed file with 122 additions and 8 deletions

130
video.c
View File

@ -2388,40 +2388,54 @@ static void VaapiBlackSurface(VaapiDecoder * decoder)
static void VaapiBob(VaapiDecoder * decoder, VAImage * src, VAImage * dst1, static void VaapiBob(VaapiDecoder * decoder, VAImage * src, VAImage * dst1,
VAImage * dst2) VAImage * dst2)
{ {
uint32_t tick1;
uint32_t tick2;
uint32_t tick3;
uint32_t tick4;
uint32_t tick5;
uint32_t tick6;
uint32_t tick7;
uint32_t tick8;
void *src_base; void *src_base;
void *dst1_base; void *dst1_base;
void *dst2_base; void *dst2_base;
unsigned y; unsigned y;
unsigned p; unsigned p;
tick1 = GetMsTicks();
if (vaMapBuffer(decoder->VaDisplay, src->buf, if (vaMapBuffer(decoder->VaDisplay, src->buf,
&src_base) != VA_STATUS_SUCCESS) { &src_base) != VA_STATUS_SUCCESS) {
Fatal("video/vaapi: can't map the image!\n"); Fatal("video/vaapi: can't map the image!\n");
} }
tick2 = GetMsTicks();
if (vaMapBuffer(decoder->VaDisplay, dst1->buf, if (vaMapBuffer(decoder->VaDisplay, dst1->buf,
&dst1_base) != VA_STATUS_SUCCESS) { &dst1_base) != VA_STATUS_SUCCESS) {
Fatal("video/vaapi: can't map the image!\n"); Fatal("video/vaapi: can't map the image!\n");
} }
tick3 = GetMsTicks();
if (vaMapBuffer(decoder->VaDisplay, dst2->buf, if (vaMapBuffer(decoder->VaDisplay, dst2->buf,
&dst2_base) != VA_STATUS_SUCCESS) { &dst2_base) != VA_STATUS_SUCCESS) {
Fatal("video/vaapi: can't map the image!\n"); Fatal("video/vaapi: can't map the image!\n");
} }
tick4 = GetMsTicks();
if (0) { // test all updated if (0) { // test all updated
memset(dst1_base, 0x00, dst1->data_size); memset(dst1_base, 0x00, dst1->data_size);
memset(dst2_base, 0xFF, dst2->data_size); memset(dst2_base, 0xFF, dst2->data_size);
return; return;
} }
#if 0
// interleave
for (p = 0; p < src->num_planes; ++p) { for (p = 0; p < src->num_planes; ++p) {
for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) {
memcpy(dst1_base + src->offsets[p] + y * src->pitches[p], memcpy(dst1_base + src->offsets[p] + (y + 0) * src->pitches[p],
src_base + src->offsets[p] + y * src->pitches[p], src_base + src->offsets[p] + (y + 0) * src->pitches[p],
src->pitches[p]); src->pitches[p]);
memcpy(dst1_base + src->offsets[p] + (y + 1) * src->pitches[p], memcpy(dst1_base + src->offsets[p] + (y + 1) * src->pitches[p],
src_base + src->offsets[p] + y * src->pitches[p], src_base + src->offsets[p] + (y + 0) * src->pitches[p],
src->pitches[p]); src->pitches[p]);
memcpy(dst2_base + src->offsets[p] + y * src->pitches[p], memcpy(dst2_base + src->offsets[p] + (y + 0) * src->pitches[p],
src_base + src->offsets[p] + (y + 1) * src->pitches[p], src_base + src->offsets[p] + (y + 1) * src->pitches[p],
src->pitches[p]); src->pitches[p]);
memcpy(dst2_base + src->offsets[p] + (y + 1) * src->pitches[p], memcpy(dst2_base + src->offsets[p] + (y + 1) * src->pitches[p],
@ -2429,16 +2443,116 @@ static void VaapiBob(VaapiDecoder * decoder, VAImage * src, VAImage * dst1,
src->pitches[p]); src->pitches[p]);
} }
} }
#endif
#if 1
// use tmp copy
if (1) {
uint8_t *tmp;
tmp = malloc(src->data_size);
memcpy(tmp, src_base, src->data_size);
for (p = 0; p < src->num_planes; ++p) {
for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) {
memcpy(dst1_base + src->offsets[p] + (y + 0) * src->pitches[p],
tmp + src->offsets[p] + (y + 0) * src->pitches[p],
src->pitches[p]);
memcpy(dst1_base + src->offsets[p] + (y + 1) * src->pitches[p],
tmp + src->offsets[p] + (y + 0) * src->pitches[p],
src->pitches[p]);
memcpy(dst2_base + src->offsets[p] + (y + 0) * src->pitches[p],
tmp + src->offsets[p] + (y + 1) * src->pitches[p],
src->pitches[p]);
memcpy(dst2_base + src->offsets[p] + (y + 1) * src->pitches[p],
tmp + src->offsets[p] + (y + 1) * src->pitches[p],
src->pitches[p]);
}
}
free(tmp);
}
#endif
#if 0
// use multiple tmp copy
if (1) {
uint8_t *tmp_src;
uint8_t *tmp_dst1;
uint8_t *tmp_dst2;
tmp_src = malloc(src->data_size);
memcpy(tmp_src, src_base, src->data_size);
tmp_dst1 = malloc(src->data_size);
tmp_dst2 = malloc(src->data_size);
for (p = 0; p < src->num_planes; ++p) {
for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) {
memcpy(tmp_dst1 + src->offsets[p] + (y + 0) * src->pitches[p],
tmp_src + src->offsets[p] + (y + 0) * src->pitches[p],
src->pitches[p]);
memcpy(tmp_dst1 + src->offsets[p] + (y + 1) * src->pitches[p],
tmp_src + src->offsets[p] + (y + 0) * src->pitches[p],
src->pitches[p]);
memcpy(tmp_dst2 + src->offsets[p] + (y + 0) * src->pitches[p],
tmp_src + src->offsets[p] + (y + 1) * src->pitches[p],
src->pitches[p]);
memcpy(tmp_dst2 + src->offsets[p] + (y + 1) * src->pitches[p],
tmp_src + src->offsets[p] + (y + 1) * src->pitches[p],
src->pitches[p]);
}
}
memcpy(dst1_base, tmp_dst1, src->data_size);
memcpy(dst2_base, tmp_dst2, src->data_size);
free(tmp_src);
free(tmp_dst1);
free(tmp_dst2);
}
#endif
#if 0
// dst1 first
for (p = 0; p < src->num_planes; ++p) {
for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) {
memcpy(dst1_base + src->offsets[p] + (y + 0) * src->pitches[p],
src_base + src->offsets[p] + (y + 0) * src->pitches[p],
src->pitches[p]);
memcpy(dst1_base + src->offsets[p] + (y + 1) * src->pitches[p],
src_base + src->offsets[p] + (y + 0) * src->pitches[p],
src->pitches[p]);
}
}
// dst2 next
for (p = 0; p < src->num_planes; ++p) {
for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) {
memcpy(dst2_base + src->offsets[p] + (y + 0) * src->pitches[p],
src_base + src->offsets[p] + (y + 1) * src->pitches[p],
src->pitches[p]);
memcpy(dst2_base + src->offsets[p] + (y + 1) * src->pitches[p],
src_base + src->offsets[p] + (y + 1) * src->pitches[p],
src->pitches[p]);
}
}
#endif
tick5 = GetMsTicks();
if (vaUnmapBuffer(decoder->VaDisplay, dst2->buf) != VA_STATUS_SUCCESS) { if (vaUnmapBuffer(decoder->VaDisplay, dst2->buf) != VA_STATUS_SUCCESS) {
Error(_("video/vaapi: can't unmap image buffer\n")); Error(_("video/vaapi: can't unmap image buffer\n"));
} }
tick6 = GetMsTicks();
if (vaUnmapBuffer(decoder->VaDisplay, dst1->buf) != VA_STATUS_SUCCESS) { if (vaUnmapBuffer(decoder->VaDisplay, dst1->buf) != VA_STATUS_SUCCESS) {
Error(_("video/vaapi: can't unmap image buffer\n")); Error(_("video/vaapi: can't unmap image buffer\n"));
} }
tick7 = GetMsTicks();
if (vaUnmapBuffer(decoder->VaDisplay, src->buf) != VA_STATUS_SUCCESS) { if (vaUnmapBuffer(decoder->VaDisplay, src->buf) != VA_STATUS_SUCCESS) {
Error(_("video/vaapi: can't unmap image buffer\n")); Error(_("video/vaapi: can't unmap image buffer\n"));
} }
tick8 = GetMsTicks();
Debug(3, "video/vaapi: map=%2d/%2d/%2d deint=%2d umap=%2d/%2d/%2d\n",
tick2 - tick1, tick3 - tick2, tick4 - tick3, tick5 - tick4,
tick6 - tick5, tick7 - tick6, tick8 - tick7);
} }
/// ///
@ -2576,7 +2690,7 @@ static void VaapiCpuDerive(VaapiDecoder * decoder, VASurfaceID surface)
tick5 = GetMsTicks(); tick5 = GetMsTicks();
Debug(3, "video/vaapi: get=%2d get1=%2d get2=%d deint=%2d\n", Debug(4, "video/vaapi: get=%2d get1=%2d get2=%d deint=%2d\n",
tick2 - tick1, tick3 - tick2, tick4 - tick3, tick5 - tick4); tick2 - tick1, tick3 - tick2, tick4 - tick3, tick5 - tick4);
} }
@ -2645,12 +2759,12 @@ static void VaapiCpuPut(VaapiDecoder * decoder, VASurfaceID surface)
abort(); abort();
} }
VaapiQueueSurface(decoder, out, 1); VaapiQueueSurface(decoder, out, 1);
if (1 && vaSyncSurface(decoder->VaDisplay, out) != VA_STATUS_SUCCESS) { if (0 && vaSyncSurface(decoder->VaDisplay, out) != VA_STATUS_SUCCESS) {
Error(_("video/vaapi: vaSyncSurface failed\n")); Error(_("video/vaapi: vaSyncSurface failed\n"));
} }
tick4 = GetMsTicks(); tick4 = GetMsTicks();
Debug(3, "video/vaapi: deint %d %#010x -> %#010x\n", decoder->SurfaceField, Debug(4, "video/vaapi: deint %d %#010x -> %#010x\n", decoder->SurfaceField,
surface, out); surface, out);
// get a free surface and upload the image // get a free surface and upload the image
@ -2664,7 +2778,7 @@ static void VaapiCpuPut(VaapiDecoder * decoder, VASurfaceID surface)
Error("video/vaapi: can't put image!\n"); Error("video/vaapi: can't put image!\n");
} }
VaapiQueueSurface(decoder, out, 1); VaapiQueueSurface(decoder, out, 1);
if (1 && vaSyncSurface(decoder->VaDisplay, out) != VA_STATUS_SUCCESS) { if (0 && vaSyncSurface(decoder->VaDisplay, out) != VA_STATUS_SUCCESS) {
Error(_("video/vaapi: vaSyncSurface failed\n")); Error(_("video/vaapi: vaSyncSurface failed\n"));
} }
tick5 = GetMsTicks(); tick5 = GetMsTicks();