mirror of
https://github.com/VDR4Arch/vdr.git
synced 2023-10-10 13:36:52 +02:00
Fixed handling control characters in SI data in case of UTF-8 encoded strings
This commit is contained in:
parent
f4aabad2ae
commit
cee8341545
@ -2917,3 +2917,6 @@ Dirk Heiser <dirk-vdr@gmx.de>
|
|||||||
|
|
||||||
Ludi Kaleni <ludi113@hotmail.com>
|
Ludi Kaleni <ludi113@hotmail.com>
|
||||||
for suggesting to add the source character to channel names whenever they are displayed
|
for suggesting to add the source character to channel names whenever they are displayed
|
||||||
|
|
||||||
|
Mehdi Karamnejad <mehdi_karamnejad@sfu.ca>
|
||||||
|
for reporting a problem with garbled UTF-8 EPG data and helping to debug it
|
||||||
|
5
HISTORY
5
HISTORY
@ -7191,6 +7191,9 @@ Video Disk Recorder Revision History
|
|||||||
turn on adding the source character to channel names whenever they are displayed
|
turn on adding the source character to channel names whenever they are displayed
|
||||||
(suggested by Ludi Kaleni).
|
(suggested by Ludi Kaleni).
|
||||||
|
|
||||||
2012-07-15: Version 1.7.30
|
2012-08-21: Version 1.7.30
|
||||||
|
|
||||||
- Fixed sorting recordings in the top level video directory.
|
- Fixed sorting recordings in the top level video directory.
|
||||||
|
- Fixed handling control characters in SI data in case of UTF-8 encoded strings
|
||||||
|
(thanks to Mehdi Karamnejad for reporting a problem with garbled UTF-8 EPG data
|
||||||
|
and helping to debug it).
|
||||||
|
138
libsi/si.c
138
libsi/si.c
@ -6,7 +6,7 @@
|
|||||||
* the Free Software Foundation; either version 2 of the License, or *
|
* the Free Software Foundation; either version 2 of the License, or *
|
||||||
* (at your option) any later version. *
|
* (at your option) any later version. *
|
||||||
* *
|
* *
|
||||||
* $Id: si.c 2.6 2011/12/10 15:47:15 kls Exp $
|
* $Id: si.c 2.7 2012/08/21 08:10:00 kls Exp $
|
||||||
* *
|
* *
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
@ -405,6 +405,21 @@ bool convertCharacterTable(const char *from, size_t fromLength, char *to, size_t
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A similar version is used in VDR/tools.c:
|
||||||
|
static int Utf8CharLen(const char *s)
|
||||||
|
{
|
||||||
|
if (SystemCharacterTableIsSingleByte)
|
||||||
|
return 1;
|
||||||
|
#define MT(s, m, v) ((*(s) & (m)) == (v)) // Mask Test
|
||||||
|
if (MT(s, 0xE0, 0xC0) && MT(s + 1, 0xC0, 0x80))
|
||||||
|
return 2;
|
||||||
|
if (MT(s, 0xF0, 0xE0) && MT(s + 1, 0xC0, 0x80) && MT(s + 2, 0xC0, 0x80))
|
||||||
|
return 3;
|
||||||
|
if (MT(s, 0xF8, 0xF0) && MT(s + 1, 0xC0, 0x80) && MT(s + 2, 0xC0, 0x80) && MT(s + 3, 0xC0, 0x80))
|
||||||
|
return 4;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
// originally from libdtv, Copyright Rolf Hakenes <hakenes@hippomi.de>
|
// originally from libdtv, Copyright Rolf Hakenes <hakenes@hippomi.de>
|
||||||
void String::decodeText(char *buffer, int size) {
|
void String::decodeText(char *buffer, int size) {
|
||||||
const unsigned char *from=data.getData(0);
|
const unsigned char *from=data.getData(0);
|
||||||
@ -413,82 +428,71 @@ void String::decodeText(char *buffer, int size) {
|
|||||||
if (len <= 0) {
|
if (len <= 0) {
|
||||||
*to = '\0';
|
*to = '\0';
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
bool singleByte;
|
bool singleByte;
|
||||||
const char *cs = getCharacterTable(from, len, &singleByte);
|
const char *cs = getCharacterTable(from, len, &singleByte);
|
||||||
// FIXME Need to make this UTF-8 aware (different control codes).
|
if (singleByte && SystemCharacterTableIsSingleByte || !convertCharacterTable((const char *)from, len, to, size, cs)) {
|
||||||
// However, there's yet to be found a broadcaster that actually
|
if (len >= size)
|
||||||
// uses UTF-8 for the SI data... (kls 2007-06-10)
|
len = size - 1;
|
||||||
for (int i = 0; i < len; i++) {
|
strncpy(to, (const char *)from, len);
|
||||||
if (*from == 0)
|
to[len] = 0;
|
||||||
break;
|
|
||||||
if ( ((' ' <= *from) && (*from <= '~'))
|
|
||||||
|| (*from == '\n')
|
|
||||||
|| (0xA0 <= *from)
|
|
||||||
)
|
|
||||||
*to++ = *from;
|
|
||||||
else if (*from == 0x8A)
|
|
||||||
*to++ = '\n';
|
|
||||||
from++;
|
|
||||||
if (to - buffer >= size - 1)
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
*to = '\0';
|
else
|
||||||
if (!singleByte || !SystemCharacterTableIsSingleByte) {
|
len = strlen(to); // might have changed
|
||||||
char convBuffer[size];
|
// Handle control codes:
|
||||||
if (convertCharacterTable(buffer, strlen(buffer), convBuffer, sizeof(convBuffer), cs))
|
while (len > 0) {
|
||||||
strncpy(buffer, convBuffer, strlen(convBuffer) + 1);
|
int l = Utf8CharLen(to);
|
||||||
|
if (l <= 2) {
|
||||||
|
unsigned char *p = (unsigned char *)to;
|
||||||
|
if (l == 2 && *p == 0xC2) // UTF-8 sequence
|
||||||
|
p++;
|
||||||
|
bool Move = true;
|
||||||
|
switch (*p) {
|
||||||
|
case 0x8A: *to = '\n'; break;
|
||||||
|
case 0xA0: *to = ' '; break;
|
||||||
|
default: Move = false;
|
||||||
|
}
|
||||||
|
if (l == 2 && Move) {
|
||||||
|
memmove(p, p + 1, len - 1); // we also copy the terminating 0!
|
||||||
|
l = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
to += l;
|
||||||
|
len -= l;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void String::decodeText(char *buffer, char *shortVersion, int sizeBuffer, int sizeShortVersion) {
|
void String::decodeText(char *buffer, char *shortVersion, int sizeBuffer, int sizeShortVersion) {
|
||||||
const unsigned char *from=data.getData(0);
|
decodeText(buffer, sizeBuffer);
|
||||||
char *to=buffer;
|
if (!*buffer) {
|
||||||
char *toShort=shortVersion;
|
*shortVersion = '\0';
|
||||||
int IsShortName=0;
|
|
||||||
int len=getLength();
|
|
||||||
if (len <= 0) {
|
|
||||||
*to = '\0';
|
|
||||||
*toShort = '\0';
|
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
bool singleByte;
|
|
||||||
const char *cs = getCharacterTable(from, len, &singleByte);
|
|
||||||
// FIXME Need to make this UTF-8 aware (different control codes).
|
|
||||||
// However, there's yet to be found a broadcaster that actually
|
|
||||||
// uses UTF-8 for the SI data... (kls 2007-06-10)
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
if ( ((' ' <= *from) && (*from <= '~'))
|
|
||||||
|| (*from == '\n')
|
|
||||||
|| (0xA0 <= *from)
|
|
||||||
)
|
|
||||||
{
|
|
||||||
*to++ = *from;
|
|
||||||
if (IsShortName)
|
|
||||||
*toShort++ = *from;
|
|
||||||
}
|
|
||||||
else if (*from == 0x8A)
|
|
||||||
*to++ = '\n';
|
|
||||||
else if (*from == 0x86)
|
|
||||||
IsShortName++;
|
|
||||||
else if (*from == 0x87)
|
|
||||||
IsShortName--;
|
|
||||||
else if (*from == 0)
|
|
||||||
break;
|
|
||||||
from++;
|
|
||||||
if (to - buffer >= sizeBuffer - 1 || toShort - shortVersion >= sizeShortVersion - 1)
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
*to = '\0';
|
// Handle control codes:
|
||||||
*toShort = '\0';
|
char *to=buffer;
|
||||||
if (!singleByte || !SystemCharacterTableIsSingleByte) {
|
int len=strlen(to);
|
||||||
char convBuffer[sizeBuffer];
|
int IsShortName=0;
|
||||||
if (convertCharacterTable(buffer, strlen(buffer), convBuffer, sizeof(convBuffer), cs))
|
while (len > 0) {
|
||||||
strncpy(buffer, convBuffer, strlen(convBuffer) + 1);
|
int l = Utf8CharLen(to);
|
||||||
char convShortVersion[sizeShortVersion];
|
unsigned char *p = (unsigned char *)to;
|
||||||
if (convertCharacterTable(shortVersion, strlen(shortVersion), convShortVersion, sizeof(convShortVersion), cs))
|
if (l == 2 && *p == 0xC2) // UTF-8 sequence
|
||||||
strncpy(shortVersion, convShortVersion, strlen(convShortVersion) + 1);
|
p++;
|
||||||
|
if (*p == 0x86 || *p == 0x87) {
|
||||||
|
IsShortName += (*p == 0x86) ? 1 : -1;
|
||||||
|
memmove(to, to + l, len - l + 1); // we also copy the terminating 0!
|
||||||
|
l = 0;
|
||||||
|
}
|
||||||
|
if (l && IsShortName) {
|
||||||
|
if (l < sizeShortVersion) {
|
||||||
|
for (int i = 0; i < l; i++)
|
||||||
|
*shortVersion++ = to[i];
|
||||||
|
sizeShortVersion -= l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
to += l;
|
||||||
|
len -= l;
|
||||||
}
|
}
|
||||||
|
*shortVersion = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
Descriptor *Descriptor::getDescriptor(CharArray da, DescriptorTagDomain domain, bool returnUnimplemetedDescriptor) {
|
Descriptor *Descriptor::getDescriptor(CharArray da, DescriptorTagDomain domain, bool returnUnimplemetedDescriptor) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user