mirror of
https://github.com/VDR4Arch/vdr.git
synced 2023-10-10 13:36:52 +02:00
Implemented character set conversion in 'libsi'
This commit is contained in:
parent
6d88da9385
commit
2ac9030e65
21
HISTORY
21
HISTORY
@ -5139,10 +5139,29 @@ Video Disk Recorder Revision History
|
||||
parameter to 0 turns off the automatic channel switching, and the user will
|
||||
have to confirm the entry by pressing the "Ok" key.
|
||||
|
||||
2007-03-10: Version 1.5.2
|
||||
2007-04-22: Version 1.5.2
|
||||
|
||||
- Updated the Finnish OSD texts (thanks to Rolf Ahrenberg).
|
||||
- Fixed handling user activity for shutdown, which I had messed up when adopting Udo's
|
||||
original patch (thanks to Udo Richter).
|
||||
- Added Turkish language texts (thanks to Oktay Yolgeçen).
|
||||
- Added missing rules for generating iso8859-13 font to Makefile.
|
||||
- 'libsi' now converts the incoming strings into the system's character set
|
||||
according to the DVB standard. The system's character set is determined from
|
||||
the LANG environment variable. If no recognizable setting can be found, no
|
||||
conversion will take place. Note that currently only the strings received from the
|
||||
SI data stream are converted, there have not been any changes regarding displaying
|
||||
UTF-8 characters on the OSD, yet - this will follow in one of the next steps.
|
||||
With this conversion, it should now be safe to run VDR on a UTF-8 file system,
|
||||
because all incoming characters are converted to UTF-8. This will most likely
|
||||
result in wrong characters being displayed on the OSD (because there UTF-8 is
|
||||
not known, yet), but the file names should be ok (haven't tested this myself,
|
||||
though, because I don't do UTF-8 - so please be very careful when testing!).
|
||||
There's one piece of bad news here: the German pay-tv broadcaster Premiere
|
||||
apparently encodes all EPG strings as ISO8859-1, but fails to correctly mark
|
||||
these strings as such. Therefore 'libsi' (following the DVB standard) considers
|
||||
the strings to be encoded in the default ISO6937 and converts them to whatever
|
||||
the system's character set is. This, of course, results in wrong umlauts.
|
||||
On its old transponder, the ProSieben/SAT.1 channels also had their EPG data
|
||||
wrongly encoded, but apparently on the new transponder they started broadcasting
|
||||
on this month, they got it right.
|
||||
|
196
libsi/si.c
196
libsi/si.c
@ -6,12 +6,15 @@
|
||||
* the Free Software Foundation; either version 2 of the License, or *
|
||||
* (at your option) any later version. *
|
||||
* *
|
||||
* $Id: si.c 1.17 2007/02/03 11:45:58 kls Exp $
|
||||
* $Id: si.c 1.18 2007/04/22 13:56:39 kls Exp $
|
||||
* *
|
||||
***************************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
#include "si.h"
|
||||
#include <errno.h>
|
||||
#include <iconv.h>
|
||||
#include <malloc.h>
|
||||
#include <string.h>
|
||||
#include "descriptor.h"
|
||||
|
||||
namespace SI {
|
||||
@ -232,7 +235,6 @@ char *String::getText(char *buffer, int size) {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
//taken from VDR, Copyright Klaus Schmidinger <kls@cadsoft.de>
|
||||
char *String::getText(char *buffer, char *shortVersion, int sizeBuffer, int sizeShortVersion) {
|
||||
int len=getLength();
|
||||
if (len < 0 || len >= sizeBuffer) {
|
||||
@ -245,21 +247,163 @@ char *String::getText(char *buffer, char *shortVersion, int sizeBuffer, int size
|
||||
return buffer;
|
||||
}
|
||||
|
||||
//taken from libdtv, Copyright Rolf Hakenes <hakenes@hippomi.de>
|
||||
// Names of the character tables that are selected by a single leading byte
// in an incoming string. The array is indexed by the value of that byte
// (0x00..0x1F); a NULL entry means that no table name is defined for that
// value.
static const char *CharacterTables1[] = {
  /* 0x00 */ NULL,
  /* 0x01 */ "ISO8859-5",
  /* 0x02 */ "ISO8859-6",
  /* 0x03 */ "ISO8859-7",
  /* 0x04 */ "ISO8859-8",
  /* 0x05 */ "ISO8859-9",
  /* 0x06 */ "ISO8859-10",
  /* 0x07 */ "ISO8859-11",
  /* 0x08 */ "ISO8859-12",
  /* 0x09 */ "ISO8859-13",
  /* 0x0A */ "ISO8859-14",
  /* 0x0B */ "ISO8859-15",
  /* 0x0C */ NULL,
  /* 0x0D */ NULL,
  /* 0x0E */ NULL,
  /* 0x0F */ NULL,
  /* 0x10 */ NULL,
  /* 0x11 */ "UTF16",
  /* 0x12 */ "EUC-KR",
  /* 0x13 */ "GB2312",
  /* 0x14 */ "GBK",
  /* 0x15 */ "UTF8",
  /* 0x16 */ NULL,
  /* 0x17 */ NULL,
  /* 0x18 */ NULL,
  /* 0x19 */ NULL,
  /* 0x1A */ NULL,
  /* 0x1B */ NULL,
  /* 0x1C */ NULL,
  /* 0x1D */ NULL,
  /* 0x1E */ NULL,
  /* 0x1F */ NULL,
  };

// Entries of CharacterTables1 up to and including this index are treated
// as single byte character tables.
#define SingleByteLimit 0x0B
|
||||
|
||||
// Names of the character tables that are selected by the two byte value
// following a leading 0x10 byte in an incoming string. The array is indexed
// by that value; a NULL entry means that no table name is defined for it.
static const char *CharacterTables2[] = {
  /* 0x00 */ NULL,
  /* 0x01 */ "ISO8859-1",
  /* 0x02 */ "ISO8859-2",
  /* 0x03 */ "ISO8859-3",
  /* 0x04 */ "ISO8859-4",
  /* 0x05 */ "ISO8859-5",
  /* 0x06 */ "ISO8859-6",
  /* 0x07 */ "ISO8859-7",
  /* 0x08 */ "ISO8859-8",
  /* 0x09 */ "ISO8859-9",
  /* 0x0A */ "ISO8859-10",
  /* 0x0B */ "ISO8859-11",
  /* 0x0C */ NULL,
  /* 0x0D */ "ISO8859-13",
  /* 0x0E */ "ISO8859-14",
  /* 0x0F */ "ISO8859-15",
  };

// Number of elements in one of the character table arrays above.
#define NumEntries(Table) (sizeof(Table) / sizeof(char *))
|
||||
|
||||
static const char *SystemCharacterTable = NULL;
|
||||
bool SystemCharacterTableIsSingleByte = true;
|
||||
|
||||
bool SetSystemCharacterTable(const char *CharacterTable) {
|
||||
if (CharacterTable) {
|
||||
for (unsigned int i = 0; i < NumEntries(CharacterTables1); i++) {
|
||||
if (CharacterTables1[i] && strcasecmp(CharacterTable, CharacterTables1[i]) == 0) {
|
||||
SystemCharacterTable = CharacterTables1[i];
|
||||
SystemCharacterTableIsSingleByte = i <= SingleByteLimit;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
for (unsigned int i = 0; i < NumEntries(CharacterTables2); i++) {
|
||||
if (CharacterTables2[i] && strcasecmp(CharacterTable, CharacterTables2[i]) == 0) {
|
||||
SystemCharacterTable = CharacterTables2[i];
|
||||
SystemCharacterTableIsSingleByte = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
SystemCharacterTable = NULL;
|
||||
SystemCharacterTableIsSingleByte = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Determines the character table used in the given buffer and returns
|
||||
// a string indicating that table. If no table can be determined, the
|
||||
// default ISO6937 is returned. If a table can be determined, the buffer
|
||||
// and length are adjusted accordingly.
|
||||
static const char *getCharacterTable(const unsigned char *&buffer, int &length, bool *isSingleByte = NULL) {
|
||||
const char *cs = "ISO6937";
|
||||
if (isSingleByte)
|
||||
*isSingleByte = false;
|
||||
if (length <= 0)
|
||||
return cs;
|
||||
unsigned int tag = buffer[0];
|
||||
if (tag >= 0x20)
|
||||
return cs;
|
||||
if (tag == 0x10) {
|
||||
if (length >= 3) {
|
||||
tag = (buffer[1] << 8) | buffer[2];
|
||||
if (tag < NumEntries(CharacterTables2) && CharacterTables2[tag]) {
|
||||
buffer += 3;
|
||||
length -= 3;
|
||||
if (isSingleByte)
|
||||
*isSingleByte = true;
|
||||
return CharacterTables2[tag];
|
||||
}
|
||||
}
|
||||
} else if (tag < NumEntries(CharacterTables1) && CharacterTables1[tag]) {
|
||||
buffer += 1;
|
||||
length -= 1;
|
||||
if (isSingleByte)
|
||||
*isSingleByte = tag <= SingleByteLimit;
|
||||
return CharacterTables1[tag];
|
||||
}
|
||||
return cs;
|
||||
}
|
||||
|
||||
static bool convertCharacterTable(const char *from, size_t fromLength, char *to, size_t toLength, const char *fromCode)
|
||||
{
|
||||
if (SystemCharacterTable) {
|
||||
iconv_t cd = iconv_open(SystemCharacterTable, fromCode);
|
||||
if (cd >= 0) {
|
||||
char *fromPtr = (char *)from;
|
||||
while (fromLength > 0 && toLength > 1) {
|
||||
if (iconv(cd, &fromPtr, &fromLength, &to, &toLength) == size_t(-1)) {
|
||||
if (errno == EILSEQ) {
|
||||
// A character can't be converted, so mark it with '?' and proceed:
|
||||
fromPtr++;
|
||||
fromLength--;
|
||||
*to++ = '?';
|
||||
toLength--;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
*to = 0;
|
||||
iconv_close(cd);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// originally from libdtv, Copyright Rolf Hakenes <hakenes@hippomi.de>
|
||||
void String::decodeText(char *buffer, int size) {
|
||||
const unsigned char *from=data.getData(0);
|
||||
char *to=buffer;
|
||||
|
||||
/* Disable detection of coding tables - libdtv doesn't do it either
|
||||
if ( (0x01 <= *from) && (*from <= 0x1f) ) {
|
||||
codeTable=*from
|
||||
}
|
||||
*/
|
||||
|
||||
if (*from == 0x10)
|
||||
from += 3; // skips code table info
|
||||
|
||||
int len=getLength();
|
||||
if (len <= 0) {
|
||||
*to = '\0';
|
||||
return;
|
||||
}
|
||||
bool singleByte;
|
||||
const char *cs = getCharacterTable(from, len, &singleByte);
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (*from == 0)
|
||||
break;
|
||||
@ -276,6 +420,11 @@ void String::decodeText(char *buffer, int size) {
|
||||
break;
|
||||
}
|
||||
*to = '\0';
|
||||
if (!singleByte || !SystemCharacterTableIsSingleByte) {
|
||||
char convBuffer[size];
|
||||
if (convertCharacterTable(buffer, strlen(buffer), convBuffer, sizeof(convBuffer), cs))
|
||||
strncpy(buffer, convBuffer, strlen(convBuffer) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
void String::decodeText(char *buffer, char *shortVersion, int sizeBuffer, int sizeShortVersion) {
|
||||
@ -283,11 +432,14 @@ void String::decodeText(char *buffer, char *shortVersion, int sizeBuffer, int si
|
||||
char *to=buffer;
|
||||
char *toShort=shortVersion;
|
||||
int IsShortName=0;
|
||||
|
||||
if (*from == 0x10)
|
||||
from += 3; // skips code table info
|
||||
|
||||
int len=getLength();
|
||||
if (len <= 0) {
|
||||
*to = '\0';
|
||||
*toShort = '\0';
|
||||
return;
|
||||
}
|
||||
bool singleByte;
|
||||
const char *cs = getCharacterTable(from, len, &singleByte);
|
||||
for (int i = 0; i < len; i++) {
|
||||
if ( ((' ' <= *from) && (*from <= '~'))
|
||||
|| (*from == '\n')
|
||||
@ -312,6 +464,14 @@ void String::decodeText(char *buffer, char *shortVersion, int sizeBuffer, int si
|
||||
}
|
||||
*to = '\0';
|
||||
*toShort = '\0';
|
||||
if (!singleByte || !SystemCharacterTableIsSingleByte) {
|
||||
char convBuffer[sizeBuffer];
|
||||
if (convertCharacterTable(buffer, strlen(buffer), convBuffer, sizeof(convBuffer), cs))
|
||||
strncpy(buffer, convBuffer, strlen(convBuffer) + 1);
|
||||
char convShortVersion[sizeShortVersion];
|
||||
if (convertCharacterTable(shortVersion, strlen(shortVersion), convShortVersion, sizeof(convShortVersion), cs))
|
||||
strncpy(shortVersion, convShortVersion, strlen(convShortVersion) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
Descriptor *Descriptor::getDescriptor(CharArray da, DescriptorTagDomain domain, bool returnUnimplemetedDescriptor) {
|
||||
|
@ -6,7 +6,7 @@
|
||||
* the Free Software Foundation; either version 2 of the License, or *
|
||||
* (at your option) any later version. *
|
||||
* *
|
||||
* $Id: si.h 1.16 2007/02/03 11:47:25 kls Exp $
|
||||
* $Id: si.h 1.17 2007/04/22 13:32:09 kls Exp $
|
||||
* *
|
||||
***************************************************************************/
|
||||
|
||||
@ -486,6 +486,11 @@ protected:
|
||||
void decodeText(char *buffer, char *shortVersion, int sizeBuffer, int sizeShortVersion);
|
||||
};
|
||||
|
||||
// Call this function to set the system character table. CharacterTable is a string
|
||||
// like "iso8859-15" or "utf-8" (case insensitive).
|
||||
// Returns true if the character table was recognized.
|
||||
bool SetSystemCharacterTable(const char *CharacterTable);
|
||||
|
||||
} //end of namespace
|
||||
|
||||
#endif //LIBSI_SI_H
|
||||
|
23
vdr.c
23
vdr.c
@ -22,7 +22,7 @@
|
||||
*
|
||||
* The project's page is at http://www.cadsoft.de/vdr
|
||||
*
|
||||
* $Id: vdr.c 1.286 2007/03/10 13:00:22 kls Exp $
|
||||
* $Id: vdr.c 1.287 2007/04/22 13:28:32 kls Exp $
|
||||
*/
|
||||
|
||||
#include <getopt.h>
|
||||
@ -455,15 +455,6 @@ int main(int argc, char *argv[])
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check for UTF-8 and exit if present - asprintf() will fail if it encounters 8 bit ASCII codes
|
||||
char *LangEnv;
|
||||
if ((LangEnv = getenv("LANG")) != NULL && strcasestr(LangEnv, "utf") ||
|
||||
(LangEnv = getenv("LC_ALL")) != NULL && strcasestr(LangEnv, "utf") ||
|
||||
(LangEnv = getenv("LC_CTYPE")) != NULL && strcasestr(LangEnv, "utf")) {
|
||||
fprintf(stderr, "vdr: please turn off UTF-8 before starting VDR\n");
|
||||
return 2;
|
||||
}
|
||||
|
||||
// Log file:
|
||||
|
||||
if (SysLogLevel > 0)
|
||||
@ -500,6 +491,18 @@ int main(int argc, char *argv[])
|
||||
dsyslog("running as daemon (tid=%d)", cThread::ThreadId());
|
||||
cThread::SetMainThreadId();
|
||||
|
||||
// Set the system character table:
|
||||
|
||||
char *LangEnv = getenv("LANG");
|
||||
if (LangEnv) {
|
||||
char *CodeSet = strchr(LangEnv, '.');
|
||||
if (CodeSet) {
|
||||
CodeSet++; // skip the dot
|
||||
bool known = SI::SetSystemCharacterTable(CodeSet);
|
||||
isyslog("codeset is '%s' - %s", CodeSet, known ? "known" : "unknown");
|
||||
}
|
||||
}
|
||||
|
||||
// Main program loop variables - need to be here to have them initialized before any EXIT():
|
||||
|
||||
cOsdObject *Menu = NULL;
|
||||
|
Loading…
Reference in New Issue
Block a user