diff --git a/src/Base/InputSource.cpp b/src/Base/InputSource.cpp index 61cbfb400..5f669e320 100644 --- a/src/Base/InputSource.cpp +++ b/src/Base/InputSource.cpp @@ -57,6 +57,8 @@ using namespace std; StdInputStream::StdInputStream( std::istream& Stream, XERCES_CPP_NAMESPACE_QUALIFIER MemoryManager* const manager ) : stream(Stream), fMemoryManager(manager) { + state.flags |= QTextCodec::IgnoreHeader; + state.flags |= QTextCodec::ConvertInvalidToNull; } @@ -84,37 +86,19 @@ unsigned int StdInputStream::readBytes( XMLByte* const toFill, const unsigned i stream.read((char *)toFill,maxToRead); XMLSize_t len = stream.gcount(); - // See http://de.wikipedia.org/wiki/UTF-8#Kodierung - for (XMLSize_t i=0; itoUnicode((char *)toFill, len, &state); + if (state.invalidChars > 0) { + // In case invalid characters were found decode back to 'utf-8' and replace + // them with '?' + // First, Qt replaces invalid characters with '\0' (see ConvertInvalidToNull) + // but Xerces doesn't like this because it handles this as termination. Thus, + // we have to go through the array and replace '\0' with '?'. + XMLSize_t pos = 0; + QByteArray ba = codec->fromUnicode(text); + for (int i=0; itoUnicode((char *)toFill, len, &state); + if (state.invalidChars > 0) { + // In case invalid characters were found decode back to 'utf-8' and replace + // them with '?' + // First, Qt replaces invalid characters with '\0' (see ConvertInvalidToNull) + // but Xerces doesn't like this because it handles this as termination. Thus, + // we have to go through the array and replace '\0' with '?'. + XMLSize_t pos = 0; + QByteArray ba = codec->fromUnicode(text); + for (int i=0; i #include #include +#include XERCES_CPP_NAMESPACE_BEGIN @@ -75,6 +76,7 @@ private : // ----------------------------------------------------------------------- std::istream &stream; XERCES_CPP_NAMESPACE_QUALIFIER MemoryManager* const fMemoryManager; + QTextCodec::ConverterState state; };