// Decodes one UTF-8 sequence starting at `begin` into `output`. Declared to return an
// iterator of type In; callers in this file assign the result back to `begin`.
// NOTE(review): this listing has source line numbers fused onto each line and lacks the
// template header, braces and several statements (including whatever uses `replacement`
// and the return). The comments below describe only what is visible.
39In Utf<8>::decode(In begin, In end,
Uint32& output,
Uint32 replacement)
// Number of bytes that follow a lead byte, indexed by the lead byte's value:
// 0x00-0xBF -> 0, 0xC0-0xDF -> 1, 0xE0-0xEF -> 2, 0xF0-0xF7 -> 3, 0xF8-0xFB -> 4, 0xFC-0xFF -> 5.
// NOTE(review): 0x80-0xBF (continuation bytes) are therefore treated as stand-alone
// one-byte sequences, and 0xF8-0xFF select 5- and 6-byte forms that RFC 3629 does not allow.
42 static const int trailing[256] =
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
// Per-length constant subtracted after accumulation to strip the lead/continuation
// marker bits; for example offsets[1] == (0xC0 << 6) + 0x80.
53 static const Uint32 offsets[6] =
55 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
// Sequence length is taken from the first byte, cast to Uint8 before indexing the table.
59 int trailingBytes = trailing[
static_cast<Uint8>(*begin)];
// Decode only when the last byte of the sequence lies before `end`. The test uses `+`
// and `<` on In, so In must support iterator arithmetic and ordering. The handling of a
// truncated sequence is not visible in this listing.
60 if (begin + trailingBytes < end)
// Each case adds one byte and shifts left by 6 bits. No `break` is visible, so entry at
// case N appears to fall through down to case 0, consuming N + 1 bytes in total.
// `output` is added to rather than assigned, so it has to be zero before the switch;
// that initialisation is not visible here - TODO confirm.
// NOTE(review): no check that the following bytes have the 10xxxxxx form is visible.
63 switch (trailingBytes)
65 case 5: output +=
static_cast<Uint8>(*begin++); output <<= 6;
66 case 4: output +=
static_cast<Uint8>(*begin++); output <<= 6;
67 case 3: output +=
static_cast<Uint8>(*begin++); output <<= 6;
68 case 2: output +=
static_cast<Uint8>(*begin++); output <<= 6;
69 case 1: output +=
static_cast<Uint8>(*begin++); output <<= 6;
70 case 0: output +=
static_cast<Uint8>(*begin++);
// Remove the marker bits that were summed in along with the payload bits.
72 output -= offsets[trailingBytes];
// Encodes the code point `input` as UTF-8 and writes the bytes through `output`.
// Declared to return an output iterator of type Out.
// NOTE(review): braces, the declaration of `bytes` and the return are missing from this listing.
86template <
typename Out>
87Out Utf<8>::encode(
Uint32 input, Out output,
Uint8 replacement)
// Marker bits OR'ed into the first byte, indexed by the total byte count
// (1 -> 0x00, 2 -> 0xC0, 3 -> 0xE0, 4 -> 0xF0; the entries for 5 and 6 are not selected below).
90 static const Uint8 firstBytes[7] =
92 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
// Rejects values above 0x10FFFF and values in 0xD800..0xDBFF.
// NOTE(review): 0xDC00..0xDFFF is not rejected by this test, whereas Utf<16>::encode in
// this file tests the whole 0xD800..0xDFFF range - confirm whether the upper bound
// here should be 0xDFFF.
96 if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDBFF)))
// Emits the replacement byte for a rejected value (any guard on `replacement` being
// non-zero is not visible here).
100 *output++ = replacement;
// Choose the encoded length from the magnitude of the code point.
107 std::size_t bytestoWrite = 1;
108 if (input < 0x80) bytestoWrite = 1;
109 else if (input < 0x800) bytestoWrite = 2;
110 else if (input < 0x10000) bytestoWrite = 3;
111 else if (input <= 0x0010FFFF) bytestoWrite = 4;
// Fill `bytes` from the last byte to the first. Each continuation byte is the low six
// bits of `input` with the top bits forced to 10 ((x | 0x80) & 0xBF), after which `input`
// is shifted right by 6. No `break` is visible, so the cases appear to fall through.
115 switch (bytestoWrite)
117 case 4: bytes[3] =
static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
118 case 3: bytes[2] =
static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
119 case 2: bytes[1] =
static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
// The first byte carries the remaining high bits plus the length marker.
120 case 1: bytes[0] =
static_cast<Uint8> (input | firstBytes[bytestoWrite]);
// Copy the assembled bytes to the destination and keep the advanced iterator.
124 output = std::copy(bytes, bytes + bytestoWrite, output);
// Skips one UTF-8 encoded character: decodes it with decode() and returns the iterator
// that decode() yields. The decoded value goes into `codepoint`, whose declaration is
// not visible in this listing.
132template <
typename In>
133In Utf<8>::next(In begin, In end)
136 return decode(begin, end, codepoint);
// Declared to return a std::size_t for the range [begin, end).
// NOTE(review): the loop header, the update of `length` and the return are not visible
// in this listing.
141template <
typename In>
142std::size_t Utf<8>::count(In begin, In end)
// Counter, starting at zero.
144 std::size_t length = 0;
// Advance past one encoded character via next().
147 begin = next(begin, end);
// Converts ANSI input to UTF-8: an input element is turned into a code point by
// Utf<32>::decodeAnsi using `locale`, then encoded to `output` with encode().
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
156template <
typename In,
typename Out>
157Out Utf<8>::fromAnsi(In begin, In end, Out output,
const std::locale& locale)
// Consume one input element and widen it to a code point.
161 Uint32 codepoint = Utf<32>::decodeAnsi(*begin++, locale);
// Append its UTF-8 encoding.
162 output = encode(codepoint, output);
// Converts wide-character input to UTF-8: an input element is turned into a code point
// by Utf<32>::decodeWide, then encoded to `output` with encode().
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
170template <
typename In,
typename Out>
171Out Utf<8>::fromWide(In begin, In end, Out output)
// Consume one input element and convert it to a code point.
175 Uint32 codepoint = Utf<32>::decodeWide(*begin++);
// Append its UTF-8 encoding.
176 output = encode(codepoint, output);
// Converts Latin-1 input to UTF-8: each input element is passed to encode() directly as
// the code point value, with no intermediate decoding step.
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
184template <
typename In,
typename Out>
185Out Utf<8>::fromLatin1(In begin, In end, Out output)
190 output = encode(*begin++, output);
// Converts UTF-8 input to ANSI: a code point is decoded with decode() and written by
// Utf<32>::encodeAnsi, which receives `replacement` and `locale`.
// NOTE(review): the loop, the declaration of `codepoint` and the return are not visible
// in this listing.
197template <
typename In,
typename Out>
198Out Utf<8>::toAnsi(In begin, In end, Out output,
char replacement,
const std::locale& locale)
// Decode one code point and advance `begin` past it.
203 begin = decode(begin, end, codepoint);
// Write its ANSI form.
204 output = Utf<32>::encodeAnsi(codepoint, output, replacement, locale);
// Converts UTF-8 input to wide characters: a code point is decoded with decode() and
// written by Utf<32>::encodeWide, which receives `replacement`.
// NOTE(review): the loop, the declaration of `codepoint` and the return are not visible
// in this listing.
212template <
typename In,
typename Out>
213Out Utf<8>::toWide(In begin, In end, Out output,
wchar_t replacement)
// Decode one code point and advance `begin` past it.
218 begin = decode(begin, end, codepoint);
// Write its wide-character form.
219 output = Utf<32>::encodeWide(codepoint, output, replacement);
// Converts UTF-8 input to Latin-1: a code point is decoded with decode(); values below
// 256 are written as a char, anything else is written as `replacement`.
// NOTE(review): the loop, the declaration of `codepoint` and the return are not visible
// in this listing. Unlike the `else if (replacement)` guards elsewhere in this file, no
// check for a zero `replacement` appears on the write below.
227template <
typename In,
typename Out>
228Out Utf<8>::toLatin1(In begin, In end, Out output,
char replacement)
// Decode one code point and advance `begin` past it.
235 begin = decode(begin, end, codepoint);
// Code points 0..255 are emitted unchanged; larger ones become `replacement`.
236 *output++ = codepoint < 256 ? static_cast<char>(codepoint) : replacement;
// UTF-8 to UTF-8 is a straight copy of [begin, end) to `output`; the input is not
// decoded or validated. Returns the iterator produced by std::copy.
244template <
typename In,
typename Out>
245Out Utf<8>::toUtf8(In begin, In end, Out output)
247 return std::copy(begin, end, output);
// Converts UTF-8 input to UTF-16: a code point is decoded with decode() and re-encoded
// with Utf<16>::encode.
// NOTE(review): the loop, the declaration of `codepoint` and the return are not visible
// in this listing.
252template <
typename In,
typename Out>
253Out Utf<8>::toUtf16(In begin, In end, Out output)
// Decode one code point and advance `begin` past it.
258 begin = decode(begin, end, codepoint);
// Append its UTF-16 encoding.
259 output = Utf<16>::encode(codepoint, output);
// Converts UTF-8 input to UTF-32: a code point is decoded with decode() and written to
// `output` as-is.
// NOTE(review): the loop, the declaration of `codepoint` and the return are not visible
// in this listing.
267template <
typename In,
typename Out>
268Out Utf<8>::toUtf32(In begin, In end, Out output)
// Decode one code point and advance `begin` past it.
273 begin = decode(begin, end, codepoint);
// The decoded value is the UTF-32 output element.
274 *output++ = codepoint;
// Decodes one UTF-16 character starting at `begin` into `output`. Declared to return an
// iterator of type In.
// NOTE(review): the reads of `first` and `second`, the else branches, the path for a
// non-surrogate `first` and the return are not visible in this listing.
282template <
typename In>
283In Utf<16>::decode(In begin, In end,
Uint32& output,
Uint32 replacement)
// `first` in 0xD800..0xDBFF marks the first half of a surrogate pair.
288 if ((first >= 0xD800) && (first <= 0xDBFF))
// The second half must lie in 0xDC00..0xDFFF.
293 if ((second >= 0xDC00) && (second <= 0xDFFF))
// Combine both halves: 10 bits from each, plus the 0x10000 base.
296 output =
static_cast<Uint32>(((first - 0xD800) << 10) + (second - 0xDC00) + 0x0010000);
// Two failure paths store `replacement`. Presumably one covers an out-of-range `second`
// and the other a pair cut short by `end` - the enclosing conditions are not shown, confirm.
301 output = replacement;
308 output = replacement;
// Encodes the code point `input` as UTF-16 and writes one or two Uint16 units through
// `output`. Declared to return an output iterator of type Out.
// NOTE(review): braces, the outermost condition, any guard on `replacement` and the
// return are not visible in this listing.
322template <
typename Out>
323Out Utf<16>::encode(
Uint32 input, Out output,
Uint16 replacement)
// Values in the surrogate range 0xD800..0xDFFF are replaced.
328 if ((input >= 0xD800) && (input <= 0xDFFF))
332 *output++ = replacement;
// Other values on this path are written as a single unit.
337 *output++ =
static_cast<Uint16>(input);
// Values above 0x10FFFF are replaced.
340 else if (input > 0x0010FFFF)
344 *output++ = replacement;
// Remaining values are written as a surrogate pair: upper bits + 0xD800, then the low
// 10 bits + 0xDC00. This formula is only correct if 0x10000 has already been subtracted
// from `input`; that statement is not visible here - TODO confirm.
350 *output++ =
static_cast<Uint16>((input >> 10) + 0xD800);
351 *output++ =
static_cast<Uint16>((input & 0x3FFUL) + 0xDC00);
// Skips one UTF-16 encoded character: decodes it with decode() and returns the iterator
// that decode() yields. The declaration of `codepoint` is not visible in this listing.
359template <
typename In>
360In Utf<16>::next(In begin, In end)
363 return decode(begin, end, codepoint);
// Declared to return a std::size_t for the range [begin, end).
// NOTE(review): the loop header, the update of `length` and the return are not visible
// in this listing.
368template <
typename In>
369std::size_t Utf<16>::count(In begin, In end)
// Counter, starting at zero.
371 std::size_t length = 0;
// Advance past one encoded character via next().
374 begin = next(begin, end);
// Converts ANSI input to UTF-16: an input element is turned into a code point by
// Utf<32>::decodeAnsi using `locale`, then encoded to `output` with encode().
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
383template <
typename In,
typename Out>
384Out Utf<16>::fromAnsi(In begin, In end, Out output,
const std::locale& locale)
// Consume one input element and widen it to a code point.
388 Uint32 codepoint = Utf<32>::decodeAnsi(*begin++, locale);
// Append its UTF-16 encoding.
389 output = encode(codepoint, output);
// Converts wide-character input to UTF-16: an input element is turned into a code point
// by Utf<32>::decodeWide, then encoded to `output` with encode().
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
397template <
typename In,
typename Out>
398Out Utf<16>::fromWide(In begin, In end, Out output)
// Consume one input element and convert it to a code point.
402 Uint32 codepoint = Utf<32>::decodeWide(*begin++);
// Append its UTF-16 encoding.
403 output = encode(codepoint, output);
// Latin-1 to UTF-16 is a straight element-wise copy of [begin, end) to `output`; no
// per-element conversion is applied. Returns the iterator produced by std::copy.
411template <
typename In,
typename Out>
412Out Utf<16>::fromLatin1(In begin, In end, Out output)
416 return std::copy(begin, end, output);
// Converts UTF-16 input to ANSI: a code point is decoded with decode() and written by
// Utf<32>::encodeAnsi, which receives `replacement` and `locale`.
// NOTE(review): the loop, the declaration of `codepoint` and the return are not visible
// in this listing.
421template <
typename In,
typename Out>
422Out Utf<16>::toAnsi(In begin, In end, Out output,
char replacement,
const std::locale& locale)
// Decode one code point and advance `begin` past it.
427 begin = decode(begin, end, codepoint);
// Write its ANSI form.
428 output = Utf<32>::encodeAnsi(codepoint, output, replacement, locale);
// Converts UTF-16 input to wide characters: a code point is decoded with decode() and
// written by Utf<32>::encodeWide, which receives `replacement`.
// NOTE(review): the loop, the declaration of `codepoint` and the return are not visible
// in this listing.
436template <
typename In,
typename Out>
437Out Utf<16>::toWide(In begin, In end, Out output,
wchar_t replacement)
// Decode one code point and advance `begin` past it.
442 begin = decode(begin, end, codepoint);
// Write its wide-character form.
443 output = Utf<32>::encodeWide(codepoint, output, replacement);
// Converts UTF-16 input to Latin-1: an input unit below 256 is written as a char,
// anything else is written as `replacement`.
// NOTE(review): the loop, the increment of `begin` and the return are not visible in
// this listing. The test below reads the raw code unit `*begin` and does not call
// decode(), unlike Utf<8>::toLatin1 in this file. If the loop steps one unit at a time,
// a surrogate pair would yield two replacement characters - confirm that is intended.
451template <
typename In,
typename Out>
452Out Utf<16>::toLatin1(In begin, In end, Out output,
char replacement)
458 *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
// Converts UTF-16 input to UTF-8: a code point is decoded with decode() and re-encoded
// with Utf<8>::encode.
// NOTE(review): the loop, the declaration of `codepoint` and the return are not visible
// in this listing.
467template <
typename In,
typename Out>
468Out Utf<16>::toUtf8(In begin, In end, Out output)
// Decode one code point and advance `begin` past it.
473 begin = decode(begin, end, codepoint);
// Append its UTF-8 encoding.
474 output = Utf<8>::encode(codepoint, output);
// UTF-16 to UTF-16 is a straight copy of [begin, end) to `output`; the input is not
// decoded or validated. Returns the iterator produced by std::copy.
482template <
typename In,
typename Out>
483Out Utf<16>::toUtf16(In begin, In end, Out output)
485 return std::copy(begin, end, output);
// Converts UTF-16 input to UTF-32: a code point is decoded with decode() and written to
// `output` as-is.
// NOTE(review): the loop, the declaration of `codepoint` and the return are not visible
// in this listing.
490template <
typename In,
typename Out>
491Out Utf<16>::toUtf32(In begin, In end, Out output)
// Decode one code point and advance `begin` past it.
496 begin = decode(begin, end, codepoint);
// The decoded value is the UTF-32 output element.
497 *output++ = codepoint;
// UTF-32 counterpart of the decode() functions above. The second and fourth parameters
// (the end iterator and the replacement value in the sibling signatures) are unnamed
// here, so the body cannot refer to them.
// NOTE(review): the body is not visible in this listing.
505template <
typename In>
506In Utf<32>::decode(In begin, In ,
Uint32& output,
Uint32 )
// NOTE(review): the declarator and body that belong to this template header are missing
// from the listing (the fused line numbers jump from 514 to 523). Presumably this
// introduces Utf<32>::encode, by analogy with the other specialisations - confirm
// against the complete file.
514template <
typename Out>
// UTF-32 counterpart of the next() functions above. The end iterator parameter is
// unnamed, so the body cannot refer to it.
// NOTE(review): the body is not visible in this listing.
523template <
typename In>
524In Utf<32>::next(In begin, In )
// UTF-32 counterpart of the count() functions above; declared to return a std::size_t
// for the range [begin, end).
// NOTE(review): the body is not visible in this listing.
531template <
typename In>
532std::size_t Utf<32>::count(In begin, In end)
// Converts ANSI input to UTF-32: an input element is converted by decodeAnsi() using
// `locale` and the result is written to `output`.
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
539template <
typename In,
typename Out>
540Out Utf<32>::fromAnsi(In begin, In end, Out output,
const std::locale& locale)
// Consume one input element and write the resulting code point.
543 *output++ = decodeAnsi(*begin++, locale);
// Converts wide-character input to UTF-32: an input element is converted by decodeWide()
// and the result is written to `output`.
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
550template <
typename In,
typename Out>
551Out Utf<32>::fromWide(In begin, In end, Out output)
// Consume one input element and write the resulting code point.
554 *output++ = decodeWide(*begin++);
// Latin-1 to UTF-32 is a straight element-wise copy of [begin, end) to `output`; no
// per-element conversion is applied. Returns the iterator produced by std::copy.
561template <
typename In,
typename Out>
562Out Utf<32>::fromLatin1(In begin, In end, Out output)
566 return std::copy(begin, end, output);
// Converts UTF-32 input to ANSI: an input element is passed to encodeAnsi() together
// with `replacement` and `locale`.
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
571template <
typename In,
typename Out>
572Out Utf<32>::toAnsi(In begin, In end, Out output,
char replacement,
const std::locale& locale)
// Consume one code point and write its ANSI form.
575 output = encodeAnsi(*begin++, output, replacement, locale);
// Converts UTF-32 input to wide characters: an input element is passed to encodeWide()
// together with `replacement`.
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
582template <
typename In,
typename Out>
583Out Utf<32>::toWide(In begin, In end, Out output,
wchar_t replacement)
// Consume one code point and write its wide-character form.
586 output = encodeWide(*begin++, output, replacement);
// Converts UTF-32 input to Latin-1: an input element below 256 is written as a char,
// anything else is written as `replacement`.
// NOTE(review): the loop, the increment of `begin` and the return are not visible in
// this listing.
593template <
typename In,
typename Out>
594Out Utf<32>::toLatin1(In begin, In end, Out output,
char replacement)
// Code points 0..255 are emitted unchanged; larger ones become `replacement`.
600 *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
// Converts UTF-32 input to UTF-8: an input element is encoded with Utf<8>::encode.
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
609template <
typename In,
typename Out>
610Out Utf<32>::toUtf8(In begin, In end, Out output)
// Consume one code point and append its UTF-8 encoding.
613 output = Utf<8>::encode(*begin++, output);
// Converts UTF-32 input to UTF-16: an input element is encoded with Utf<16>::encode.
// NOTE(review): the loop over [begin, end) and the return are not visible in this listing.
619template <
typename In,
typename Out>
620Out Utf<32>::toUtf16(In begin, In end, Out output)
// Consume one code point and append its UTF-16 encoding.
623 output = Utf<16>::encode(*begin++, output);
// UTF-32 to UTF-32 is a straight copy of [begin, end) to `output`; the input is not
// validated. Returns the iterator produced by std::copy.
630template <
typename In,
typename Out>
631Out Utf<32>::toUtf32(In begin, In end, Out output)
633 return std::copy(begin, end, output);
// Converts one ANSI character `input` to a Uint32 code point.
// NOTE(review): the code inside the #if branch, and the #else/#endif lines, are not
// visible in this listing.
638template <
typename In>
639Uint32 Utf<32>::decodeAnsi(In input,
const std::locale& locale)
// The condition selects builds where SFML_SYSTEM_WINDOWS is defined, the standard
// library defines __GLIBCPP__ or __GLIBCXX__, and the STLport macros are absent.
646 #if defined(SFML_SYSTEM_WINDOWS) && \
647 (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && \
648 !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))
// The other configuration (presumably the #else side - confirm) obtains the
// std::ctype<wchar_t> facet of `locale`.
659 const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
// widen() maps the narrow character to wchar_t, which is then cast to Uint32.
662 return static_cast<Uint32>(facet.widen(input));
// Converts one wide character `input` to a Uint32 code point.
// NOTE(review): the body is not visible in this listing.
669template <
typename In>
670Uint32 Utf<32>::decodeWide(In input)
// Writes the ANSI form of `codepoint` through `output`, falling back to `replacement`.
// Declared to return an output iterator of type Out.
// NOTE(review): the declaration of `character`, the write on the wctomb success path,
// the #else/#endif lines and the return are not visible in this listing.
683template <
typename Out>
684Out Utf<32>::encodeAnsi(
Uint32 codepoint, Out output,
char replacement,
const std::locale& locale)
// The condition selects builds where SFML_SYSTEM_WINDOWS is defined, the standard
// library defines __GLIBCPP__ or __GLIBCXX__, and the STLport macros are absent.
691 #if defined(SFML_SYSTEM_WINDOWS) && \
692 (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && \
693 !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))
// This path converts with wctomb(), which is not passed `locale`. A non-negative return
// value is treated as success.
698 if (wctomb(&
character,
static_cast<wchar_t>(codepoint)) >= 0)
// On failure the replacement is written only when it is non-zero.
700 else if (replacement)
701 *output++ = replacement;
// The other configuration (presumably the #else side - confirm) uses the
// std::ctype<wchar_t> facet of `locale`.
708 const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
// narrow() is given `replacement` as its default for characters it cannot convert.
711 *output++ = facet.narrow(
static_cast<wchar_t>(codepoint), replacement);
// Writes `codepoint` as a wchar_t through `output`, falling back to `replacement`.
// Declared to return an output iterator of type Out.
// NOTE(review): the case labels, braces and return are not visible in this listing, and
// the definition may continue past the last visible line.
720template <
typename Out>
721Out Utf<32>::encodeWide(
Uint32 codepoint, Out output,
wchar_t replacement)
// The behaviour depends on the size of wchar_t on the target platform.
729 switch (
sizeof(
wchar_t))
// One branch stores the code point directly (which wchar_t size selects it is not
// visible here).
733 *output++ =
static_cast<wchar_t>(codepoint);
// The other visible branch accepts only code points up to 0xFFFF that lie outside the
// surrogate range 0xD800..0xDFFF.
739 if ((codepoint <= 0xFFFF) && ((codepoint < 0xD800) || (codepoint > 0xDFFF)))
741 *output++ =
static_cast<wchar_t>(codepoint);
// Anything else is replaced, but only when `replacement` is non-zero.
743 else if (replacement)
745 *output++ = replacement;