(root)/
gcc-13.2.0/
libstdc++-v3/
testsuite/
22_locale/
codecvt/
codecvt_unicode.h
       1  // Copyright (C) 2020-2023 Free Software Foundation, Inc.
       2  //
       3  // This file is part of the GNU ISO C++ Library.  This library is free
       4  // software; you can redistribute it and/or modify it under the
       5  // terms of the GNU General Public License as published by the
       6  // Free Software Foundation; either version 3, or (at your option)
       7  // any later version.
       8  
       9  // This library is distributed in the hope that it will be useful,
      10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12  // GNU General Public License for more details.
      13  
      14  // You should have received a copy of the GNU General Public License along
      15  // with this library; see the file COPYING3.  If not see
      16  // <http://www.gnu.org/licenses/>.
      17  
      18  #include <algorithm>
      19  #include <locale>
      20  #include <string>
      21  #include <testsuite_hooks.h>
      22  
      23  struct test_offsets_ok
      24  {
      25    size_t in_size, out_size;
      26  };
      27  struct test_offsets_partial
      28  {
      29    size_t in_size, out_size, expected_in_next, expected_out_next;
      30  };
      31  
      32  template <class CharT> struct test_offsets_error
      33  {
      34    size_t in_size, out_size, expected_in_next, expected_out_next;
      35    CharT replace_char;
      36    size_t replace_pos;
      37  };
      38  
      39  template <class T, size_t N>
      40  auto constexpr array_size (const T (&)[N]) -> size_t
      41  {
      42    return N;
      43  }
      44  
      45  template <class CharT>
      46  void
      47  utf8_to_utf32_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
      48  {
      49    using namespace std;
      50    // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
      51    const char in[] = "\uAAAA\U0010AAAA";
      52    const char32_t exp_literal[] = U"\uAAAA\U0010AAAA";
      53    CharT exp[array_size (exp_literal)] = {};
      54    std::copy (begin (exp_literal), end (exp_literal), begin (exp));
      55  
      56    static_assert (array_size (in) == 11, "");
      57    static_assert (array_size (exp_literal) == 5, "");
      58    static_assert (array_size (exp) == 5, "");
      59    VERIFY (char_traits<char>::length (in) == 10);
      60    VERIFY (char_traits<char32_t>::length (exp_literal) == 4);
      61    VERIFY (char_traits<CharT>::length (exp) == 4);
      62  
      63    test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 4}};
      64    for (auto t : offsets)
      65      {
      66        CharT out[array_size (exp) - 1] = {};
      67        VERIFY (t.in_size <= array_size (in));
      68        VERIFY (t.out_size <= array_size (out));
      69        auto state = mbstate_t{};
      70        auto in_next = (const char *) nullptr;
      71        auto out_next = (CharT *) nullptr;
      72        auto res = codecvt_base::result ();
      73  
      74        res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
      75  		    out_next);
      76        VERIFY (res == cvt.ok);
      77        VERIFY (in_next == in + t.in_size);
      78        VERIFY (out_next == out + t.out_size);
      79        VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
      80        if (t.out_size < array_size (out))
      81  	VERIFY (out[t.out_size] == 0);
      82      }
      83  
      84    for (auto t : offsets)
      85      {
      86        CharT out[array_size (exp)] = {};
      87        VERIFY (t.in_size <= array_size (in));
      88        VERIFY (t.out_size <= array_size (out));
      89        auto state = mbstate_t{};
      90        auto in_next = (const char *) nullptr;
      91        auto out_next = (CharT *) nullptr;
      92        auto res = codecvt_base::result ();
      93  
      94        res
      95  	= cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
      96        VERIFY (res == cvt.ok);
      97        VERIFY (in_next == in + t.in_size);
      98        VERIFY (out_next == out + t.out_size);
      99        VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
     100        if (t.out_size < array_size (out))
     101  	VERIFY (out[t.out_size] == 0);
     102      }
     103  }
     104  
     105  template <class CharT>
     106  void
     107  utf8_to_utf32_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
     108  {
     109    using namespace std;
     110    // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
     111    const char in[] = "\uAAAA\U0010AAAA";
     112    const char32_t exp_literal[] = U"\uAAAA\U0010AAAA";
     113    CharT exp[array_size (exp_literal)] = {};
     114    std::copy (begin (exp_literal), end (exp_literal), begin (exp));
     115  
     116    static_assert (array_size (in) == 11, "");
     117    static_assert (array_size (exp_literal) == 5, "");
     118    static_assert (array_size (exp) == 5, "");
     119    VERIFY (char_traits<char>::length (in) == 10);
     120    VERIFY (char_traits<char32_t>::length (exp_literal) == 4);
     121    VERIFY (char_traits<CharT>::length (exp) == 4);
     122  
     123    test_offsets_partial offsets[] = {
     124      {1, 0, 0, 0}, // no space for first CP
     125  
     126      {3, 1, 1, 1}, // no space for second CP
     127      {2, 2, 1, 1}, // incomplete second CP
     128      {2, 1, 1, 1}, // incomplete second CP, and no space for it
     129  
     130      {6, 2, 3, 2}, // no space for third CP
     131      {4, 3, 3, 2}, // incomplete third CP
     132      {5, 3, 3, 2}, // incomplete third CP
     133      {4, 2, 3, 2}, // incomplete third CP, and no space for it
     134      {5, 2, 3, 2}, // incomplete third CP, and no space for it
     135  
     136      {10, 3, 6, 3}, // no space for fourth CP
     137      {7, 4, 6, 3},  // incomplete fourth CP
     138      {8, 4, 6, 3},  // incomplete fourth CP
     139      {9, 4, 6, 3},  // incomplete fourth CP
     140      {7, 3, 6, 3},  // incomplete fourth CP, and no space for it
     141      {8, 3, 6, 3},  // incomplete fourth CP, and no space for it
     142      {9, 3, 6, 3},  // incomplete fourth CP, and no space for it
     143    };
     144  
     145    for (auto t : offsets)
     146      {
     147        CharT out[array_size (exp) - 1] = {};
     148        VERIFY (t.in_size <= array_size (in));
     149        VERIFY (t.out_size <= array_size (out));
     150        VERIFY (t.expected_in_next <= t.in_size);
     151        VERIFY (t.expected_out_next <= t.out_size);
     152        auto state = mbstate_t{};
     153        auto in_next = (const char *) nullptr;
     154        auto out_next = (CharT *) nullptr;
     155        auto res = codecvt_base::result ();
     156  
     157        res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
     158  		    out_next);
     159        VERIFY (res == cvt.partial);
     160        VERIFY (in_next == in + t.expected_in_next);
     161        VERIFY (out_next == out + t.expected_out_next);
     162        VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
     163        if (t.expected_out_next < array_size (out))
     164  	VERIFY (out[t.expected_out_next] == 0);
     165      }
     166  }
     167  
     168  template <class CharT>
     169  void
     170  utf8_to_utf32_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     171  {
     172    using namespace std;
     173    // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
     174    const char valid_in[] = "\uAAAA\U0010AAAA";
     175    const char32_t exp_literal[] = U"\uAAAA\U0010AAAA";
     176    CharT exp[array_size (exp_literal)] = {};
     177    std::copy (begin (exp_literal), end (exp_literal), begin (exp));
     178  
     179    static_assert (array_size (valid_in) == 11, "");
     180    static_assert (array_size (exp_literal) == 5, "");
     181    static_assert (array_size (exp) == 5, "");
     182    VERIFY (char_traits<char>::length (valid_in) == 10);
     183    VERIFY (char_traits<char32_t>::length (exp_literal) == 4);
     184    VERIFY (char_traits<CharT>::length (exp) == 4);
     185  
     186    test_offsets_error<char> offsets[] = {
     187  
     188      // replace leading byte with invalid byte
     189      {1, 4, 0, 0, '\xFF', 0},
     190      {3, 4, 1, 1, '\xFF', 1},
     191      {6, 4, 3, 2, '\xFF', 3},
     192      {10, 4, 6, 3, '\xFF', 6},
     193  
     194      // replace first trailing byte with ASCII byte
     195      {3, 4, 1, 1, 'z', 2},
     196      {6, 4, 3, 2, 'z', 4},
     197      {10, 4, 6, 3, 'z', 7},
     198  
     199      // replace first trailing byte with invalid byte
     200      {3, 4, 1, 1, '\xFF', 2},
     201      {6, 4, 3, 2, '\xFF', 4},
     202      {10, 4, 6, 3, '\xFF', 7},
     203  
     204      // replace second trailing byte with ASCII byte
     205      {6, 4, 3, 2, 'z', 5},
     206      {10, 4, 6, 3, 'z', 8},
     207  
     208      // replace second trailing byte with invalid byte
     209      {6, 4, 3, 2, '\xFF', 5},
     210      {10, 4, 6, 3, '\xFF', 8},
     211  
     212      // replace third trailing byte
     213      {10, 4, 6, 3, 'z', 9},
     214      {10, 4, 6, 3, '\xFF', 9},
     215  
     216      // replace first trailing byte with ASCII byte, also incomplete at end
     217      {5, 4, 3, 2, 'z', 4},
     218      {8, 4, 6, 3, 'z', 7},
     219      {9, 4, 6, 3, 'z', 7},
     220  
     221      // replace first trailing byte with invalid byte, also incomplete at end
     222      {5, 4, 3, 2, '\xFF', 4},
     223      {8, 4, 6, 3, '\xFF', 7},
     224      {9, 4, 6, 3, '\xFF', 7},
     225  
     226      // replace second trailing byte with ASCII byte, also incomplete at end
     227      {9, 4, 6, 3, 'z', 8},
     228  
     229      // replace second trailing byte with invalid byte, also incomplete at end
     230      {9, 4, 6, 3, '\xFF', 8},
     231    };
     232    for (auto t : offsets)
     233      {
     234        char in[array_size (valid_in)] = {};
     235        CharT out[array_size (exp) - 1] = {};
     236        VERIFY (t.in_size <= array_size (in));
     237        VERIFY (t.out_size <= array_size (out));
     238        VERIFY (t.expected_in_next <= t.in_size);
     239        VERIFY (t.expected_out_next <= t.out_size);
     240        char_traits<char>::copy (in, valid_in, array_size (valid_in));
     241        in[t.replace_pos] = t.replace_char;
     242  
     243        auto state = mbstate_t{};
     244        auto in_next = (const char *) nullptr;
     245        auto out_next = (CharT *) nullptr;
     246        auto res = codecvt_base::result ();
     247  
     248        res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
     249  		    out_next);
     250        VERIFY (res == cvt.error);
     251        VERIFY (in_next == in + t.expected_in_next);
     252        VERIFY (out_next == out + t.expected_out_next);
     253        VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
     254        if (t.expected_out_next < array_size (out))
     255  	VERIFY (out[t.expected_out_next] == 0);
     256      }
     257  }
     258  
     259  template <class CharT>
     260  void
     261  utf8_to_utf32_in (const std::codecvt<CharT, char, mbstate_t> &cvt)
     262  {
     263    utf8_to_utf32_in_ok (cvt);
     264    utf8_to_utf32_in_partial (cvt);
     265    utf8_to_utf32_in_error (cvt);
     266  }
     267  
     268  template <class CharT>
     269  void
     270  utf32_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
     271  {
     272    using namespace std;
     273    // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
     274    const char32_t in_literal[] = U"\uAAAA\U0010AAAA";
     275    const char exp[] = "\uAAAA\U0010AAAA";
     276    CharT in[array_size (in_literal)] = {};
     277    copy (begin (in_literal), end (in_literal), begin (in));
     278  
     279    static_assert (array_size (in_literal) == 5, "");
     280    static_assert (array_size (in) == 5, "");
     281    static_assert (array_size (exp) == 11, "");
     282    VERIFY (char_traits<char32_t>::length (in_literal) == 4);
     283    VERIFY (char_traits<CharT>::length (in) == 4);
     284    VERIFY (char_traits<char>::length (exp) == 10);
     285  
     286    const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {4, 10}};
     287    for (auto t : offsets)
     288      {
     289        char out[array_size (exp) - 1] = {};
     290        VERIFY (t.in_size <= array_size (in));
     291        VERIFY (t.out_size <= array_size (out));
     292        auto state = mbstate_t{};
     293        auto in_next = (const CharT *) nullptr;
     294        auto out_next = (char *) nullptr;
     295        auto res = codecvt_base::result ();
     296  
     297        res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
     298  		     out_next);
     299        VERIFY (res == cvt.ok);
     300        VERIFY (in_next == in + t.in_size);
     301        VERIFY (out_next == out + t.out_size);
     302        VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
     303        if (t.out_size < array_size (out))
     304  	VERIFY (out[t.out_size] == 0);
     305      }
     306  }
     307  
     308  template <class CharT>
     309  void
     310  utf32_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
     311  {
     312    using namespace std;
     313    // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
     314    const char32_t in_literal[] = U"\uAAAA\U0010AAAA";
     315    const char exp[] = "\uAAAA\U0010AAAA";
     316    CharT in[array_size (in_literal)] = {};
     317    copy (begin (in_literal), end (in_literal), begin (in));
     318  
     319    static_assert (array_size (in_literal) == 5, "");
     320    static_assert (array_size (in) == 5, "");
     321    static_assert (array_size (exp) == 11, "");
     322    VERIFY (char_traits<char32_t>::length (in_literal) == 4);
     323    VERIFY (char_traits<CharT>::length (in) == 4);
     324    VERIFY (char_traits<char>::length (exp) == 10);
     325  
     326    const test_offsets_partial offsets[] = {
     327      {1, 0, 0, 0}, // no space for first CP
     328  
     329      {2, 1, 1, 1}, // no space for second CP
     330      {2, 2, 1, 1}, // no space for second CP
     331  
     332      {3, 3, 2, 3}, // no space for third CP
     333      {3, 4, 2, 3}, // no space for third CP
     334      {3, 5, 2, 3}, // no space for third CP
     335  
     336      {4, 6, 3, 6}, // no space for fourth CP
     337      {4, 7, 3, 6}, // no space for fourth CP
     338      {4, 8, 3, 6}, // no space for fourth CP
     339      {4, 9, 3, 6}, // no space for fourth CP
     340    };
     341    for (auto t : offsets)
     342      {
     343        char out[array_size (exp) - 1] = {};
     344        VERIFY (t.in_size <= array_size (in));
     345        VERIFY (t.out_size <= array_size (out));
     346        VERIFY (t.expected_in_next <= t.in_size);
     347        VERIFY (t.expected_out_next <= t.out_size);
     348        auto state = mbstate_t{};
     349        auto in_next = (const CharT *) nullptr;
     350        auto out_next = (char *) nullptr;
     351        auto res = codecvt_base::result ();
     352  
     353        res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
     354  		     out_next);
     355        VERIFY (res == cvt.partial);
     356        VERIFY (in_next == in + t.expected_in_next);
     357        VERIFY (out_next == out + t.expected_out_next);
     358        VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
     359        if (t.expected_out_next < array_size (out))
     360  	VERIFY (out[t.expected_out_next] == 0);
     361      }
     362  }
     363  
     364  template <class CharT>
     365  void
     366  utf32_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     367  {
     368    using namespace std;
     369    const char32_t valid_in[] = U"\uAAAA\U0010AAAA";
     370    const char exp[] = "\uAAAA\U0010AAAA";
     371  
     372    static_assert (array_size (valid_in) == 5, "");
     373    static_assert (array_size (exp) == 11, "");
     374    VERIFY (char_traits<char32_t>::length (valid_in) == 4);
     375    VERIFY (char_traits<char>::length (exp) == 10);
     376  
     377    test_offsets_error<CharT> offsets[] = {{4, 10, 0, 0, 0x00110000, 0},
     378  					 {4, 10, 1, 1, 0x00110000, 1},
     379  					 {4, 10, 2, 3, 0x00110000, 2},
     380  					 {4, 10, 3, 6, 0x00110000, 3}};
     381  
     382    for (auto t : offsets)
     383      {
     384        CharT in[array_size (valid_in)] = {};
     385        char out[array_size (exp) - 1] = {};
     386        VERIFY (t.in_size <= array_size (in));
     387        VERIFY (t.out_size <= array_size (out));
     388        VERIFY (t.expected_in_next <= t.in_size);
     389        VERIFY (t.expected_out_next <= t.out_size);
     390        copy (begin (valid_in), end (valid_in), begin (in));
     391        in[t.replace_pos] = t.replace_char;
     392  
     393        auto state = mbstate_t{};
     394        auto in_next = (const CharT *) nullptr;
     395        auto out_next = (char *) nullptr;
     396        auto res = codecvt_base::result ();
     397  
     398        res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
     399  		     out_next);
     400        VERIFY (res == cvt.error);
     401        VERIFY (in_next == in + t.expected_in_next);
     402        VERIFY (out_next == out + t.expected_out_next);
     403        VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
     404        if (t.expected_out_next < array_size (out))
     405  	VERIFY (out[t.expected_out_next] == 0);
     406      }
     407  }
     408  
     409  template <class CharT>
     410  void
     411  utf32_to_utf8_out (const std::codecvt<CharT, char, mbstate_t> &cvt)
     412  {
     413    utf32_to_utf8_out_ok (cvt);
     414    utf32_to_utf8_out_partial (cvt);
     415    utf32_to_utf8_out_error (cvt);
     416  }
     417  
     418  template <class CharT>
     419  void
     420  test_utf8_utf32_codecvts (const std::codecvt<CharT, char, mbstate_t> &cvt)
     421  {
     422    utf8_to_utf32_in (cvt);
     423    utf32_to_utf8_out (cvt);
     424  }
     425  
     426  template <class CharT>
     427  void
     428  utf8_to_utf16_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
     429  {
     430    using namespace std;
     431    // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
     432    const char in[] = "\uAAAA\U0010AAAA";
     433    const char16_t exp_literal[] = u"\uAAAA\U0010AAAA";
     434    CharT exp[array_size (exp_literal)] = {};
     435    copy (begin (exp_literal), end (exp_literal), begin (exp));
     436  
     437    static_assert (array_size (in) == 11, "");
     438    static_assert (array_size (exp_literal) == 6, "");
     439    static_assert (array_size (exp) == 6, "");
     440    VERIFY (char_traits<char>::length (in) == 10);
     441    VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
     442    VERIFY (char_traits<CharT>::length (exp) == 5);
     443  
     444    test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 5}};
     445    for (auto t : offsets)
     446      {
     447        CharT out[array_size (exp) - 1] = {};
     448        VERIFY (t.in_size <= array_size (in));
     449        VERIFY (t.out_size <= array_size (out));
     450        auto state = mbstate_t{};
     451        auto in_next = (const char *) nullptr;
     452        auto out_next = (CharT *) nullptr;
     453        auto res = codecvt_base::result ();
     454  
     455        res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
     456  		    out_next);
     457        VERIFY (res == cvt.ok);
     458        VERIFY (in_next == in + t.in_size);
     459        VERIFY (out_next == out + t.out_size);
     460        VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
     461        if (t.out_size < array_size (out))
     462  	VERIFY (out[t.out_size] == 0);
     463      }
     464  
     465    for (auto t : offsets)
     466      {
     467        CharT out[array_size (exp)] = {};
     468        VERIFY (t.in_size <= array_size (in));
     469        VERIFY (t.out_size <= array_size (out));
     470        auto state = mbstate_t{};
     471        auto in_next = (const char *) nullptr;
     472        auto out_next = (CharT *) nullptr;
     473        auto res = codecvt_base::result ();
     474  
     475        res
     476  	= cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
     477        VERIFY (res == cvt.ok);
     478        VERIFY (in_next == in + t.in_size);
     479        VERIFY (out_next == out + t.out_size);
     480        VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
     481        if (t.out_size < array_size (out))
     482  	VERIFY (out[t.out_size] == 0);
     483      }
     484  }
     485  
     486  template <class CharT>
     487  void
     488  utf8_to_utf16_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
     489  {
     490    using namespace std;
     491    // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
     492    const char in[] = "\uAAAA\U0010AAAA";
     493    const char16_t exp_literal[] = u"\uAAAA\U0010AAAA";
     494    CharT exp[array_size (exp_literal)] = {};
     495    copy (begin (exp_literal), end (exp_literal), begin (exp));
     496  
     497    static_assert (array_size (in) == 11, "");
     498    static_assert (array_size (exp_literal) == 6, "");
     499    static_assert (array_size (exp) == 6, "");
     500    VERIFY (char_traits<char>::length (in) == 10);
     501    VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
     502    VERIFY (char_traits<CharT>::length (exp) == 5);
     503  
     504    test_offsets_partial offsets[] = {
     505      {1, 0, 0, 0}, // no space for first CP
     506  
     507      {3, 1, 1, 1}, // no space for second CP
     508      {2, 2, 1, 1}, // incomplete second CP
     509      {2, 1, 1, 1}, // incomplete second CP, and no space for it
     510  
     511      {6, 2, 3, 2}, // no space for third CP
     512      {4, 3, 3, 2}, // incomplete third CP
     513      {5, 3, 3, 2}, // incomplete third CP
     514      {4, 2, 3, 2}, // incomplete third CP, and no space for it
     515      {5, 2, 3, 2}, // incomplete third CP, and no space for it
     516  
     517      {10, 3, 6, 3}, // no space for fourth CP
     518      {10, 4, 6, 3}, // no space for fourth CP
     519      {7, 5, 6, 3},  // incomplete fourth CP
     520      {8, 5, 6, 3},  // incomplete fourth CP
     521      {9, 5, 6, 3},  // incomplete fourth CP
     522      {7, 3, 6, 3},  // incomplete fourth CP, and no space for it
     523      {8, 3, 6, 3},  // incomplete fourth CP, and no space for it
     524      {9, 3, 6, 3},  // incomplete fourth CP, and no space for it
     525      {7, 4, 6, 3},  // incomplete fourth CP, and no space for it
     526      {8, 4, 6, 3},  // incomplete fourth CP, and no space for it
     527      {9, 4, 6, 3},  // incomplete fourth CP, and no space for it
     528  
     529    };
     530  
     531    for (auto t : offsets)
     532      {
     533        CharT out[array_size (exp) - 1] = {};
     534        VERIFY (t.in_size <= array_size (in));
     535        VERIFY (t.out_size <= array_size (out));
     536        VERIFY (t.expected_in_next <= t.in_size);
     537        VERIFY (t.expected_out_next <= t.out_size);
     538        auto state = mbstate_t{};
     539        auto in_next = (const char *) nullptr;
     540        auto out_next = (CharT *) nullptr;
     541        auto res = codecvt_base::result ();
     542  
     543        res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
     544  		    out_next);
     545        VERIFY (res == cvt.partial);
     546        VERIFY (in_next == in + t.expected_in_next);
     547        VERIFY (out_next == out + t.expected_out_next);
     548        VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
     549        if (t.expected_out_next < array_size (out))
     550  	VERIFY (out[t.expected_out_next] == 0);
     551      }
     552  }
     553  
     554  template <class CharT>
     555  void
     556  utf8_to_utf16_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     557  {
     558    using namespace std;
     559    const char valid_in[] = "\uAAAA\U0010AAAA";
     560    const char16_t exp_literal[] = u"\uAAAA\U0010AAAA";
     561    CharT exp[array_size (exp_literal)] = {};
     562    copy (begin (exp_literal), end (exp_literal), begin (exp));
     563  
     564    static_assert (array_size (valid_in) == 11, "");
     565    static_assert (array_size (exp_literal) == 6, "");
     566    static_assert (array_size (exp) == 6, "");
     567    VERIFY (char_traits<char>::length (valid_in) == 10);
     568    VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
     569    VERIFY (char_traits<CharT>::length (exp) == 5);
     570  
     571    test_offsets_error<char> offsets[] = {
     572  
     573      // replace leading byte with invalid byte
     574      {1, 5, 0, 0, '\xFF', 0},
     575      {3, 5, 1, 1, '\xFF', 1},
     576      {6, 5, 3, 2, '\xFF', 3},
     577      {10, 5, 6, 3, '\xFF', 6},
     578  
     579      // replace first trailing byte with ASCII byte
     580      {3, 5, 1, 1, 'z', 2},
     581      {6, 5, 3, 2, 'z', 4},
     582      {10, 5, 6, 3, 'z', 7},
     583  
     584      // replace first trailing byte with invalid byte
     585      {3, 5, 1, 1, '\xFF', 2},
     586      {6, 5, 3, 2, '\xFF', 4},
     587      {10, 5, 6, 3, '\xFF', 7},
     588  
     589      // replace second trailing byte with ASCII byte
     590      {6, 5, 3, 2, 'z', 5},
     591      {10, 5, 6, 3, 'z', 8},
     592  
     593      // replace second trailing byte with invalid byte
     594      {6, 5, 3, 2, '\xFF', 5},
     595      {10, 5, 6, 3, '\xFF', 8},
     596  
     597      // replace third trailing byte
     598      {10, 5, 6, 3, 'z', 9},
     599      {10, 5, 6, 3, '\xFF', 9},
     600  
     601      // replace first trailing byte with ASCII byte, also incomplete at end
     602      {5, 5, 3, 2, 'z', 4},
     603      {8, 5, 6, 3, 'z', 7},
     604      {9, 5, 6, 3, 'z', 7},
     605  
     606      // replace first trailing byte with invalid byte, also incomplete at end
     607      {5, 5, 3, 2, '\xFF', 4},
     608      {8, 5, 6, 3, '\xFF', 7},
     609      {9, 5, 6, 3, '\xFF', 7},
     610  
     611      // replace second trailing byte with ASCII byte, also incomplete at end
     612      {9, 5, 6, 3, 'z', 8},
     613  
     614      // replace second trailing byte with invalid byte, also incomplete at end
     615      {9, 5, 6, 3, '\xFF', 8},
     616    };
     617    for (auto t : offsets)
     618      {
     619        char in[array_size (valid_in)] = {};
     620        CharT out[array_size (exp) - 1] = {};
     621        VERIFY (t.in_size <= array_size (in));
     622        VERIFY (t.out_size <= array_size (out));
     623        VERIFY (t.expected_in_next <= t.in_size);
     624        VERIFY (t.expected_out_next <= t.out_size);
     625        char_traits<char>::copy (in, valid_in, array_size (valid_in));
     626        in[t.replace_pos] = t.replace_char;
     627  
     628        auto state = mbstate_t{};
     629        auto in_next = (const char *) nullptr;
     630        auto out_next = (CharT *) nullptr;
     631        auto res = codecvt_base::result ();
     632  
     633        res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
     634  		    out_next);
     635        VERIFY (res == cvt.error);
     636        VERIFY (in_next == in + t.expected_in_next);
     637        VERIFY (out_next == out + t.expected_out_next);
     638        VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
     639        if (t.expected_out_next < array_size (out))
     640  	VERIFY (out[t.expected_out_next] == 0);
     641      }
     642  }
     643  
     644  template <class CharT>
     645  void
     646  utf8_to_utf16_in (const std::codecvt<CharT, char, mbstate_t> &cvt)
     647  {
     648    utf8_to_utf16_in_ok (cvt);
     649    utf8_to_utf16_in_partial (cvt);
     650    utf8_to_utf16_in_error (cvt);
     651  }
     652  
     653  template <class CharT>
     654  void
     655  utf16_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
     656  {
     657    using namespace std;
     658    // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
     659    const char16_t in_literal[] = u"\uAAAA\U0010AAAA";
     660    const char exp[] = "\uAAAA\U0010AAAA";
     661    CharT in[array_size (in_literal)];
     662    copy (begin (in_literal), end (in_literal), begin (in));
     663  
     664    static_assert (array_size (in_literal) == 6, "");
     665    static_assert (array_size (exp) == 11, "");
     666    static_assert (array_size (in) == 6, "");
     667    VERIFY (char_traits<char16_t>::length (in_literal) == 5);
     668    VERIFY (char_traits<char>::length (exp) == 10);
     669    VERIFY (char_traits<CharT>::length (in) == 5);
     670  
     671    const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {5, 10}};
     672    for (auto t : offsets)
     673      {
     674        char out[array_size (exp) - 1] = {};
     675        VERIFY (t.in_size <= array_size (in));
     676        VERIFY (t.out_size <= array_size (out));
     677        auto state = mbstate_t{};
     678        auto in_next = (const CharT *) nullptr;
     679        auto out_next = (char *) nullptr;
     680        auto res = codecvt_base::result ();
     681  
     682        res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
     683  		     out_next);
     684        VERIFY (res == cvt.ok);
     685        VERIFY (in_next == in + t.in_size);
     686        VERIFY (out_next == out + t.out_size);
     687        VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
     688        if (t.out_size < array_size (out))
     689  	VERIFY (out[t.out_size] == 0);
     690      }
     691  }
     692  
     693  template <class CharT>
     694  void
     695  utf16_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
     696  {
     697    using namespace std;
     698    // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
     699    const char16_t in_literal[] = u"\uAAAA\U0010AAAA";
     700    const char exp[] = "\uAAAA\U0010AAAA";
     701    CharT in[array_size (in_literal)];
     702    copy (begin (in_literal), end (in_literal), begin (in));
     703  
     704    static_assert (array_size (in_literal) == 6, "");
     705    static_assert (array_size (exp) == 11, "");
     706    static_assert (array_size (in) == 6, "");
     707    VERIFY (char_traits<char16_t>::length (in_literal) == 5);
     708    VERIFY (char_traits<char>::length (exp) == 10);
     709    VERIFY (char_traits<CharT>::length (in) == 5);
     710  
     711    const test_offsets_partial offsets[] = {
     712      {1, 0, 0, 0}, // no space for first CP
     713  
     714      {2, 1, 1, 1}, // no space for second CP
     715      {2, 2, 1, 1}, // no space for second CP
     716  
     717      {3, 3, 2, 3}, // no space for third CP
     718      {3, 4, 2, 3}, // no space for third CP
     719      {3, 5, 2, 3}, // no space for third CP
     720  
     721      {5, 6, 3, 6}, // no space for fourth CP
     722      {5, 7, 3, 6}, // no space for fourth CP
     723      {5, 8, 3, 6}, // no space for fourth CP
     724      {5, 9, 3, 6}, // no space for fourth CP
     725  
     726      {4, 10, 3, 6}, // incomplete fourth CP
     727  
     728      {4, 6, 3, 6}, // incomplete fourth CP, and no space for it
     729      {4, 7, 3, 6}, // incomplete fourth CP, and no space for it
     730      {4, 8, 3, 6}, // incomplete fourth CP, and no space for it
     731      {4, 9, 3, 6}, // incomplete fourth CP, and no space for it
     732    };
     733    for (auto t : offsets)
     734      {
     735        char out[array_size (exp) - 1] = {};
     736        VERIFY (t.in_size <= array_size (in));
     737        VERIFY (t.out_size <= array_size (out));
     738        VERIFY (t.expected_in_next <= t.in_size);
     739        VERIFY (t.expected_out_next <= t.out_size);
     740        auto state = mbstate_t{};
     741        auto in_next = (const CharT *) nullptr;
     742        auto out_next = (char *) nullptr;
     743        auto res = codecvt_base::result ();
     744  
     745        res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
     746  		     out_next);
     747        VERIFY (res == cvt.partial);
     748        VERIFY (in_next == in + t.expected_in_next);
     749        VERIFY (out_next == out + t.expected_out_next);
     750        VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
     751        if (t.expected_out_next < array_size (out))
     752  	VERIFY (out[t.expected_out_next] == 0);
     753      }
     754  }
     755  
     756  template <class CharT>
     757  void
     758  utf16_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     759  {
     760    using namespace std;
     761    const char16_t valid_in[] = u"\uAAAA\U0010AAAA";
     762    const char exp[] = "\uAAAA\U0010AAAA";
     763  
     764    static_assert (array_size (valid_in) == 6, "");
     765    static_assert (array_size (exp) == 11, "");
     766    VERIFY (char_traits<char16_t>::length (valid_in) == 5);
     767    VERIFY (char_traits<char>::length (exp) == 10);
     768  
     769    test_offsets_error<CharT> offsets[] = {
     770      {5, 10, 0, 0, 0xD800, 0},
     771      {5, 10, 0, 0, 0xDBFF, 0},
     772      {5, 10, 0, 0, 0xDC00, 0},
     773      {5, 10, 0, 0, 0xDFFF, 0},
     774  
     775      {5, 10, 1, 1, 0xD800, 1},
     776      {5, 10, 1, 1, 0xDBFF, 1},
     777      {5, 10, 1, 1, 0xDC00, 1},
     778      {5, 10, 1, 1, 0xDFFF, 1},
     779  
     780      {5, 10, 2, 3, 0xD800, 2},
     781      {5, 10, 2, 3, 0xDBFF, 2},
     782      {5, 10, 2, 3, 0xDC00, 2},
     783      {5, 10, 2, 3, 0xDFFF, 2},
     784  
     785      // make the leading surrogate a trailing one
     786      {5, 10, 3, 6, 0xDC00, 3},
     787      {5, 10, 3, 6, 0xDFFF, 3},
     788  
     789      // make the trailing surrogate a leading one
     790      {5, 10, 3, 6, 0xD800, 4},
     791      {5, 10, 3, 6, 0xDBFF, 4},
     792  
     793      // make the trailing surrogate a BMP char
     794      {5, 10, 3, 6, u'z', 4},
     795    };
     796  
     797    for (auto t : offsets)
     798      {
     799        CharT in[array_size (valid_in)] = {};
     800        char out[array_size (exp) - 1] = {};
     801        VERIFY (t.in_size <= array_size (in));
     802        VERIFY (t.out_size <= array_size (out));
     803        VERIFY (t.expected_in_next <= t.in_size);
     804        VERIFY (t.expected_out_next <= t.out_size);
     805        copy (begin (valid_in), end (valid_in), begin (in));
     806        in[t.replace_pos] = t.replace_char;
     807  
     808        auto state = mbstate_t{};
     809        auto in_next = (const CharT *) nullptr;
     810        auto out_next = (char *) nullptr;
     811        auto res = codecvt_base::result ();
     812  
     813        res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
     814  		     out_next);
     815        VERIFY (res == cvt.error);
     816        VERIFY (in_next == in + t.expected_in_next);
     817        VERIFY (out_next == out + t.expected_out_next);
     818        VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
     819        if (t.expected_out_next < array_size (out))
     820  	VERIFY (out[t.expected_out_next] == 0);
     821      }
     822  }
     823  
     824  template <class CharT>
     825  void
     826  utf16_to_utf8_out (const std::codecvt<CharT, char, mbstate_t> &cvt)
     827  {
     828    utf16_to_utf8_out_ok (cvt);
     829    utf16_to_utf8_out_partial (cvt);
     830    utf16_to_utf8_out_error (cvt);
     831  }
     832  
     833  template <class CharT>
     834  void
     835  test_utf8_utf16_cvts (const std::codecvt<CharT, char, mbstate_t> &cvt)
     836  {
     837    utf8_to_utf16_in (cvt);
     838    utf16_to_utf8_out (cvt);
     839  }
     840  
     841  template <class CharT>
     842  void
     843  utf8_to_ucs2_in_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
     844  {
     845    using namespace std;
     846    // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
     847    const char in[] = "\uAAAA";
     848    const char16_t exp_literal[] = u"\uAAAA";
     849    CharT exp[array_size (exp_literal)] = {};
     850    copy (begin (exp_literal), end (exp_literal), begin (exp));
     851  
     852    static_assert (array_size (in) == 7, "");
     853    static_assert (array_size (exp_literal) == 4, "");
     854    static_assert (array_size (exp) == 4, "");
     855    VERIFY (char_traits<char>::length (in) == 6);
     856    VERIFY (char_traits<char16_t>::length (exp_literal) == 3);
     857    VERIFY (char_traits<CharT>::length (exp) == 3);
     858  
     859    test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}};
     860    for (auto t : offsets)
     861      {
     862        CharT out[array_size (exp) - 1] = {};
     863        VERIFY (t.in_size <= array_size (in));
     864        VERIFY (t.out_size <= array_size (out));
     865        auto state = mbstate_t{};
     866        auto in_next = (const char *) nullptr;
     867        auto out_next = (CharT *) nullptr;
     868        auto res = codecvt_base::result ();
     869  
     870        res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
     871  		    out_next);
     872        VERIFY (res == cvt.ok);
     873        VERIFY (in_next == in + t.in_size);
     874        VERIFY (out_next == out + t.out_size);
     875        VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
     876        if (t.out_size < array_size (out))
     877  	VERIFY (out[t.out_size] == 0);
     878      }
     879  
     880    for (auto t : offsets)
     881      {
     882        CharT out[array_size (exp)] = {};
     883        VERIFY (t.in_size <= array_size (in));
     884        VERIFY (t.out_size <= array_size (out));
     885        auto state = mbstate_t{};
     886        auto in_next = (const char *) nullptr;
     887        auto out_next = (CharT *) nullptr;
     888        auto res = codecvt_base::result ();
     889  
     890        res
     891  	= cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
     892        VERIFY (res == cvt.ok);
     893        VERIFY (in_next == in + t.in_size);
     894        VERIFY (out_next == out + t.out_size);
     895        VERIFY (char_traits<CharT>::compare (out, exp, t.out_size) == 0);
     896        if (t.out_size < array_size (out))
     897  	VERIFY (out[t.out_size] == 0);
     898      }
     899  }
     900  
     901  template <class CharT>
     902  void
     903  utf8_to_ucs2_in_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
     904  {
     905    using namespace std;
     906    // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
     907    const char in[] = "\uAAAA";
     908    const char16_t exp_literal[] = u"\uAAAA";
     909    CharT exp[array_size (exp_literal)] = {};
     910    copy (begin (exp_literal), end (exp_literal), begin (exp));
     911  
     912    static_assert (array_size (in) == 7, "");
     913    static_assert (array_size (exp_literal) == 4, "");
     914    static_assert (array_size (exp) == 4, "");
     915    VERIFY (char_traits<char>::length (in) == 6);
     916    VERIFY (char_traits<char16_t>::length (exp_literal) == 3);
     917    VERIFY (char_traits<CharT>::length (exp) == 3);
     918  
     919    test_offsets_partial offsets[] = {
     920      {1, 0, 0, 0}, // no space for first CP
     921  
     922      {3, 1, 1, 1}, // no space for second CP
     923      {2, 2, 1, 1}, // incomplete second CP
     924      {2, 1, 1, 1}, // incomplete second CP, and no space for it
     925  
     926      {6, 2, 3, 2}, // no space for third CP
     927      {4, 3, 3, 2}, // incomplete third CP
     928      {5, 3, 3, 2}, // incomplete third CP
     929      {4, 2, 3, 2}, // incomplete third CP, and no space for it
     930      {5, 2, 3, 2}, // incomplete third CP, and no space for it
     931    };
     932  
     933    for (auto t : offsets)
     934      {
     935        CharT out[array_size (exp) - 1] = {};
     936        VERIFY (t.in_size <= array_size (in));
     937        VERIFY (t.out_size <= array_size (out));
     938        VERIFY (t.expected_in_next <= t.in_size);
     939        VERIFY (t.expected_out_next <= t.out_size);
     940        auto state = mbstate_t{};
     941        auto in_next = (const char *) nullptr;
     942        auto out_next = (CharT *) nullptr;
     943        auto res = codecvt_base::result ();
     944  
     945        res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
     946  		    out_next);
     947        VERIFY (res == cvt.partial);
     948        VERIFY (in_next == in + t.expected_in_next);
     949        VERIFY (out_next == out + t.expected_out_next);
     950        VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
     951        if (t.expected_out_next < array_size (out))
     952  	VERIFY (out[t.expected_out_next] == 0);
     953      }
     954  }
     955  
     956  template <class CharT>
     957  void
     958  utf8_to_ucs2_in_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
     959  {
     960    using namespace std;
     961    const char valid_in[] = "\uAAAA\U0010AAAA";
     962    const char16_t exp_literal[] = u"\uAAAA\U0010AAAA";
     963    CharT exp[array_size (exp_literal)] = {};
     964    copy (begin (exp_literal), end (exp_literal), begin (exp));
     965  
     966    static_assert (array_size (valid_in) == 11, "");
     967    static_assert (array_size (exp_literal) == 6, "");
     968    static_assert (array_size (exp) == 6, "");
     969    VERIFY (char_traits<char>::length (valid_in) == 10);
     970    VERIFY (char_traits<char16_t>::length (exp_literal) == 5);
     971    VERIFY (char_traits<CharT>::length (exp) == 5);
     972  
     973    test_offsets_error<char> offsets[] = {
     974  
     975      // replace leading byte with invalid byte
     976      {1, 5, 0, 0, '\xFF', 0},
     977      {3, 5, 1, 1, '\xFF', 1},
     978      {6, 5, 3, 2, '\xFF', 3},
     979      {10, 5, 6, 3, '\xFF', 6},
     980  
     981      // replace first trailing byte with ASCII byte
     982      {3, 5, 1, 1, 'z', 2},
     983      {6, 5, 3, 2, 'z', 4},
     984      {10, 5, 6, 3, 'z', 7},
     985  
     986      // replace first trailing byte with invalid byte
     987      {3, 5, 1, 1, '\xFF', 2},
     988      {6, 5, 3, 2, '\xFF', 4},
     989      {10, 5, 6, 3, '\xFF', 7},
     990  
     991      // replace second trailing byte with ASCII byte
     992      {6, 5, 3, 2, 'z', 5},
     993      {10, 5, 6, 3, 'z', 8},
     994  
     995      // replace second trailing byte with invalid byte
     996      {6, 5, 3, 2, '\xFF', 5},
     997      {10, 5, 6, 3, '\xFF', 8},
     998  
     999      // replace third trailing byte
    1000      {10, 5, 6, 3, 'z', 9},
    1001      {10, 5, 6, 3, '\xFF', 9},
    1002  
    1003      // When we see a leading byte of 4-byte CP, we should return error, no
    1004      // matter if it is incomplete at the end or has errors in the trailing
    1005      // bytes.
    1006  
    1007      // Don't replace anything, show full 4-byte CP
    1008      {10, 4, 6, 3, 'b', 0},
    1009      {10, 5, 6, 3, 'b', 0},
    1010  
    1011      // Don't replace anything, show incomplete 4-byte CP at the end
    1012      {7, 4, 6, 3, 'b', 0}, // incomplete fourth CP
    1013      {8, 4, 6, 3, 'b', 0}, // incomplete fourth CP
    1014      {9, 4, 6, 3, 'b', 0}, // incomplete fourth CP
    1015      {7, 5, 6, 3, 'b', 0}, // incomplete fourth CP
    1016      {8, 5, 6, 3, 'b', 0}, // incomplete fourth CP
    1017      {9, 5, 6, 3, 'b', 0}, // incomplete fourth CP
    1018  
    1019      // replace first trailing byte with ASCII byte, also incomplete at end
    1020      {5, 5, 3, 2, 'z', 4},
    1021  
    1022      // replace first trailing byte with invalid byte, also incomplete at end
    1023      {5, 5, 3, 2, '\xFF', 4},
    1024  
    1025      // replace first trailing byte with ASCII byte, also incomplete at end
    1026      {8, 5, 6, 3, 'z', 7},
    1027      {9, 5, 6, 3, 'z', 7},
    1028  
    1029      // replace first trailing byte with invalid byte, also incomplete at end
    1030      {8, 5, 6, 3, '\xFF', 7},
    1031      {9, 5, 6, 3, '\xFF', 7},
    1032  
    1033      // replace second trailing byte with ASCII byte, also incomplete at end
    1034      {9, 5, 6, 3, 'z', 8},
    1035  
    1036      // replace second trailing byte with invalid byte, also incomplete at end
    1037      {9, 5, 6, 3, '\xFF', 8},
    1038    };
    1039    for (auto t : offsets)
    1040      {
    1041        char in[array_size (valid_in)] = {};
    1042        CharT out[array_size (exp) - 1] = {};
    1043        VERIFY (t.in_size <= array_size (in));
    1044        VERIFY (t.out_size <= array_size (out));
    1045        VERIFY (t.expected_in_next <= t.in_size);
    1046        VERIFY (t.expected_out_next <= t.out_size);
    1047        char_traits<char>::copy (in, valid_in, array_size (valid_in));
    1048        in[t.replace_pos] = t.replace_char;
    1049  
    1050        auto state = mbstate_t{};
    1051        auto in_next = (const char *) nullptr;
    1052        auto out_next = (CharT *) nullptr;
    1053        auto res = codecvt_base::result ();
    1054  
    1055        res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
    1056  		    out_next);
    1057        VERIFY (res == cvt.error);
    1058        VERIFY (in_next == in + t.expected_in_next);
    1059        VERIFY (out_next == out + t.expected_out_next);
    1060        VERIFY (char_traits<CharT>::compare (out, exp, t.expected_out_next) == 0);
    1061        if (t.expected_out_next < array_size (out))
    1062  	VERIFY (out[t.expected_out_next] == 0);
    1063      }
    1064  }
    1065  
    1066  template <class CharT>
    1067  void
    1068  utf8_to_ucs2_in (const std::codecvt<CharT, char, mbstate_t> &cvt)
    1069  {
    1070    utf8_to_ucs2_in_ok (cvt);
    1071    utf8_to_ucs2_in_partial (cvt);
    1072    utf8_to_ucs2_in_error (cvt);
    1073  }
    1074  
    1075  template <class CharT>
    1076  void
    1077  ucs2_to_utf8_out_ok (const std::codecvt<CharT, char, mbstate_t> &cvt)
    1078  {
    1079    using namespace std;
    1080    // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
    1081    const char16_t in_literal[] = u"\uAAAA";
    1082    const char exp[] = "\uAAAA";
    1083    CharT in[array_size (in_literal)] = {};
    1084    copy (begin (in_literal), end (in_literal), begin (in));
    1085  
    1086    static_assert (array_size (in_literal) == 4, "");
    1087    static_assert (array_size (exp) == 7, "");
    1088    static_assert (array_size (in) == 4, "");
    1089    VERIFY (char_traits<char16_t>::length (in_literal) == 3);
    1090    VERIFY (char_traits<char>::length (exp) == 6);
    1091    VERIFY (char_traits<CharT>::length (in) == 3);
    1092  
    1093    const test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}};
    1094    for (auto t : offsets)
    1095      {
    1096        char out[array_size (exp) - 1] = {};
    1097        VERIFY (t.in_size <= array_size (in));
    1098        VERIFY (t.out_size <= array_size (out));
    1099        auto state = mbstate_t{};
    1100        auto in_next = (const CharT *) nullptr;
    1101        auto out_next = (char *) nullptr;
    1102        auto res = codecvt_base::result ();
    1103  
    1104        res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
    1105  		     out_next);
    1106        VERIFY (res == cvt.ok);
    1107        VERIFY (in_next == in + t.in_size);
    1108        VERIFY (out_next == out + t.out_size);
    1109        VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
    1110        if (t.out_size < array_size (out))
    1111  	VERIFY (out[t.out_size] == 0);
    1112      }
    1113  }
    1114  
    1115  template <class CharT>
    1116  void
    1117  ucs2_to_utf8_out_partial (const std::codecvt<CharT, char, mbstate_t> &cvt)
    1118  {
    1119    using namespace std;
    1120    // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
    1121    const char16_t in_literal[] = u"\uAAAA";
    1122    const char exp[] = "\uAAAA";
    1123    CharT in[array_size (in_literal)] = {};
    1124    copy (begin (in_literal), end (in_literal), begin (in));
    1125  
    1126    static_assert (array_size (in_literal) == 4, "");
    1127    static_assert (array_size (exp) == 7, "");
    1128    static_assert (array_size (in) == 4, "");
    1129    VERIFY (char_traits<char16_t>::length (in_literal) == 3);
    1130    VERIFY (char_traits<char>::length (exp) == 6);
    1131    VERIFY (char_traits<CharT>::length (in) == 3);
    1132  
    1133    const test_offsets_partial offsets[] = {
    1134      {1, 0, 0, 0}, // no space for first CP
    1135  
    1136      {2, 1, 1, 1}, // no space for second CP
    1137      {2, 2, 1, 1}, // no space for second CP
    1138  
    1139      {3, 3, 2, 3}, // no space for third CP
    1140      {3, 4, 2, 3}, // no space for third CP
    1141      {3, 5, 2, 3}, // no space for third CP
    1142    };
    1143    for (auto t : offsets)
    1144      {
    1145        char out[array_size (exp) - 1] = {};
    1146        VERIFY (t.in_size <= array_size (in));
    1147        VERIFY (t.out_size <= array_size (out));
    1148        VERIFY (t.expected_in_next <= t.in_size);
    1149        VERIFY (t.expected_out_next <= t.out_size);
    1150        auto state = mbstate_t{};
    1151        auto in_next = (const CharT *) nullptr;
    1152        auto out_next = (char *) nullptr;
    1153        auto res = codecvt_base::result ();
    1154  
    1155        res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
    1156  		     out_next);
    1157        VERIFY (res == cvt.partial);
    1158        VERIFY (in_next == in + t.expected_in_next);
    1159        VERIFY (out_next == out + t.expected_out_next);
    1160        VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
    1161        if (t.expected_out_next < array_size (out))
    1162  	VERIFY (out[t.expected_out_next] == 0);
    1163      }
    1164  }
    1165  
    1166  template <class CharT>
    1167  void
    1168  ucs2_to_utf8_out_error (const std::codecvt<CharT, char, mbstate_t> &cvt)
    1169  {
    1170    using namespace std;
    1171    const char16_t valid_in[] = u"\uAAAA\U0010AAAA";
    1172    const char exp[] = "\uAAAA\U0010AAAA";
    1173  
    1174    static_assert (array_size (valid_in) == 6, "");
    1175    static_assert (array_size (exp) == 11, "");
    1176    VERIFY (char_traits<char16_t>::length (valid_in) == 5);
    1177    VERIFY (char_traits<char>::length (exp) == 10);
    1178  
    1179    test_offsets_error<CharT> offsets[] = {
    1180      {5, 10, 0, 0, 0xD800, 0},
    1181      {5, 10, 0, 0, 0xDBFF, 0},
    1182      {5, 10, 0, 0, 0xDC00, 0},
    1183      {5, 10, 0, 0, 0xDFFF, 0},
    1184  
    1185      {5, 10, 1, 1, 0xD800, 1},
    1186      {5, 10, 1, 1, 0xDBFF, 1},
    1187      {5, 10, 1, 1, 0xDC00, 1},
    1188      {5, 10, 1, 1, 0xDFFF, 1},
    1189  
    1190      {5, 10, 2, 3, 0xD800, 2},
    1191      {5, 10, 2, 3, 0xDBFF, 2},
    1192      {5, 10, 2, 3, 0xDC00, 2},
    1193      {5, 10, 2, 3, 0xDFFF, 2},
    1194  
    1195      // dont replace anything, just show the surrogate pair
    1196      {5, 10, 3, 6, u'b', 0},
    1197  
    1198      // make the leading surrogate a trailing one
    1199      {5, 10, 3, 6, 0xDC00, 3},
    1200      {5, 10, 3, 6, 0xDFFF, 3},
    1201  
    1202      // make the trailing surrogate a leading one
    1203      {5, 10, 3, 6, 0xD800, 4},
    1204      {5, 10, 3, 6, 0xDBFF, 4},
    1205  
    1206      // make the trailing surrogate a BMP char
    1207      {5, 10, 3, 6, u'z', 4},
    1208  
    1209      {5, 7, 3, 6, u'b', 0}, // no space for fourth CP
    1210      {5, 8, 3, 6, u'b', 0}, // no space for fourth CP
    1211      {5, 9, 3, 6, u'b', 0}, // no space for fourth CP
    1212  
    1213      {4, 10, 3, 6, u'b', 0}, // incomplete fourth CP
    1214      {4, 7, 3, 6, u'b', 0},  // incomplete fourth CP, and no space for it
    1215      {4, 8, 3, 6, u'b', 0},  // incomplete fourth CP, and no space for it
    1216      {4, 9, 3, 6, u'b', 0},  // incomplete fourth CP, and no space for it
    1217  
    1218    };
    1219  
    1220    for (auto t : offsets)
    1221      {
    1222        CharT in[array_size (valid_in)] = {};
    1223        char out[array_size (exp) - 1] = {};
    1224        VERIFY (t.in_size <= array_size (in));
    1225        VERIFY (t.out_size <= array_size (out));
    1226        VERIFY (t.expected_in_next <= t.in_size);
    1227        VERIFY (t.expected_out_next <= t.out_size);
    1228        copy (begin (valid_in), end (valid_in), begin (in));
    1229        in[t.replace_pos] = t.replace_char;
    1230  
    1231        auto state = mbstate_t{};
    1232        auto in_next = (const CharT *) nullptr;
    1233        auto out_next = (char *) nullptr;
    1234        auto res = codecvt_base::result ();
    1235  
    1236        res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
    1237  		     out_next);
    1238        VERIFY (res == cvt.error);
    1239        VERIFY (in_next == in + t.expected_in_next);
    1240        VERIFY (out_next == out + t.expected_out_next);
    1241        VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
    1242        if (t.expected_out_next < array_size (out))
    1243  	VERIFY (out[t.expected_out_next] == 0);
    1244      }
    1245  }
    1246  
    1247  template <class CharT>
    1248  void
    1249  ucs2_to_utf8_out (const std::codecvt<CharT, char, mbstate_t> &cvt)
    1250  {
    1251    ucs2_to_utf8_out_ok (cvt);
    1252    ucs2_to_utf8_out_partial (cvt);
    1253    ucs2_to_utf8_out_error (cvt);
    1254  }
    1255  
    1256  template <class CharT>
    1257  void
    1258  test_utf8_ucs2_cvts (const std::codecvt<CharT, char, mbstate_t> &cvt)
    1259  {
    1260    utf8_to_ucs2_in (cvt);
    1261    ucs2_to_utf8_out (cvt);
    1262  }