#!/bin/sh

# Test GNU extension "\u" and "\U" (uppercase conversion)
# in "s///" command.
# This is an adaptation of the old utf8-1/2/3/4 tests.

# Copyright (C) 2017-2022 Free Software Foundation, Inc.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# Load the shared test helpers from "$srcdir/testsuite/init.sh" (srcdir
# defaults to "." when unset), then hand ./sed to path_prepend_.
# NOTE(review): presumably this puts the freshly built sed first on PATH,
# so every plain "sed" below is the binary under test -- confirm in init.sh.
. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
# NOTE(review): looks like this logs the version of the sed being tested.
print_ver_ sed

# NOTE(review): presumably skips the test when no Russian UTF-8 locale is
# available; the helper is defined outside this file -- confirm.
require_ru_utf8_locale_

# The letters used in these tests are:
#      UTF8:Octal  UTF8:HEX   CodePoint Name
#  А   \320\220    \xD0\x90   U+0410   \N{CYRILLIC CAPITAL LETTER A}
#  Д   \320\224    \xD0\x94   U+0414   \N{CYRILLIC CAPITAL LETTER DE}
#  а   \320\260    \xD0\xB0   U+0430   \N{CYRILLIC SMALL LETTER A}
#  д   \320\264    \xD0\xB4   U+0434   \N{CYRILLIC SMALL LETTER DE}

# Using octal values, as these are the most portable across the various
# printf implementations.


# Fixture files.  Each one is written with printf and octal escapes; if a
# file cannot be created, framework_failure_ is called.

# Input, shared by all four tests (lower-case letters only):
#       д       а        д
printf '\320\264\320\260 \320\264\n' > utf8-inp \
    || framework_failure_

# sed programs, one per test:
#   1:  s/д/\U&/g     upper-case each "small DE" with \U
#   2:  s/д/\u&/g     upper-case each "small DE" with \u (next character only)
#   3:  s/д.*/\u&/g   \u capitalizes only the first character of the match
#   4:  s/д.*/\U&/g   \U capitalizes every character of the match
printf 's/\320\264/\\U&/g' > utf8-1.sed \
    || framework_failure_
printf 's/\320\264/\\u&/g\n' > utf8-2.sed \
    || framework_failure_
printf 's/\320\264.*/\\u&/g' > utf8-3.sed \
    || framework_failure_
printf 's/\320\264.*/\\U&/g' > utf8-4.sed \
    || framework_failure_

# Expected outputs, one per test.

# Test 1: both DE letters capitalized.
#       Д       а        Д
printf '\320\224\320\260 \320\224\n' > utf8-1-exp \
    || framework_failure_

# Test 2: identical to test 1.  A separate copy is kept so that the test
# loops can address every expected file as utf8-N-exp.
cp utf8-1-exp utf8-2-exp \
    || framework_failure_

# Test 3: first DE capitalized, second DE left alone.
#       Д       а        д
printf '\320\224\320\260 \320\264\n' > utf8-3-exp \
    || framework_failure_

# Test 4: all capital letters.
#       Д       А        Д
printf '\320\224\320\220 \320\224\n' > utf8-4-exp \
    || framework_failure_

# Step 1: force Russian UTF8 locale.
# The case-conversion should either work, or not modify the input.
# For each test N, run utf8-N.sed over the shared input (utf8-inp).  The
# result is accepted if it equals utf8-N-exp or, failing that, if it is
# identical to the input.  Anything else sets fail=1.
for i in 1 2 3 4; do
    LC_ALL=ru_RU.UTF-8 \
          sed -f "utf8-$i.sed" < utf8-inp > "utf8-$i-ru-out" || fail=1

    # NOTE(review): framework helper; presumably strips carriage returns
    # from the output file so the comparisons below are not thrown off by
    # CRLF line endings -- confirm in init.sh.
    remove_cr_inplace "utf8-$i-ru-out"

    # If we have the expected output - continue to the next test
    compare "utf8-$i-exp" "utf8-$i-ru-out" && continue

    # Otherwise, ensure the input wasn't modified
    # (i.e. sed did not modify partial octets resulting in
    #  invalid multibyte sequences).
    # All tests share the single input file utf8-inp; there are no
    # per-test input files.
    compare utf8-inp "utf8-$i-ru-out" || fail=1
done


# Step 2: If the current locale supports UTF8, repeat the above tests.
# Take the LC_CTYPE line of locale(1) output and strip the 'LC_CTYPE="'
# prefix and the closing quote.  The case patterns below only look at the
# end of the value, so a line printed without quotes still matches.
l=$(locale | grep '^LC_CTYPE=' | sed 's/^.*="// ; s/"$//')
# The value to test is the one just stored in $l.
case "$l" in
    *UTF-8 | *UTF8 | *utf8 | *utf-8) utf8=yes;;
    *) utf8=no;;
esac

if test "$utf8" = yes ; then
    # Same acceptance rule as in step 1, but without overriding the locale:
    # the output must equal utf8-N-exp or be identical to the input.
    for i in 1 2 3 4; do
        sed -f "utf8-$i.sed" < utf8-inp > "utf8-$i-out" || fail=1

        # NOTE(review): framework helper; presumably strips carriage
        # returns from the output file -- confirm in init.sh.
        remove_cr_inplace "utf8-$i-out"

        # If we have the expected output - continue to the next test
        compare "utf8-$i-exp" "utf8-$i-out" && continue

        # Otherwise, ensure the input wasn't modified
        # (i.e. sed did not modify partial octets resulting in
        #  invalid multibyte sequences).
        # All tests share the single input file utf8-inp; there are no
        # per-test input files.
        compare utf8-inp "utf8-$i-out" || fail=1
    done
fi


# Finish with the accumulated result: fail is set to 1 by any failed check
# above.
# NOTE(review): Exit (capital E) and the initial value of fail presumably
# come from init.sh -- confirm.
Exit $fail