(root)/
gcc-13.2.0/
gcc/
d/
dmd/
chkformat.d
/**
 * Check the arguments to `printf` and `scanf` against the `format` string.
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
 * Documentation:  https://dlang.org/phobos/dmd_chkformat.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
 */
module dmd.chkformat;

//import core.stdc.stdio : printf, scanf;
import core.stdc.ctype : isdigit;

import dmd.astenums;
import dmd.cond;
import dmd.errors;
import dmd.expression;
import dmd.globals;
import dmd.identifier;
import dmd.location;
import dmd.mtype;
import dmd.target;


/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Issue errors for incompatibilities.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatiblity.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. deferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if errors occurred
 * References:
 * C99 7.19.6.1
 * https://www.cplusplus.com/reference/cstdio/printf/
 */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    size_t n;    // index in args
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments
        if (fmt == Format.GNU_m)
            continue;                   // "%m", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        Expression getNextArg(ref bool skip)
        {
            if (n == args.length)
            {
                if (args.length < (n + 1))
                    deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                else
                    skip = true;
                return null;
            }
            return args[n++];
        }

        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        if (widthStar)
        {
            bool skip;
            auto e = getNextArg(skip);
            if (skip)
                continue;
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("width ", e, "int", t);
        }

        if (precisionStar)
        {
            bool skip;
            auto e = getNextArg(skip);
            if (skip)
                continue;
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("precision ", e, "int", t);
        }

        bool skip;
        auto e = getNextArg(skip);
        if (skip)
            continue;
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, fmt == Format.u ? "uint" : "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, e, fmt == Format.hhu ? "ubyte" : "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, e, fmt == Format.hu ? "ushort" : "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                {
                    if (fmt == Format.lu)
                        errorMsg(null, e, (c_longsize == 4 ? "uint" : "ulong"), t);
                    else
                        errorMsg(null, e, (c_longsize == 4 ? "int" : "long"), t);
                }
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, fmt == Format.llu ? "ulong" : "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                {
                    if (fmt == Format.ju)
                        errorMsg(null, e, "core.stdc.stdint.uintmax_t", t);
                    else
                        errorMsg(null, e, "core.stdc.stdint.intmax_t", t);
                }
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t", t);
                break;

            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:
            case Format.POSIX_ms:
            case Format.POSIX_mls:
            case Format.percent:
                assert(0);
        }
    }
    return false;
}

/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Issue errors for incompatibilities.
 *
 * Follows the C99 specification for scanf.
 *
 * Takes a generous, rather than strict, view of compatiblity.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. deferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if errors occurred
 * References:
 * C99 7.19.6.2
 * https://www.cplusplus.com/reference/cstdio/scanf/
 */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    size_t n = 0;
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        Expression getNextArg()
        {
            if (n == args.length)
            {
                if (!asterisk)
                    deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "core.stdc.stdint.uintmax_t*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, e, "float*", t);
                break;

            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, e, "double*", t);
                break;

            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, e, "real*", t);
                break;

            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, e, "void**", t);
                break;

            case Format.POSIX_ms: // pointer to pointer to char string
                Type tnext2 = tnext ? tnext.nextOf() : null;
                if (!(t.ty == Tpointer && tnext.ty == Tpointer && (tnext2.ty == Tchar || tnext2.ty == Tint8 || tnext2.ty == Tuns8)))
                    errorMsg(null, e, "char**", t);
                break;

            case Format.POSIX_mls: // pointer to pointer to wchar_t string
                Type tnext2 = tnext ? tnext.nextOf() : null;
                if (!(t.ty == Tpointer && tnext.ty == Tpointer && tnext2.ty.isSomeChar && tnext2.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:
            case Format.percent:
                assert(0);
        }
    }
    return false;
}

private:

/**************************************
 * Parse the *format specifier* which is of the form:
 *
 * `%[*][width][length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of `%` of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      asterisk = set if there is a `*` sub-specifier
 * Returns:
 *      Format
 */
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk) nothrow pure @safe
{
    auto i = idx;
    assert(format[i] == '%');
    const length = format.length;

    Format error()
    {
        idx = i;
        return Format.error;
    }

    ++i;
    if (i == length)
        return error();

    if (format[i] == '%')
    {
        idx = i + 1;
        return Format.percent;
    }

    // * sub-specifier
    if (format[i] == '*')
    {
        ++i;
        if (i == length)
            return error();
        asterisk = true;
    }

    // fieldWidth
    while (isdigit(format[i]))
    {
        i++;
        if (i == length)
            return error();
    }

    /* Read the specifier
     */
    Format specifier;
    Modifier flags = Modifier.none;
    switch (format[i])
    {
        case 'm':
            // https://pubs.opengroup.org/onlinepubs/9699919799/functions/scanf.html
            // POSIX.1-2017 C Extension (CX)
            flags = Modifier.m;
            ++i;
            if (i == length)
                return error();
            if (format[i] == 'l')
            {
                ++i;
                if (i == length)
                    return error();
                flags = Modifier.ml;
            }

            // Check valid conversion types for %m.
            if (format[i] == 'c' || format[i] == 's')
                specifier = flags == Modifier.ml ? Format.POSIX_mls :
                                                   Format.POSIX_ms;
            else if (format[i] == 'C' || format[i] == 'S')
                specifier = flags == Modifier.m ? Format.POSIX_mls :
                                                  Format.error;
            else if (format[i] == '[')
                goto case '[';
            else
                specifier = Format.error;
            ++i;
            break;

        case 'l':
            // Look for wchar_t scanset %l[..]
            immutable j = i + 1;
            if (j < length && format[j] == '[')
            {
                i = j;
                flags = Modifier.l;
                goto case '[';
            }
            goto default;

        case '[':
            // Read the scanset
            i++;
            if (i == length)
                return error();
            // If the conversion specifier begins with `[]` or `[^]`, the right
            // bracket character is not the terminator, but in the scanlist.
            if (format[i] == '^')
            {
                i++;
                if (i == length)
                    return error();
            }
            if (format[i] == ']')
            {
                i++;
                if (i == length)
                    return error();
            }
            // A scanset can be anything, so we just check that it is paired
            while (i < length)
            {
                if (format[i] == ']')
                    break;
                ++i;
            }
            // no `]` found
            if (i == length)
                return error();

            specifier = flags == Modifier.none ? Format.s         :
                        flags == Modifier.l    ? Format.ls        :
                        flags == Modifier.m    ? Format.POSIX_ms  :
                        flags == Modifier.ml   ? Format.POSIX_mls :
                                                 Format.error;
            ++i;
            break;

        default:
            char genSpec;
            specifier = parseGenericFormatSpecifier(format, i, genSpec);
            if (specifier == Format.error)
                return error();
            break;
    }

    idx = i;
    return specifier;  // success
}

/**************************************
 * Parse the *format specifier* which is of the form:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of `%` of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      widthStar = set if * for width
 *      precisionStar = set if * for precision
 *      useGNUExts = true if parsing GNU format extensions
 * Returns:
 *      Format
 */
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar, bool useGNUExts =
        findCondition(global.versionids, Identifier.idPool("CRuntime_Glibc"))) nothrow pure @safe
{
    auto i = idx;
    assert(format[i] == '%');
    const length = format.length;
    bool hash;
    bool zero;
    bool flags;
    bool width;
    bool precision;

    Format error()
    {
        idx = i;
        return Format.error;
    }

    ++i;
    if (i == length)
        return error();

    if (format[i] == '%')
    {
        idx = i + 1;
        return Format.percent;
    }

    /* Read the `flags`
     */
    while (1)
    {
        const c = format[i];
        if (c == '-' ||
            c == '+' ||
            c == ' ')
        {
            flags = true;
        }
        else if (c == '#')
        {
            hash = true;
        }
        else if (c == '0')
        {
            zero = true;
        }
        else
            break;
        ++i;
        if (i == length)
            return error();
    }

    /* Read the `field width`
     */
    {
        const c = format[i];
        if (c == '*')
        {
            width = true;
            widthStar = true;
            ++i;
            if (i == length)
                return error();
        }
        else if ('1' <= c && c <= '9')
        {
            width = true;
            ++i;
            if (i == length)
                return error();
            while ('0' <= format[i] && format[i] <= '9')
            {
                ++i;
                if (i == length)
                    return error();
            }
        }
    }

    /* Read the `precision`
     */
    if (format[i] == '.')
    {
        precision = true;
        ++i;
        if (i == length)
            return error();
        const c = format[i];
        if (c == '*')
        {
            precisionStar = true;
            ++i;
            if (i == length)
                return error();
        }
        else if ('0' <= c && c <= '9')
        {
            ++i;
            if (i == length)
                return error();
            while ('0' <= format[i] && format[i] <= '9')
            {
                ++i;
                if (i == length)
                    return error();
            }
        }
    }

    /* Read the specifier
     */
    char genSpec;
    Format specifier;
    switch (format[i])
    {
        case 'm':
            // https://www.gnu.org/software/libc/manual/html_node/Other-Output-Conversions.html
            if (useGNUExts)
            {
                specifier = Format.GNU_m;
                genSpec = format[i];
                ++i;
                break;
            }
            goto default;

        default:
            specifier = parseGenericFormatSpecifier(format, i, genSpec);
            if (specifier == Format.error)
                return error();
            break;
    }

    switch (genSpec)
    {
        case 'c':
        case 's':
        case 'C':
        case 'S':
            if (hash || zero)
                return error();
            break;

        case 'd':
        case 'i':
            if (hash)
                return error();
            break;

        case 'm':
            if (hash || zero || flags)
                return error();
            break;

        case 'n':
            if (hash || zero || precision || width || flags)
                return error();
            break;

        default:
            break;
    }

    idx = i;
    return specifier;  // success
}

/* Different kinds of conversion modifiers. */
enum Modifier
{
    none,
    h,          // short
    hh,         // char
    j,          // intmax_t
    l,          // wint_t/wchar_t
    ll,         // long long int
    L,          // long double
    m,          // char**
    ml,         // wchar_t**
    t,          // ptrdiff_t
    z           // size_t
}

/* Different kinds of formatting specifications, variations we don't
   care about are merged. (Like we don't care about the difference between
   f, e, g, a, etc.)

   For `scanf`, every format is a pointer.
 */
enum Format
{
    d,          // int
    hhd,        // signed char
    hd,         // short int
    ld,         // long int
    lld,        // long long int
    jd,         // intmax_t
    zd,         // size_t
    td,         // ptrdiff_t
    u,          // unsigned int
    hhu,        // unsigned char
    hu,         // unsigned short int
    lu,         // unsigned long int
    llu,        // unsigned long long int
    ju,         // uintmax_t
    g,          // float (scanf) / double (printf)
    lg,         // double (scanf)
    Lg,         // long double (both)
    s,          // char string (both)
    ls,         // wchar_t string (both)
    c,          // char (printf)
    lc,         // wint_t (printf)
    p,          // pointer
    n,          // pointer to int
    hhn,        // pointer to signed char
    hn,         // pointer to short
    ln,         // pointer to long int
    lln,        // pointer to long long int
    jn,         // pointer to intmax_t
    zn,         // pointer to size_t
    tn,         // pointer to ptrdiff_t
    GNU_m,      // GNU ext. : string corresponding to the error code in errno (printf)
    POSIX_ms,   // POSIX ext. : dynamically allocated char string  (scanf)
    POSIX_mls,  // POSIX ext. : dynamically allocated wchar_t string (scanf)
    percent,    // %% (i.e. no argument)
    error,      // invalid format specification
}

/**************************************
 * Parse the *length specifier* and the *specifier* of the following form:
 * `[length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      genSpecifier = Generic specifier. For instance, it will be set to `d` if the
 *           format is `hdd`.
 * Returns:
 *      Format
 */
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier) nothrow pure @safe
{
    const length = format.length;

    /* Read the `length modifier`
     */
    const lm = format[idx];
    Modifier flags;
    switch (lm)
    {
        case 'j':
        case 'z':
        case 't':
        case 'L':
            flags = lm == 'j' ? Modifier.j :
                    lm == 'z' ? Modifier.z :
                    lm == 't' ? Modifier.t :
                                Modifier.L;
            ++idx;
            if (idx == length)
                return Format.error;
            break;

        case 'h':
        case 'l':
            ++idx;
            if (idx == length)
                return Format.error;
            if (lm == format[idx])
            {
                flags = lm == 'h' ? Modifier.hh : Modifier.ll;
                ++idx;
                if (idx == length)
                    return Format.error;
            }
            else
                flags = lm == 'h' ? Modifier.h : Modifier.l;
            break;

        default:
            flags = Modifier.none;
            break;
    }

    /* Read the `specifier`
     */
    Format specifier;
    const sc = format[idx];
    genSpecifier = sc;
    switch (sc)
    {
        case 'd':
        case 'i':
            specifier = flags == Modifier.none ? Format.d   :
                        flags == Modifier.hh   ? Format.hhd :
                        flags == Modifier.h    ? Format.hd  :
                        flags == Modifier.ll   ? Format.lld :
                        flags == Modifier.l    ? Format.ld  :
                        flags == Modifier.j    ? Format.jd  :
                        flags == Modifier.z    ? Format.zd  :
                        flags == Modifier.t    ? Format.td  :
                                                 Format.error;
            break;

        case 'u':
        case 'o':
        case 'x':
        case 'X':
            specifier = flags == Modifier.none ? Format.u   :
                        flags == Modifier.hh   ? Format.hhu :
                        flags == Modifier.h    ? Format.hu  :
                        flags == Modifier.ll   ? Format.llu :
                        flags == Modifier.l    ? Format.lu  :
                        flags == Modifier.j    ? Format.ju  :
                        flags == Modifier.z    ? Format.zd  :
                        flags == Modifier.t    ? Format.td  :
                                                 Format.error;
            break;

        case 'f':
        case 'F':
        case 'e':
        case 'E':
        case 'g':
        case 'G':
        case 'a':
        case 'A':
            specifier = flags == Modifier.none ? Format.g  :
                        flags == Modifier.L    ? Format.Lg :
                        flags == Modifier.l    ? Format.lg :
                                                 Format.error;
            break;

        case 'c':
            specifier = flags == Modifier.none ? Format.c       :
                        flags == Modifier.l    ? Format.lc      :
                                                 Format.error;
            break;

        case 's':
            specifier = flags == Modifier.none ? Format.s       :
                        flags == Modifier.l    ? Format.ls      :
                                                 Format.error;
            break;

        case 'p':
            specifier = flags == Modifier.none ? Format.p :
                                                 Format.error;
            break;

        case 'n':
            specifier = flags == Modifier.none ? Format.n   :
                        flags == Modifier.ll   ? Format.lln :
                        flags == Modifier.l    ? Format.ln  :
                        flags == Modifier.hh   ? Format.hhn :
                        flags == Modifier.h    ? Format.hn  :
                        flags == Modifier.j    ? Format.jn  :
                        flags == Modifier.z    ? Format.zn  :
                        flags == Modifier.t    ? Format.tn  :
                                                 Format.error;
            break;

        case 'C':
            // POSIX.1-2017 X/Open System Interfaces (XSI)
            // %C format is equivalent to %lc
            specifier = flags == Modifier.none ? Format.lc :
                                                 Format.error;
            break;

        case 'S':
            // POSIX.1-2017 X/Open System Interfaces (XSI)
            // %S format is equivalent to %ls
            specifier = flags == Modifier.none ? Format.ls :
                                                 Format.error;
            break;

        default:
            specifier = Format.error;
            break;
    }

    ++idx;
    return specifier; // success
}

@("parseGenericFormatSpecifier") unittest
{
    char genSpecifier;
    size_t idx;

    void testG(string fmtStr, Format expectedFormat, char expectedGenSpecifier)
    {
        idx = 0;
        assert(parseGenericFormatSpecifier(fmtStr, idx, genSpecifier) == expectedFormat);
        assert(genSpecifier == expectedGenSpecifier);
    }

    testG("hhd", Format.hhd, 'd');
    testG("hn", Format.hn, 'n');
    testG("ji", Format.jd, 'i');
    testG("lu", Format.lu, 'u');

    idx = 0;
    assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);
}

@("parsePrintfFormatSpecifier") unittest
{
    bool useGNUExts = false;

    size_t idx = 0;
    bool widthStar;
    bool precisionStar;

    void testP(string fmtStr, Format expectedFormat, size_t expectedIdx)
    {
        idx = 0;
        assert(parsePrintfFormatSpecifier(fmtStr, idx, widthStar, precisionStar, useGNUExts) == expectedFormat);
        assert(idx == expectedIdx);
    }

    // one for each Format
    testP("%d", Format.d, 2);
    assert(!widthStar && !precisionStar);

    testP("%ld", Format.ld, 3);
    testP("%lld", Format.lld, 4);
    testP("%jd", Format.jd, 3);
    testP("%zd", Format.zd, 3);
    testP("%td", Format.td, 3);
    testP("%g", Format.g, 2);
    testP("%Lg", Format.Lg, 3);
    testP("%p", Format.p, 2);
    testP("%n", Format.n, 2);
    testP("%ln", Format.ln, 3);
    testP("%lln", Format.lln, 4);
    testP("%hn", Format.hn, 3);
    testP("%hhn", Format.hhn, 4);
    testP("%jn", Format.jn, 3);
    testP("%zn", Format.zn, 3);
    testP("%tn", Format.tn, 3);
    testP("%c", Format.c, 2);
    testP("%lc", Format.lc, 3);
    testP("%s", Format.s, 2);
    testP("%ls", Format.ls, 3);
    testP("%%", Format.percent, 2);

    // Synonyms
    testP("%i", Format.d, 2);
    testP("%u", Format.u, 2);
    testP("%o", Format.u, 2);
    testP("%x", Format.u, 2);
    testP("%X", Format.u, 2);
    testP("%f", Format.g, 2);
    testP("%F", Format.g, 2);
    testP("%G", Format.g, 2);
    testP("%a", Format.g, 2);
    testP("%La", Format.Lg, 3);
    testP("%A", Format.g, 2);
    testP("%lg", Format.lg, 3);

    // width, precision
    testP("%*d", Format.d, 3);
    assert(widthStar && !precisionStar);

    testP("%.*d", Format.d, 4);
    assert(!widthStar && precisionStar);

    testP("%*.*d", Format.d, 5);
    assert(widthStar && precisionStar);

    // Too short formats
    foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
                    "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"])
    {
        testP(s, Format.error, s.length);
    }

    // Undefined format combinations
    foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                    "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                    "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
                    "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                    "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"])
    {
        testP(s, Format.error, s.length);
    }

    testP("%C", Format.lc, 2);
    testP("%S", Format.ls, 2);

    // GNU extensions: explicitly toggle ISO/GNU flag.
    foreach (s; ["%jm", "%zm", "%tm", "%Lm", "%hm", "%hhm", "%lm", "%llm",
                    "%#m", "%+m", "%-m", "% m", "%0m"])
    {
        useGNUExts = false;
        testP(s, Format.error, s.length);
        useGNUExts = true;
        testP(s, Format.error, s.length);
    }

    foreach (s; ["%m", "%md", "%mz", "%mc", "%mm", "%msyz", "%ml", "%mlz", "%mlc", "%mlm"])
    {
        // valid cases, all parsed as `%m`
        // GNU printf()
        useGNUExts = true;
        testP(s, Format.GNU_m, 2);

        // ISO printf()
        useGNUExts = false;
        testP(s, Format.error, 2);
    }
}

@("parseScanfFormatSpecifier") unittest
{
    size_t idx;
    bool asterisk;

    void testS(string fmtStr, Format expectedFormat, size_t expectedIdx)
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(fmtStr, idx, asterisk) == expectedFormat);
        assert(idx == expectedIdx);
    }

    // one for each Format
    testS("%d", Format.d, 2);
    testS("%hhd", Format.hhd, 4);
    testS("%hd", Format.hd, 3);
    testS("%ld", Format.ld, 3);
    testS("%lld", Format.lld, 4);
    testS("%jd", Format.jd, 3);
    testS("%zd", Format.zd, 3);
    testS("%td", Format.td, 3);
    testS("%u", Format.u, 2);
    testS("%hhu", Format.hhu, 4);
    testS("%hu", Format.hu, 3);
    testS("%lu", Format.lu, 3);
    testS("%llu", Format.llu, 4);
    testS("%ju", Format.ju, 3);
    testS("%g", Format.g, 2);
    testS("%lg", Format.lg, 3);
    testS("%Lg", Format.Lg, 3);
    testS("%p", Format.p, 2);
    testS("%s", Format.s, 2);
    testS("%ls", Format.ls, 3);
    testS("%%", Format.percent, 2);

    // Synonyms
    testS("%i", Format.d, 2);
    testS("%n", Format.n, 2);

    testS("%o", Format.u, 2);
    testS("%x", Format.u, 2);
    testS("%f", Format.g, 2);
    testS("%e", Format.g, 2);
    testS("%a", Format.g, 2);
    testS("%c", Format.c, 2);

    // asterisk
    testS("%*d", Format.d, 3);
    assert(asterisk);

    testS("%9ld", Format.ld, 4);
    assert(!asterisk);

    testS("%*25984hhd", Format.hhd, 10);
    assert(asterisk);

    // scansets
    testS("%[a-zA-Z]", Format.s, 9);
    assert(!asterisk);

    testS("%*25l[a-z]", Format.ls, 10);
    assert(asterisk);

    testS("%[]]", Format.s, 4);
    assert(!asterisk);

    testS("%[^]]", Format.s, 5);
    assert(!asterisk);

    // Too short formats
    foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19",
                 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"])
    {

        testS(s, Format.error, s.length);
    }


    // Undefined format combinations
    foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                 "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                 "%-", "%+", "%#", "%0", "%.", "%Ln"])
    {

        testS(s, Format.error, s.length);

    }

    // Invalid scansets
    foreach (s; ["%[]", "%[^", "%[^]", "%[s", "%[0-9lld", "%[", "%l[^]"])
    {

        testS(s, Format.error, s.length);
    }

    // Posix extensions
    foreach (s; ["%jm", "%zm", "%tm", "%Lm", "%hm", "%hhm", "%lm", "%llm",
                 "%m", "%ma", "%md", "%ml", "%mm", "%mlb", "%mlj", "%mlr", "%mlz",
                 "%LC", "%lC", "%llC", "%jC", "%tC", "%hC", "%hhC", "%zC",
                 "%LS", "%lS", "%llS", "%jS", "%tS", "%hS", "%hhS", "%zS"])
    {

        testS(s, Format.error, s.length);
    }

    testS("%mc", Format.POSIX_ms, 3);
    testS("%ms", Format.POSIX_ms, 3);
    testS("%m[0-9]", Format.POSIX_ms, 7);
    testS("%mlc", Format.POSIX_mls, 4);
    testS("%mls", Format.POSIX_mls, 4);
    testS("%ml[^0-9]", Format.POSIX_mls, 9);
    testS("%mC", Format.POSIX_mls, 3);
    testS("%mS", Format.POSIX_mls, 3);

    testS("%C", Format.lc, 2);
    testS("%S", Format.ls, 2);
}