(root)/
expat-2.5.0/
xmlwf/
ct.c
       1  /*
       2                              __  __            _
       3                           ___\ \/ /_ __   __ _| |_
       4                          / _ \\  /| '_ \ / _` | __|
       5                         |  __//  \| |_) | (_| | |_
       6                          \___/_/\_\ .__/ \__,_|\__|
       7                                   |_| XML parser
       8  
       9     Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
      10     Copyright (c) 2002      Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
      11     Copyright (c) 2016-2017 Sebastian Pipping <sebastian@pipping.org>
      12     Licensed under the MIT license:
      13  
      14     Permission is  hereby granted,  free of charge,  to any  person obtaining
      15     a  copy  of  this  software   and  associated  documentation  files  (the
      16     "Software"),  to  deal in  the  Software  without restriction,  including
      17     without  limitation the  rights  to use,  copy,  modify, merge,  publish,
      18     distribute, sublicense, and/or sell copies of the Software, and to permit
      19     persons  to whom  the Software  is  furnished to  do so,  subject to  the
      20     following conditions:
      21  
      22     The above copyright  notice and this permission notice  shall be included
      23     in all copies or substantial portions of the Software.
      24  
      25     THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
      26     EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
      27     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
      28     NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
      29     DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
      30     OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
      31     USE OR OTHER DEALINGS IN THE SOFTWARE.
      32  */
      33  
#include <stdio.h>
#include <string.h>

#define CHARSET_MAX 41
      35  
      36  static const char *
      37  getTok(const char **pp) {
      38    enum { inAtom, inString, init, inComment };
      39    int state = init;
      40    const char *tokStart = 0;
      41    for (;;) {
      42      switch (**pp) {
      43      case '\0':
      44        return 0;
      45      case ' ':
      46      case '\r':
      47      case '\t':
      48      case '\n':
      49        if (state == inAtom)
      50          return tokStart;
      51        break;
      52      case '(':
      53        if (state == inAtom)
      54          return tokStart;
      55        if (state != inString)
      56          state++;
      57        break;
      58      case ')':
      59        if (state > init)
      60          --state;
      61        else if (state != inString)
      62          return 0;
      63        break;
      64      case ';':
      65      case '/':
      66      case '=':
      67        if (state == inAtom)
      68          return tokStart;
      69        if (state == init)
      70          return (*pp)++;
      71        break;
      72      case '\\':
      73        ++*pp;
      74        if (**pp == '\0')
      75          return 0;
      76        break;
      77      case '"':
      78        switch (state) {
      79        case inString:
      80          ++*pp;
      81          return tokStart;
      82        case inAtom:
      83          return tokStart;
      84        case init:
      85          tokStart = *pp;
      86          state = inString;
      87          break;
      88        }
      89        break;
      90      default:
      91        if (state == init) {
      92          tokStart = *pp;
      93          state = inAtom;
      94        }
      95        break;
      96      }
      97      ++*pp;
      98    }
      99    /* not reached */
     100  }
     101  
     102  /* key must be lowercase ASCII */
     103  
     104  static int
     105  matchkey(const char *start, const char *end, const char *key) {
     106    if (! start)
     107      return 0;
     108    for (; start != end; start++, key++)
     109      if (*start != *key && *start != 'A' + (*key - 'a'))
     110        return 0;
     111    return *key == '\0';
     112  }
     113  
     114  void
     115  getXMLCharset(const char *buf, char *charset) {
     116    const char *next, *p;
     117  
     118    charset[0] = '\0';
     119    next = buf;
     120    p = getTok(&next);
     121    if (matchkey(p, next, "text"))
     122      strcpy(charset, "us-ascii");
     123    else if (! matchkey(p, next, "application"))
     124      return;
     125    p = getTok(&next);
     126    if (! p || *p != '/')
     127      return;
     128    p = getTok(&next);
     129    if (matchkey(p, next, "xml"))
     130      isXml = 1;
     131    p = getTok(&next);
     132    while (p) {
     133      if (*p == ';') {
     134        p = getTok(&next);
     135        if (matchkey(p, next, "charset")) {
     136          p = getTok(&next);
     137          if (p && *p == '=') {
     138            p = getTok(&next);
     139            if (p) {
     140              char *s = charset;
     141              if (*p == '"') {
     142                while (++p != next - 1) {
     143                  if (*p == '\\')
     144                    ++p;
     145                  if (s == charset + CHARSET_MAX - 1) {
     146                    charset[0] = '\0';
     147                    break;
     148                  }
     149                  *s++ = *p;
     150                }
     151                *s++ = '\0';
     152              } else {
     153                if (next - p > CHARSET_MAX - 1)
     154                  break;
     155                while (p != next)
     156                  *s++ = *p++;
     157                *s = 0;
     158                break;
     159              }
     160            }
     161          }
     162        }
     163      } else
     164        p = getTok(&next);
     165    }
     166  }
     167  
     168  int
     169  main(int argc, char **argv) {
     170    char buf[CHARSET_MAX];
     171    getXMLCharset(argv[1], buf);
     172    printf("charset = \"%s\"\n", buf);
     173    return 0;
     174  }