1 /*
2 * Summary: regular expressions handling
3 * Description: basic API for libxml regular expressions handling used
4 * for XML Schemas and validation.
5 *
6 * Copy: See Copyright for the status of this software.
7 *
8 * Author: Daniel Veillard
9 */
10
11 #ifndef __XML_REGEXP_H__
12 #define __XML_REGEXP_H__
13
14 #include <stdio.h>
15 #include <libxml/xmlversion.h>
16 #include <libxml/xmlstring.h>
17
18 #ifdef LIBXML_REGEXP_ENABLED
19
20 #ifdef __cplusplus
21 extern "C" {
22 #endif
23
24 /**
25 * xmlRegexpPtr:
26 *
27 * A libxml regular expression; these can actually be far more complex
28 * than POSIX regular expressions.
 *
 * struct _xmlRegexp is only forward-declared in this header, so its
 * layout is not exposed here and values are handled through xmlRegexpPtr.
29 */
30 typedef struct _xmlRegexp xmlRegexp;
31 typedef xmlRegexp *xmlRegexpPtr;
32
33 /**
34 * xmlRegExecCtxtPtr:
35 *
36 * A libxml progressive regular expression evaluation context
 *
 * struct _xmlRegExecCtxt is only forward-declared in this header. In this
 * file, xmlRegNewExecCtxt() returns such a pointer and
 * xmlRegFreeExecCtxt() accepts one.
37 */
38 typedef struct _xmlRegExecCtxt xmlRegExecCtxt;
39 typedef xmlRegExecCtxt *xmlRegExecCtxtPtr;
40
41 /*
42 * The POSIX like API
 *
 * These entry points take the whole candidate string (@value) in a single
 * call, unlike the progressive API declared further down in this header.
43 */
/*
 * Builds an xmlRegexpPtr from the expression text @regexp.
 * NOTE(review): presumably returns NULL on failure - confirm against the
 * implementation.
 */
44 XMLPUBFUN xmlRegexpPtr
45 xmlRegexpCompile (const xmlChar *regexp);
/*
 * Takes an xmlRegexpPtr and returns nothing.
 * NOTE(review): presumably the release counterpart of xmlRegexpCompile() -
 * confirm, including whether NULL is accepted.
 */
46 XMLPUBFUN void xmlRegFreeRegexp(xmlRegexpPtr regexp);
/*
 * Evaluates @value against the compiled expression @comp and reports the
 * outcome as an int.
 * NOTE(review): presumably 1 = match, 0 = no match, negative = error -
 * confirm against the implementation.
 */
47 XMLPUBFUN int
48 xmlRegexpExec (xmlRegexpPtr comp,
49 const xmlChar *value);
/*
 * Takes the stdio stream @output and the expression @regexp; returns
 * nothing.
 * NOTE(review): presumably a debugging dump of @regexp to @output - confirm.
 */
50 XMLPUBFUN void
51 xmlRegexpPrint (FILE *output,
52 xmlRegexpPtr regexp);
/*
 * Reports, as an int, on the determinism of @comp.
 * NOTE(review): presumably 1 = deterministic, 0 = not, negative = error -
 * confirm against the implementation.
 */
53 XMLPUBFUN int
54 xmlRegexpIsDeterminist(xmlRegexpPtr comp);
55
56 /**
57 * xmlRegExecCallbacks:
58 * @exec: the regular expression context
59 * @token: the current token string
60 * @transdata: transition data
61 * @inputdata: input data
62 *
63 * Callback function invoked when a transition is taken in the automaton.
 *
 * The callback returns no value. A function of this type is what
 * xmlRegNewExecCtxt() takes as its @callback argument.
 * NOTE(review): @inputdata is presumably the @data pointer supplied to
 * xmlRegExecPushString()/xmlRegExecPushString2() - confirm.
64 */
65 typedef void (*xmlRegExecCallbacks) (xmlRegExecCtxtPtr exec,
66 const xmlChar *token,
67 void *transdata,
68 void *inputdata);
69
70 /*
71 * The progressive API
 *
 * Values are pushed into an execution context through
 * xmlRegExecPushString()/xmlRegExecPushString2() rather than being handed
 * over as one complete string.
72 */
/*
 * Returns an execution context for the compiled expression @comp.
 * @callback has type xmlRegExecCallbacks; @data is an untyped user pointer.
 * NOTE(review): presumably @callback may be NULL and NULL is returned on
 * failure - confirm against the implementation.
 */
73 XMLPUBFUN xmlRegExecCtxtPtr
74 xmlRegNewExecCtxt (xmlRegexpPtr comp,
75 xmlRegExecCallbacks callback,
76 void *data);
/*
 * Takes an execution context and returns nothing.
 * NOTE(review): presumably releases a context obtained from
 * xmlRegNewExecCtxt() - confirm.
 */
77 XMLPUBFUN void
78 xmlRegFreeExecCtxt (xmlRegExecCtxtPtr exec);
/*
 * Pushes the string @value into @exec along with an untyped @data pointer.
 * NOTE(review): presumably returns 1 when a final state is reached, 0 when
 * the state is not final, and a negative value on error - confirm.
 */
79 XMLPUBFUN int
80 xmlRegExecPushString(xmlRegExecCtxtPtr exec,
81 const xmlChar *value,
82 void *data);
/*
 * Variant of xmlRegExecPushString() that takes two strings, @value and
 * @value2.
 * NOTE(review): how the two strings are combined into one token is not
 * visible from this header - confirm against the implementation.
 */
83 XMLPUBFUN int
84 xmlRegExecPushString2(xmlRegExecCtxtPtr exec,
85 const xmlChar *value,
86 const xmlChar *value2,
87 void *data);
88
/*
 * Queries @exec; results are delivered through the @nbval, @nbneg, @values
 * and @terminal pointers, and an int status is returned.
 * NOTE(review): presumably *nbval carries the capacity of @values on entry
 * and the number of entries filled on return, and the result is 0 on
 * success / -1 on error - confirm.
 */
89 XMLPUBFUN int
90 xmlRegExecNextValues(xmlRegExecCtxtPtr exec,
91 int *nbval,
92 int *nbneg,
93 xmlChar **values,
94 int *terminal);
/*
 * Takes the same output parameters as xmlRegExecNextValues() plus @string,
 * a pointer through which a const xmlChar * is returned.
 * NOTE(review): presumably *string receives the token that caused the
 * error - confirm against the implementation.
 */
95 XMLPUBFUN int
96 xmlRegExecErrInfo (xmlRegExecCtxtPtr exec,
97 const xmlChar **string,
98 int *nbval,
99 int *nbneg,
100 xmlChar **values,
101 int *terminal);
102 #ifdef LIBXML_EXPR_ENABLED
103 /*
104 * Formal regular expression handling
105 * Its goal is to do some formal work on content models
 *
 * Everything in this section is declared only when LIBXML_EXPR_ENABLED
 * is defined.
106 */
107
108 /* Expressions are used within a context; struct _xmlExpCtxt is only
 * forward-declared in this header. */
109 typedef struct _xmlExpCtxt xmlExpCtxt;
110 typedef xmlExpCtxt *xmlExpCtxtPtr;
111
/*
 * Takes an expression context and returns nothing.
 * NOTE(review): presumably releases a context obtained from
 * xmlExpNewCtxt() - confirm.
 */
112 XMLPUBFUN void
113 xmlExpFreeCtxt (xmlExpCtxtPtr ctxt);
/*
 * Returns a new expression context configured with @maxNodes and @dict.
 * NOTE(review): xmlDictPtr is not declared by any header this file visibly
 * includes (stdio.h, xmlversion.h, xmlstring.h) - confirm that
 * <libxml/dict.h> is always pulled in before this declaration.
 * NOTE(review): presumably @maxNodes caps the number of nodes, @dict may be
 * NULL, and NULL is returned on failure - confirm.
 */
114 XMLPUBFUN xmlExpCtxtPtr
115 xmlExpNewCtxt (int maxNodes,
116 xmlDictPtr dict);
117
/*
 * Both return an int counter read from @ctxt.
 * NOTE(review): presumably the number of live nodes and the number of node
 * constructions respectively, for debugging, with -1 on error - confirm.
 */
118 XMLPUBFUN int
119 xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt);
120 XMLPUBFUN int
121 xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt);
122
123 /* Expressions are trees but the tree is opaque: struct _xmlExpNode is
 * only forward-declared in this header, so nodes are handled through
 * xmlExpNodePtr. */
124 typedef struct _xmlExpNode xmlExpNode;
125 typedef xmlExpNode *xmlExpNodePtr;
126
/*
 * Kinds of expression node, numbered explicitly from 0 to 5.
 * No declaration in this header takes or returns xmlExpNodeType.
 * NOTE(review): presumably EMPTY is the empty-token expression, FORBID the
 * empty language, ATOM a single name, SEQ and OR binary sequence and
 * choice, and COUNT a bounded repetition (compare xmlExpNewAtom(),
 * xmlExpNewSeq(), xmlExpNewOr(), xmlExpNewRange()) - confirm against the
 * implementation.
 */
127 typedef enum {
128 XML_EXP_EMPTY = 0,
129 XML_EXP_FORBID = 1,
130 XML_EXP_ATOM = 2,
131 XML_EXP_SEQ = 3,
132 XML_EXP_OR = 4,
133 XML_EXP_COUNT = 5
134 } xmlExpNodeType;
135
136 /*
137 * Two core expressions shared by all users: one for the empty language
138 * set and one for the set containing just the empty token.
 *
 * NOTE(review): presumably forbiddenExp is the former and emptyExp the
 * latter - confirm against the definitions.
 * NOTE(review): unlike every other public symbol in this header, these two
 * exported names carry no "xml" prefix.
139 */
140 XMLPUBVAR xmlExpNodePtr forbiddenExp;
141 XMLPUBVAR xmlExpNodePtr emptyExp;
142
143 /*
144 * Expressions are reference counted internally
 *
 * NOTE(review): presumably xmlExpRef() adds a reference and xmlExpFree()
 * drops one, destroying the node when the last reference goes - confirm
 * against the implementation.
145 */
/* Takes the owning context @ctxt and the node @expr; returns nothing. */
146 XMLPUBFUN void
147 xmlExpFree (xmlExpCtxtPtr ctxt,
148 xmlExpNodePtr expr);
/* Takes only the node @expr (no context argument); returns nothing. */
149 XMLPUBFUN void
150 xmlExpRef (xmlExpNodePtr expr);
151
152 /*
153 * Constructors can be either manual or from a string.
 *
 * Every constructor takes the context @ctxt and returns an xmlExpNodePtr.
 * NOTE(review): presumably NULL is returned on failure - confirm.
154 */
/*
 * Builds an expression from the text @expr. Note that @expr is a plain
 * const char *, not const xmlChar * as used elsewhere in this header.
 */
155 XMLPUBFUN xmlExpNodePtr
156 xmlExpParse (xmlExpCtxtPtr ctxt,
157 const char *expr);
/*
 * Builds an atom from @name with an explicit length @len.
 * NOTE(review): whether a negative @len means "use the whole string" is not
 * visible here - confirm.
 */
158 XMLPUBFUN xmlExpNodePtr
159 xmlExpNewAtom (xmlExpCtxtPtr ctxt,
160 const xmlChar *name,
161 int len);
/*
 * Combine two existing nodes, @left and @right, into a choice (Or) or a
 * sequence (Seq) node.
 * NOTE(review): presumably both calls consume the caller's references to
 * @left and @right - confirm the ownership rule before relying on it.
 */
162 XMLPUBFUN xmlExpNodePtr
163 xmlExpNewOr (xmlExpCtxtPtr ctxt,
164 xmlExpNodePtr left,
165 xmlExpNodePtr right);
166 XMLPUBFUN xmlExpNodePtr
167 xmlExpNewSeq (xmlExpCtxtPtr ctxt,
168 xmlExpNodePtr left,
169 xmlExpNodePtr right);
/*
 * Builds a node from @subset with the integer bounds @min and @max.
 * NOTE(review): presumably a repetition of @subset between @min and @max
 * times, with @max == -1 meaning unbounded and the reference to @subset
 * consumed - confirm.
 */
170 XMLPUBFUN xmlExpNodePtr
171 xmlExpNewRange (xmlExpCtxtPtr ctxt,
172 xmlExpNodePtr subset,
173 int min,
174 int max);
175 /*
176 * The really interesting APIs
177 */
/*
 * Both inspect @expr alone (no context argument) and return an int.
 * NOTE(review): presumably xmlExpIsNillable() tells whether @expr accepts
 * the empty sequence (1/0, -1 on error) and xmlExpMaxToken() gives the
 * maximum number of tokens @expr can accept - confirm.
 */
178 XMLPUBFUN int
179 xmlExpIsNillable(xmlExpNodePtr expr);
180 XMLPUBFUN int
181 xmlExpMaxToken (xmlExpNodePtr expr);
/*
 * Both fill a caller-supplied array of const xmlChar * (@langList or
 * @tokList) whose size is given by @len, and return an int.
 * NOTE(review): presumably xmlExpGetLanguage() lists every token used in
 * @expr, xmlExpGetStart() lists the tokens that can start a match, and both
 * return the count stored or -1 on error/overflow - confirm.
 */
182 XMLPUBFUN int
183 xmlExpGetLanguage(xmlExpCtxtPtr ctxt,
184 xmlExpNodePtr expr,
185 const xmlChar**langList,
186 int len);
187 XMLPUBFUN int
188 xmlExpGetStart (xmlExpCtxtPtr ctxt,
189 xmlExpNodePtr expr,
190 const xmlChar**tokList,
191 int len);
/*
 * Return a new node computed from @expr and, respectively, the string @str
 * of length @len or the sub-expression @sub.
 * NOTE(review): presumably these are the derivatives of @expr with respect
 * to a token / an expression, with NULL on error - confirm.
 */
192 XMLPUBFUN xmlExpNodePtr
193 xmlExpStringDerive(xmlExpCtxtPtr ctxt,
194 xmlExpNodePtr expr,
195 const xmlChar *str,
196 int len);
197 XMLPUBFUN xmlExpNodePtr
198 xmlExpExpDerive (xmlExpCtxtPtr ctxt,
199 xmlExpNodePtr expr,
200 xmlExpNodePtr sub);
/*
 * Compares @expr with @sub and reports the result as an int.
 * NOTE(review): presumably 1 if @expr accepts everything @sub accepts, 0 if
 * not, -1 on error - confirm.
 */
201 XMLPUBFUN int
202 xmlExpSubsume (xmlExpCtxtPtr ctxt,
203 xmlExpNodePtr expr,
204 xmlExpNodePtr sub);
/*
 * Takes the buffer @buf and the node @expr; returns nothing.
 * NOTE(review): presumably serializes @expr into @buf - confirm.
 * NOTE(review): xmlBufferPtr is not declared by any header this file
 * visibly includes - confirm that <libxml/tree.h> is always pulled in
 * before this declaration.
 */
205 XMLPUBFUN void
206 xmlExpDump (xmlBufferPtr buf,
207 xmlExpNodePtr expr);
208 #endif /* LIBXML_EXPR_ENABLED */
209 #ifdef __cplusplus
210 }
211 #endif
212
213 #endif /* LIBXML_REGEXP_ENABLED */
214
215 #endif /*__XML_REGEXP_H__ */