2006-09-01 Miguel de Icaza <miguel@novell.com>
[mono.git] / eglib / src / gmarkup.c
1 /*
2  * gmarkup.c: Minimal XML markup reader.
3  *
4  * Unlike the GLib one, this can not be restarted with more text
5  * as the Mono use does not require it
6  *
7  * Author:
8  *   Miguel de Icaza (miguel@novell.com)
9  *
10  * (C) 2006 Novell, Inc.
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining
13  * a copy of this software and associated documentation files (the
14  * "Software"), to deal in the Software without restriction, including
15  * without limitation the rights to use, copy, modify, merge, publish,
16  * distribute, sublicense, and/or sell copies of the Software, and to
17  * permit persons to whom the Software is furnished to do so, subject to
18  * the following conditions:
19  *
20  * The above copyright notice and this permission notice shall be
21  * included in all copies or substantial portions of the Software.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
27  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
28  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30  */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <glib.h>
33
/*
 * set_error: records a parse error for the enclosing function.
 *
 * Expects a variable named `error' (a GError **) to be in scope.  When it is
 * non-NULL, a new GError built from the printf-style arguments is stored in
 * `*error'; when it is NULL the macro does nothing.
 *
 * The expansion deliberately has no trailing semicolon so that
 * `set_error (...);' is exactly one statement and stays valid as the body of
 * an unbraced if/else.
 */
#define set_error(...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, __VA_ARGS__); } while (0)
35
/*
 * Scanner states used by g_markup_parse_context_parse ().  A freshly
 * zero-allocated context starts out in START.
 */
typedef enum {
        START           = 0,    /* before the document's first '<' */
        START_ELEMENT   = 1,    /* right after '<': element name and attributes follow */
        TEXT            = 2,    /* accumulating character data until the next '<' */
        FLUSH_TEXT      = 3,    /* a '<' ended the text run; deliver it, then pick open vs. close tag */
        CLOSING_ELEMENT = 4     /* right after "</" */
} ParseState;
43
44 struct _GMarkupParseContext {
45         GMarkupParser  parser;
46         gpointer       user_data;
47         GDestroyNotify user_data_dnotify;
48         ParseState     state;
49
50         /* Stores the name of the current element, so we can issue the end_element */
51         GSList         *level;
52
53         GString        *text;
54 };
55
56 GMarkupParseContext *
57 g_markup_parse_context_new (const GMarkupParser *parser,
58                             GMarkupParseFlags flags,
59                             gpointer user_data,
60                             GDestroyNotify user_data_dnotify)
61 {
62         GMarkupParseContext *context = g_new0 (GMarkupParseContext, 1);
63
64         context->parser = *parser;
65         context->user_data = user_data;
66         context->user_data_dnotify = user_data_dnotify;
67
68         return context;
69 }
70
71 void
72 g_markup_parse_context_free (GMarkupParseContext *context)
73 {
74         g_free (context);
75 }
76
/*
 * skip_space:
 * @p: current position
 * @end: one past the last readable byte
 *
 * Returns the first position in [p, end) that does not hold a whitespace
 * character, or @end when only whitespace remains.
 */
static const char *
skip_space (const char *p, const char *end)
{
        /*
         * The <ctype.h> classifiers are only defined for values representable
         * as unsigned char (or EOF), so bytes >= 0x80 must not be passed as a
         * possibly negative plain char.
         */
        for (; p < end && isspace ((unsigned char) *p); p++)
                ;
        return p;
}
84
85 static const char *
86 parse_value (const char *p, const char *end, char **value, GError **error)
87 {
88         const char *start;
89         int l;
90         
91         if (*p != '"'){
92                 set_error ("Expected the attribute value to start with a quote");
93                 return end;
94         }
95         start = ++p;
96         for (++p; p < end && *p != '"'; p++)
97         if (p == end)
98                 return end;
99         l = p - start;
100         p++;
101         *value = malloc (l + 1);
102         if (*value == NULL)
103                 return end;
104         strncpy (*value, start, l);
105         (*value) [l] = 0;
106         return p;
107 }
108
/*
 * parse_name:
 * @p: position where the name starts
 * @end: one past the last readable byte
 * @value: receives a newly allocated, NUL-terminated copy of the name, or
 *         NULL when the function fails
 *
 * Collects the run of alphanumeric characters starting at @p.  The run may be
 * empty, in which case an empty string is returned in @value.
 *
 * Returns the position of the first character after the name, or @end when
 * the name runs up to the end of the buffer or memory cannot be allocated.
 */
static const char *
parse_name (const char *p, const char *end, char **value)
{
        const char *start = p;
        int l;

        /* Give the caller a defined value to test or free on every failure path. */
        *value = NULL;

        /* Cast: <ctype.h> classifiers are undefined for negative plain char values. */
        for (; p < end && isalnum ((unsigned char) *p); p++)
                ;
        if (p >= end)
                return end;

        l = p - start;
        *value = malloc (l + 1);
        if (*value == NULL)
                return end;
        memcpy (*value, start, l);
        (*value) [l] = 0;
        return p;
}
128
129 static const char *
130 parse_attributes (const char *p, const char *end, char ***names, char ***values, GError **error, int *full_stop)
131 {
132         int nnames = 0;
133
134         while (TRUE){
135                 p = skip_space (p, end);
136                 if (p == end)
137                         return end;
138                         
139                 if (*p == '>'){
140                         *full_stop = 0;
141                         return p; 
142                 }
143                 if (*p == '/' && ((p+1) < end && *p == '>')){
144                         *full_stop = 1;
145                         return p+1;
146                 } else {
147                         char *name, *value;
148                         
149                         p = parse_name (p, end, &name);
150                         if (p == end)
151                                 return p;
152                         p = skip_space (p, end);
153                         if (p == end)
154                                 return p;
155                         if (*p != '='){
156                                 set_error ("Expected an = after the attribute name `%s'", name);
157                                 return end;
158                         }
159                         p++;
160                         p = skip_space (p, end);
161                         if (p == end)
162                                 return end;
163
164                         p = parse_value (p, end, &value, error);
165                         if (p == end)
166                                 return p;
167
168                         ++nnames;
169                         *names = g_realloc (*names, sizeof (char **) * (nnames+1));
170                         *values = g_realloc (*values, sizeof (char **) * (nnames+1));
171                         (*names) [nnames-1] = name;
172                         (*values) [nnames-1] = name;                    
173                         (*names) [nnames] = NULL;
174                         (*values) [nnames] = NULL;                      
175                 }
176         } 
177 }
178
179 static void
180 destroy_parse_state (GMarkupParseContext *context)
181 {
182         GSList *p;
183
184         for (p = context->level; p != NULL; p = p->next)
185                 g_free (p->data);
186         
187         g_slist_free (context->level);
188         if (context->text != NULL)
189                 g_string_free (context->text, TRUE);
190         context->text = NULL;
191         context->level = NULL;
192 }
193
194 gboolean
195 g_markup_parse_context_parse (GMarkupParseContext *context,
196                               const gchar *text, gssize text_len,
197                               GError **error)
198 {
199         const char *p,  *end;
200         
201         g_return_val_if_fail (context != NULL, FALSE);
202         g_return_val_if_fail (text != NULL, FALSE);
203         g_return_val_if_fail (text_len >= 0, FALSE);
204
205         end = text + text_len;
206         
207         for (p = text; p < end; p++){
208                 char c = *p;
209                 
210                 switch (context->state){
211                 case START:
212                         if (c == ' ' || c == '\t' || c == '\f' || c == '\n')
213                                 continue;
214                         if (c == '<'){
215                                 context->state = START_ELEMENT;
216                                 continue;
217                         }
218                         set_error ("Expected < to start the document");
219                         goto fail;
220
221
222                 case START_ELEMENT: {
223                         const char *element_start = p, *element_end;
224                         char *ename = NULL;
225                         int full_stop = 0, l;
226                         gchar **names = NULL, **values = NULL;
227
228                         for (; p < end && isspace (*p); p++)
229                                 ;
230                         if (p == end){
231                                 set_error ("Unfinished element");
232                                 goto fail;
233                         }
234                         if (!(isascii (*p) && isalpha (*p))){
235                                 set_error ("Expected an element name");
236                                 goto fail;
237                         }
238                         
239                         for (++p; p < end && isalnum (*p); p++)
240                                 ;
241                         if (p == end){
242                                 set_error ("Expected an element");
243                                 goto fail;
244                         }
245                         element_end = p;
246                         
247                         for (; p < end && isspace (*p); p++)
248                                 ;
249                         if (p == end){
250                                 set_error ("Unfinished element");
251                                 goto fail;
252                         }
253                         p = parse_attributes (p, end, &names, &values, error, &full_stop);
254                         if (p == end){
255                                 if (names != NULL) {
256                                         g_strfreev (names);
257                                         g_strfreev (values);
258                                 }
259                                 
260                                 set_error ("Unfinished sequence");
261                                 goto fail;
262                         }
263                         l = element_end - element_start;
264                         ename = malloc (l + 1);
265                         if (ename == NULL)
266                                 goto fail;
267                         strncpy (ename, element_start, l);
268                         ename [l] = 0;
269                         
270                         if (context->parser.start_element != NULL)
271                                 context->parser.start_element (context, ename,
272                                                                (const gchar **) names,
273                                                                (const gchar **) values,
274                                                                context->user_data, error);
275
276                         if (names != NULL){
277                                 g_strfreev (names);
278                                 g_strfreev (values);
279                         }
280
281                         if (*error != NULL)
282                                 goto fail;
283                         
284                         if (full_stop){
285                                 if (context->parser.end_element != NULL){
286                                         context->parser.end_element (context, ename, context->user_data, error);
287                                         if (*error != NULL)
288                                                 goto fail;
289                                 }
290                         } else
291                                 context->level = g_slist_prepend (context->level, ename);
292                         
293                         context->state = TEXT;
294                         break;
295                 } /* case START_ELEMENT */
296
297                 case TEXT: {
298                         if (c == '<'){
299                                 context->state = FLUSH_TEXT;
300                                 break;
301                         }
302                         if (context->parser.text != NULL){
303                                 if (context->text == NULL)
304                                         context->text = g_string_new ("");
305                                 g_string_append_c (context->text, c);
306                         }
307                         break;
308                 }
309
310                 case FLUSH_TEXT:
311                         if (context->parser.text != NULL){
312                                 context->parser.text (context, context->text->str, context->text->len,
313                                                       context->user_data, error);
314                                 if (*error != NULL)
315                                         goto fail;
316                         }
317                         
318                         if (c == '/')
319                                 context->state = CLOSING_ELEMENT;
320                         else {
321                                 p--;
322                                 context->state = START_ELEMENT;
323                         }
324                         break;
325
326                 case CLOSING_ELEMENT: {
327                         GSList *current = context->level;
328
329                         if (context->level == NULL){
330                                 set_error ("Too many closing tags, not enough open tags");
331                                 goto fail;
332                         }
333                         
334                         if (context->parser.end_element != NULL){
335                                 char *text = current->data;
336                                 
337                                 context->parser.end_element (context, text, context->user_data, error);
338                                 if (*error != NULL)
339                                         goto fail;
340                         }
341                         context->level = context->level->next;
342                         g_slist_free (current);
343                         break;
344                 } /* case CLOSING_ELEMENT */
345                         
346                 } /* switch */
347         }
348
349
350         return TRUE;
351  fail:
352         if (context->parser.error)
353                 context->parser.error (context, *error, context->user_data);
354         
355         destroy_parse_state (context);
356         return FALSE;
357 }
358
359