2007-08-30 Jonathan Pobst <monkey@jpobst.com>
[mono.git] / eglib / src / gmarkup.c
index a7b03d0237bddfd1fd0b9ed0b657aecdbee11651..5fcf0a786eb01db8e957048bbb597dd41796616e 100644 (file)
@@ -2,7 +2,18 @@
  * gmakrup.c: Minimal XML markup reader.
  *
  * Unlike the GLib one, this can not be restarted with more text
- * as the Mono use does not require it
+ * as the Mono use does not require it.
+ *
+ * Actually, with further thought, I think that this could be made
+ * to restart very easily.  The pos == end condition would mean
+ * "return to caller", and only in end_parse would it be a fatal
+ * error.
+ *
+ * Not that it matters to Mono, but although the change is simple in
+ * principle, there is one tricky situation: there are a few places where
+ * we check p+n in the source, and those would have to become progressive
+ * instead of depending on the string being complete at that point, so we
+ * would have to introduce extra states to cope with that.
  *
  * Author:
  *   Miguel de Icaza (miguel@novell.com)
 #include <stdio.h>
 #include <glib.h>
 
-#define set_error(msg...) do { if (error != NULL) *error = g_error_new (1, 1, msg); } while (0);
+/*
+ * set_error: store a newly created GError in the caller's `error' out
+ * parameter when one was supplied.  The macro expects a variable named
+ * `error' of type GError ** in the calling scope.  `msg' is a printf-style
+ * format and must be followed by at least one argument (use "%s" to report
+ * a plain string).
+ *
+ * There is deliberately no semicolon after while (0): the call site
+ * supplies it, which keeps the macro usable as the body of an if/else.
+ */
+#define set_error(msg, ...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, msg, __VA_ARGS__); } while (0)
 
+/* States of the character-driven parser in g_markup_parse_context_parse. */
 typedef enum {
        START, /* skipping leading white space, waiting for the first '<' */
+       START_ELEMENT, /* just past '<': element name and attributes follow */
+       TEXT, /* between tags: characters are buffered until the next '<' */
+       FLUSH_TEXT, /* first character after '<' that ended a text run */
+       CLOSING_ELEMENT, /* just past "</": pop the innermost open element */
+       COMMENT, /* inside "<!--", scanning for "-->" */
+       SKIP_XML_DECLARATION /* inside "<?": parsed like an element, no callbacks */
 } ParseState;
 
 struct _GMarkupParseContext {
@@ -42,6 +59,11 @@ struct _GMarkupParseContext {
        gpointer       user_data;
        GDestroyNotify user_data_dnotify;
        ParseState     state;
+
+       /* Stores the name of the current element, so we can issue the end_element */
+       GSList         *level;
+
+       GString        *text;
 };
 
 GMarkupParseContext *
@@ -62,14 +84,158 @@ g_markup_parse_context_new (const GMarkupParser *parser,
+/*
+ * g_markup_parse_context_free: release a parse context.
+ *
+ * Runs the user-data destroy notifier when one is set, then frees the
+ * buffered text, the names of the elements that are still open, the list
+ * that holds them, and finally the context itself.
+ */
 void
 g_markup_parse_context_free (GMarkupParseContext *context)
 {
+       GSList *node;
+
+       g_return_if_fail (context != NULL);
+
+       /* Give the owner the chance to dispose of its user data. */
+       if (context->user_data_dnotify != NULL)
+               context->user_data_dnotify (context->user_data);
+
+       /* Text that was collected but not released yet. */
+       if (context->text != NULL)
+               g_string_free (context->text, TRUE);
+
+       /* Every node carries the name of an element that is still open. */
+       node = context->level;
+       while (node != NULL){
+               g_free (node->data);
+               node = node->next;
+       }
+       g_slist_free (context->level);
        g_free (context);
 }
 
+/*
+ * skip_space: return the first position in [p, end) that does not hold a
+ * white space character, or `end' when only white space remains.
+ */
+static const char *
+skip_space (const char *p, const char *end)
+{
+       /*
+        * The <ctype.h> classifiers take an unsigned char value; a plain
+        * char with the high bit set may be negative, hence the cast.
+        */
+       while (p < end && isspace ((unsigned char) *p))
+               p++;
+       return p;
+}
+
+/*
+ * parse_value: parse a double-quoted attribute value that starts at `p'.
+ *
+ * On success a newly allocated, NUL-terminated copy of the text between
+ * the quotes is stored in `*value' and the position just past the closing
+ * quote is returned.  On failure `end' is returned and `*value' is left
+ * untouched; `error' is only set when the opening quote is missing, not
+ * when the closing quote is missing or the allocation fails.
+ *
+ * The caller must make sure that p < end.
+ */
+static const char *
+parse_value (const char *p, const char *end, char **value, GError **error)
+{
+       const char *start;
+       size_t len;
+
+       if (*p != '"'){
+               set_error ("%s", "Expected the attribute value to start with a quote");
+               return end;
+       }
+       start = ++p;
+       while (p < end && *p != '"')
+               p++;
+       if (p == end)
+               return end;
+       len = (size_t) (p - start);
+       p++;
+       /*
+        * When the closing quote is the last input character the result
+        * would equal `end', which the caller reads as failure.  Return
+        * before allocating so that the copy is not leaked.
+        */
+       if (p == end)
+               return end;
+       /* Values are released with g_strfreev, so use the GLib allocator. */
+       *value = g_malloc (len + 1);
+       if (*value == NULL)
+               return end;
+       memcpy (*value, start, len);
+       (*value) [len] = 0;
+       return p;
+}
+
+/*
+ * parse_name: parse a run of alphanumeric characters starting at `p'.
+ *
+ * When the run is followed by at least one more character, a newly
+ * malloc'ed, NUL-terminated copy of the run (possibly empty) is stored in
+ * `*value' and the position of the first character after it is returned.
+ * When the run reaches `end', or the allocation fails, `end' is returned
+ * and `*value' is left untouched.
+ */
+static const char *
+parse_name (const char *p, const char *end, char **value)
+{
+       const char *start = p;
+       size_t len;
+
+       /*
+        * The <ctype.h> classifiers take an unsigned char value; a plain
+        * char with the high bit set may be negative, hence the cast.
+        */
+       while (p < end && isalnum ((unsigned char) *p))
+               p++;
+       if (p == end)
+               return end;
+
+       len = (size_t) (p - start);
+       *value = malloc (len + 1);
+       if (*value == NULL)
+               return end;
+       memcpy (*value, start, len);
+       (*value) [len] = 0;
+       return p;
+}
+
+/*
+ * parse_attributes: parse zero or more name="value" pairs starting at `p'
+ * up to the end of the tag.
+ *
+ * Every pair is appended to the NULL-terminated arrays `*names' and
+ * `*values', which are grown with g_realloc.
+ *
+ * On success the returned pointer addresses the '>' that ends the tag.
+ * `*full_stop' is set to 1 when the tag ended with "/>" and to 0 when it
+ * ended with ">" or, while state is SKIP_XML_DECLARATION, with "?>".
+ *
+ * On failure `end' is returned.  `error' is only set for a missing '='
+ * or, through parse_value, a missing opening quote.  Pairs stored before
+ * the failure stay in the arrays for the caller to release.
+ */
+static const char *
+parse_attributes (const char *p, const char *end, char ***names, char ***values, GError **error, int *full_stop, int state)
+{
+       int nnames = 0;
+
+       while (TRUE){
+               p = skip_space (p, end);
+               if (p == end)
+                       return end;
+
+               if (*p == '>'){
+                       *full_stop = 0;
+                       return p;
+               }
+               if (state == SKIP_XML_DECLARATION && *p == '?' && ((p+1) < end) && *(p+1) == '>'){
+                       *full_stop = 0;
+                       return p+1;
+               }
+
+               if (*p == '/' && ((p+1) < end && *(p+1) == '>')){
+                       *full_stop = 1;
+                       return p+1;
+               } else {
+                       char *name, *value;
+
+                       /* parse_name allocates nothing when it returns end. */
+                       p = parse_name (p, end, &name);
+                       if (p == end)
+                               return p;
+
+                       p = skip_space (p, end);
+                       if (p == end){
+                               free (name);
+                               return p;
+                       }
+                       if (*p != '='){
+                               set_error ("Expected an = after the attribute name `%s'", name);
+                               free (name);
+                               return end;
+                       }
+                       p++;
+                       p = skip_space (p, end);
+                       if (p == end){
+                               free (name);
+                               return end;
+                       }
+
+                       p = parse_value (p, end, &value, error);
+                       if (p == end){
+                               free (name);
+                               return p;
+                       }
+
+                       /* One slot per pair plus the terminating NULL; the elements are char *. */
+                       ++nnames;
+                       *names = g_realloc (*names, sizeof (char *) * (nnames+1));
+                       *values = g_realloc (*values, sizeof (char *) * (nnames+1));
+                       (*names) [nnames-1] = name;
+                       (*values) [nnames-1] = value;
+                       (*names) [nnames] = NULL;
+                       (*values) [nnames] = NULL;
+               }
+       }
+}
+
+/*
+ * destroy_parse_state: discard everything accumulated while parsing.
+ *
+ * Frees the names of the elements that are still open together with the
+ * list holding them, frees the buffered text, and resets both fields to
+ * NULL.
+ */
+static void
+destroy_parse_state (GMarkupParseContext *context)
+{
+       GSList *node = context->level;
+
+       /* The list owns the element names stored in its nodes. */
+       while (node != NULL){
+               g_free (node->data);
+               node = node->next;
+       }
+       g_slist_free (context->level);
+
+       if (context->text != NULL)
+               g_string_free (context->text, TRUE);
+
+       context->text = NULL;
+       context->level = NULL;
+}
+
+/*
+ * g_markup_parse_context_parse: run the parser state machine over `text'.
+ *
+ * The characters from `text' up to `end' are consumed one state at a time
+ * (`end' is computed in lines that this hunk does not show).  The
+ * start_element, end_element and text callbacks of the parser are invoked
+ * as the corresponding constructs are recognized.
+ *
+ * Returns TRUE when the loop runs to completion.  On failure the parser's
+ * error callback is invoked if it is set and an error was stored, the
+ * accumulated parse state is destroyed, and FALSE is returned.
+ *
+ * NOTE(review): the name inside a closing tag is skipped, not compared
+ * with the element being closed.
+ */
 gboolean
 g_markup_parse_context_parse (GMarkupParseContext *context,
-                             const gchar *text, gssize text_len, GError **error)
+                             const gchar *text, gssize text_len,
+                             GError **error)
 {
-       char *p, *end;
+       const char *p,  *end;
        
        g_return_val_if_fail (context != NULL, FALSE);
        g_return_val_if_fail (text != NULL, FALSE);
@@ -79,47 +245,202 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
        
        for (p = text; p < end; p++){
                char c = *p;
-               
+
                switch (context->state){
                case START:
                        if (c == ' ' || c == '\t' || c == '\f' || c == '\n')
                                continue;
                        if (c == '<'){
-                               context->state = START_ELEMENT;
+                               if (p+1 < end && p [1] == '?'){
+                                       context->state = SKIP_XML_DECLARATION;
+                                       p++;
+                               } else
+                                       context->state = START_ELEMENT;
                                continue;
                        }
-                       set_error ("Expected < to start the document");
-                       
-                       return FALSE;
-
+                       set_error ("%s", "Expected < to start the document");
+                       goto fail;
 
+               /* A declaration is parsed like an element, without callbacks. */
+               case SKIP_XML_DECLARATION:
                case START_ELEMENT: {
-                       char *element_start = p;
-                       char **names, *values;
+                       const char *element_start = p, *element_end;
+                       char *ename = NULL;
+                       int full_stop = 0, l;
+                       gchar **names = NULL, **values = NULL;
+
+                       for (; p < end && isspace (*p); p++)
+                               ;
+                       if (p == end){
+                               set_error ("%s", "Unfinished element");
+                               goto fail;
+                       }
 
-                       if (!(isascii (*p) && isalpha (*p)))
-                               set_error ("Must start with a letter");
+                       if (*p == '!' && (p+2 < end) && (p [1] == '-') && (p [2] == '-')){
+                               context->state = COMMENT;
+                               p += 2;
+                               break;
+                       }
                        
-                       for (++p; p < end && isalnum (*p); p++)
+                       if (!(isascii (*p) && isalpha (*p))){
+                               set_error ("%s", "Expected an element name");
+                               goto fail;
+                       }
+                       
+                       for (++p; p < end && (isalnum (*p) || (*p == '.')); p++)
                                ;
                        if (p == end){
-                               set_error ("Expected an element");
-                               return FALSE;
+                               set_error ("%s", "Expected an element");
+                               goto fail;
                        }
+                       element_end = p;
+                       
                        for (; p < end && isspace (*p); p++)
                                ;
                        if (p == end){
-                               set_error ("Unfinished element");
-                               return FALSE;
+                               set_error ("%s", "Unfinished element");
+                               goto fail;
                        }
-                       p = parse_attributes (p, end, &names, &values);
+                       p = parse_attributes (p, end, &names, &values, error, &full_stop, context->state);
                        if (p == end){
-                               set_error ("unfinished element");
-                               return FALSE;
+                               if (names != NULL) {
+                                       g_strfreev (names);
+                                       g_strfreev (values);
+                               }
+                               /* Only set the error if parse_attributes did not */
+                               if (error != NULL && *error == NULL)
+                                       set_error ("%s", "Unfinished sequence");
+                               goto fail;
+                       }
+                       l = (int)(element_end - element_start);
+                       ename = malloc (l + 1);
+                       if (ename == NULL){
+                               /* Do not leak the arrays built by parse_attributes. */
+                               if (names != NULL){
+                                       g_strfreev (names);
+                                       g_strfreev (values);
+                               }
+                               goto fail;
+                       }
+                       strncpy (ename, element_start, l);
+                       ename [l] = 0;
+
+                       if (context->state == START_ELEMENT)
+                               if (context->parser.start_element != NULL)
+                                       context->parser.start_element (context, ename,
+                                                                      (const gchar **) names,
+                                                                      (const gchar **) values,
+                                                                      context->user_data, error);
+
+                       if (names != NULL){
+                               g_strfreev (names);
+                               g_strfreev (values);
+                       }
+
+                       if (error != NULL && *error != NULL){
+                               free (ename);
+                               goto fail;
                        }
                        
+                       if (context->state != START_ELEMENT){
+                               /*
+                                * An XML declaration has no closing tag, so its
+                                * name must not be pushed on the stack of open
+                                * elements: it would never be popped.
+                                */
+                               free (ename);
+                       } else if (full_stop){
+                               if (context->parser.end_element != NULL){
+                                       context->parser.end_element (context, ename, context->user_data, error);
+                                       if (error != NULL && *error != NULL){
+                                               free (ename);
+                                               goto fail;
+                                       }
+                               }
+                               free (ename);
+                       } else {
+                               /* The list takes ownership of the name until the closing tag. */
+                               context->level = g_slist_prepend (context->level, ename);
+                       }
+                       
+                       context->state = TEXT;
+                       break;
+               } /* case START_ELEMENT */
+
+               case TEXT: {
+                       if (c == '<'){
+                               context->state = FLUSH_TEXT;
+                               break;
+                       }
+                       /* Text is only buffered when somebody wants to receive it. */
+                       if (context->parser.text != NULL){
+                               if (context->text == NULL)
+                                       context->text = g_string_new ("");
+                               g_string_append_c (context->text, c);
+                       }
+                       break;
                }
-               }
+
+               case COMMENT:
+                       if (*p != '-')
+                               break;
+                       if (p+2 < end && (p [1] == '-') && (p [2] == '>')){
+                               context->state = TEXT;
+                               p += 2;
+                               break;
+                       }
+                       break;
+                       
+               case FLUSH_TEXT:
+                       /* The buffer does not exist when two tags are adjacent. */
+                       if (context->parser.text != NULL && context->text != NULL){
+                               context->parser.text (context, context->text->str, context->text->len,
+                                                     context->user_data, error);
+                               if (error != NULL && *error != NULL)
+                                       goto fail;
+                               /* Start a fresh buffer so that this text is delivered only once. */
+                               g_string_free (context->text, TRUE);
+                               context->text = NULL;
+                       }
+                       
+                       if (c == '/')
+                               context->state = CLOSING_ELEMENT;
+                       else {
+                               /* Step back so that START_ELEMENT sees this character again. */
+                               p--;
+                               context->state = START_ELEMENT;
+                       }
+                       break;
+
+               case CLOSING_ELEMENT: {
+                       GSList *current = context->level;
+                       char *text;
+
+                       if (context->level == NULL){
+                               set_error ("%s", "Too many closing tags, not enough open tags");
+                               goto fail;
+                       }
+                       
+                       text = current->data;
+                       if (context->parser.end_element != NULL){
+                               context->parser.end_element (context, text, context->user_data, error);
+                               /*
+                                * The name is still owned by context->level here;
+                                * the fail path releases it, so it must not be
+                                * freed a second time on this branch.
+                                */
+                               if (error != NULL && *error != NULL)
+                                       goto fail;
+                       }
+                       free (text);
+
+                       /* Skip the rest of the closing tag. */
+                       while (p < end && *p != '>')
+                               p++;
+                       
+                       context->level = context->level->next;
+                       g_slist_free_1 (current);
+                       context->state = TEXT;
+                       break;
+               } /* case CLOSING_ELEMENT */
+                       
+               } /* switch */
        }
+
+
+       return TRUE;
+ fail:
+       if (context->parser.error && error != NULL && *error)
+               context->parser.error (context, *error, context->user_data);
+       
+       destroy_parse_state (context);
+       return FALSE;
 }
 
+/*
+ * g_markup_parse_context_end_parse: signal that no more text will follow.
+ *
+ * Returns TRUE for any non-NULL context; `error' is never written.
+ */
+gboolean
+g_markup_parse_context_end_parse (GMarkupParseContext *context, GError **error)
+{
+       /*
+        * This implementation reports every error while parsing, so there
+        * is nothing left to check here.  The notes at the top of this file
+        * describe how end-of-input errors could be reported from this
+        * function instead.
+        */
+       g_return_val_if_fail (context != NULL, FALSE);
+
+       return TRUE;
+}