Enabled g_mem_set_vtable through the configure option --with-overridable-allocators...
[mono.git] / eglib / src / gmarkup.c
index ed0b79b77ad0be42b1b5c061dbfe0ac67b2a2152..4e6c6641fef542ac4fb758a51d27281baf556340 100644 (file)
@@ -2,7 +2,18 @@
  * gmakrup.c: Minimal XML markup reader.
  *
  * Unlike the GLib one, this can not be restarted with more text
- * as the Mono use does not require it
+ * as the Mono use does not require it.
+ *
+ * Actually, with further thought, I think that this could be made
+ * to restart very easily.  The pos == end condition would mean
+ * "return to caller", and only in end_parse would this be a fatal
+ * error.
+ *
+ * Not that it matters to Mono, and although the change is mostly simple,
+ * there is one tricky situation: a few places in the source check p+n,
+ * and those would have to become progressive instead of depending on
+ * the string being complete at that point, so we would have to
+ * introduce extra states to cope with that.
  *
  * Author:
  *   Miguel de Icaza (miguel@novell.com)
  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 #include <stdio.h>
+#include <ctype.h>
 #include <glib.h>
 
-#define set_error(msg...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, msg); } while (0);
+#define set_error(msg, ...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, msg, __VA_ARGS__); } while (0);
 
 typedef enum {
        START,
        START_ELEMENT,
-       TEXT
+       TEXT,
+       FLUSH_TEXT,
+       CLOSING_ELEMENT,
+       COMMENT,
+       SKIP_XML_DECLARATION
 } ParseState;
 
 struct _GMarkupParseContext {
@@ -44,6 +60,11 @@ struct _GMarkupParseContext {
        gpointer       user_data;
        GDestroyNotify user_data_dnotify;
        ParseState     state;
+
+       /* Stack of the names of the currently open elements, so we can issue the end_element */
+       GSList         *level;
+
+       GString        *text;
 };
 
 GMarkupParseContext *
@@ -64,13 +85,52 @@ g_markup_parse_context_new (const GMarkupParser *parser,
 void
 g_markup_parse_context_free (GMarkupParseContext *context)
 {
+       GSList *l;
+       
+       g_return_if_fail (context != NULL);
+
+       if (context->user_data_dnotify != NULL)
+               (context->user_data_dnotify) (context->user_data);
+       
+       if (context->text != NULL)
+               g_string_free (context->text, TRUE);
+       for (l = context->level; l; l = l->next)
+               g_free (l->data);
+       g_slist_free (context->level);
        g_free (context);
 }
 
+static gboolean
+my_isspace (char c)
+{
+       if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v')
+               return TRUE;
+       return FALSE;
+}
+
+static gboolean
+my_isalnum (char c)
+{
+       if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+               return TRUE;
+       if (c >= '0' && c <= '9')
+               return TRUE;
+
+       return FALSE;
+}
+
+static gboolean
+my_isalpha (char c)
+{
+       if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+               return TRUE;
+       return FALSE;
+}
+
 static const char *
 skip_space (const char *p, const char *end)
 {
-       for (; p < end && isspace (*p); p++)
+       for (; p < end && my_isspace (*p); p++)
                ;
        return p;
 }
@@ -82,16 +142,17 @@ parse_value (const char *p, const char *end, char **value, GError **error)
        int l;
        
        if (*p != '"'){
-               set_error ("Expected the attribute value to start with a quote");
+               set_error ("%s", "Expected the attribute value to start with a quote");
                return end;
        }
        start = ++p;
-       for (++p; p < end && *p != '"'; p++)
+       for (; p < end && *p != '"'; p++)
+               ;
        if (p == end)
                return end;
-       l = p - start;
+       l = (int)(p - start);
        p++;
-       *value = malloc (l + 1);
+       *value = g_malloc (l + 1);
        if (*value == NULL)
                return end;
        strncpy (*value, start, l);
@@ -105,13 +166,13 @@ parse_name (const char *p, const char *end, char **value)
        const char *start = p;
        int l;
        
-       for (; p < end && isalnum (*p); p++)
+       for (; p < end && my_isalnum (*p); p++)
                ;
        if (p == end)
                return end;
 
-       l = p - start;
-       *value = malloc (l + 1);
+       l = (int)(p - start);
+       *value = g_malloc (l + 1);
        if (*value == NULL)
                return end;
        strncpy (*value, start, l);
@@ -120,7 +181,7 @@ parse_name (const char *p, const char *end, char **value)
 }
 
 static const char *
-parse_attributes (const char *p, const char *end, char ***names, char ***values, GError **error, int *full_stop)
+parse_attributes (const char *p, const char *end, char ***names, char ***values, GError **error, int *full_stop, int state)
 {
        int nnames = 0;
 
@@ -133,7 +194,12 @@ parse_attributes (const char *p, const char *end, char ***names, char ***values,
                        *full_stop = 0;
                        return p; 
                }
-               if (*p == '/' && ((p+1) < end && *p == '>')){
+               if (state == SKIP_XML_DECLARATION && *p == '?' && ((p+1) < end) && *(p+1) == '>'){
+                       *full_stop = 0;
+                       return p+1;
+               }
+               
+               if (*p == '/' && ((p+1) < end && *(p+1) == '>')){
                        *full_stop = 1;
                        return p+1;
                } else {
@@ -142,33 +208,56 @@ parse_attributes (const char *p, const char *end, char ***names, char ***values,
                        p = parse_name (p, end, &name);
                        if (p == end)
                                return p;
+
                        p = skip_space (p, end);
-                       if (p == end)
+                       if (p == end){
+                               g_free (name);
                                return p;
+                       }
                        if (*p != '='){
                                set_error ("Expected an = after the attribute name `%s'", name);
+                               g_free (name);
                                return end;
                        }
                        p++;
                        p = skip_space (p, end);
-                       if (p == end)
+                       if (p == end){
+                               g_free (name);
                                return end;
+                       }
 
                        p = parse_value (p, end, &value, error);
-                       if (p == end)
+                       if (p == end){
+                               g_free (name);
                                return p;
+                       }
 
                        ++nnames;
                        *names = g_realloc (*names, sizeof (char **) * (nnames+1));
                        *values = g_realloc (*values, sizeof (char **) * (nnames+1));
                        (*names) [nnames-1] = name;
-                       (*values) [nnames-1] = name;                    
+                       (*values) [nnames-1] = value;
                        (*names) [nnames] = NULL;
                        (*values) [nnames] = NULL;                      
                }
        } 
 }
 
+static void
+destroy_parse_state (GMarkupParseContext *context)
+{
+       GSList *p;
+
+       for (p = context->level; p != NULL; p = p->next)
+               g_free (p->data);
+       
+       g_slist_free (context->level);
+       if (context->text != NULL)
+               g_string_free (context->text, TRUE);
+       context->text = NULL;
+       context->level = NULL;
+}
+
 gboolean
 g_markup_parse_context_parse (GMarkupParseContext *context,
                              const gchar *text, gssize text_len,
@@ -184,81 +273,202 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
        
        for (p = text; p < end; p++){
                char c = *p;
-               
+
                switch (context->state){
                case START:
-                       if (c == ' ' || c == '\t' || c == '\f' || c == '\n')
+                       if (c == ' ' || c == '\t' || c == '\f' || c == '\n' || (c & 0x80))
                                continue;
                        if (c == '<'){
-                               context->state = START_ELEMENT;
+                               if (p+1 < end && p [1] == '?'){
+                                       context->state = SKIP_XML_DECLARATION;
+                                       p++;
+                               } else
+                                       context->state = START_ELEMENT;
                                continue;
                        }
-                       set_error ("Expected < to start the document");
-                       
-                       return FALSE;
-
+                       set_error ("%s", "Expected < to start the document");
+                       goto fail;
 
+               case SKIP_XML_DECLARATION:
                case START_ELEMENT: {
                        const char *element_start = p, *element_end;
-                       int full_stop = 0;
+                       char *ename = NULL;
+                       int full_stop = 0, l;
                        gchar **names = NULL, **values = NULL;
 
-                       if (!(isascii (*p) && isalpha (*p)))
-                               set_error ("Must start with a letter");
+                       for (; p < end && my_isspace (*p); p++)
+                               ;
+                       if (p == end){
+                               set_error ("%s", "Unfinished element");
+                               goto fail;
+                       }
+
+                       if (*p == '!' && (p+2 < end) && (p [1] == '-') && (p [2] == '-')){
+                               context->state = COMMENT;
+                               p += 2;
+                               break;
+                       }
+                       
+                       if (!my_isalpha (*p)){
+                               set_error ("%s", "Expected an element name");
+                               goto fail;
+                       }
                        
-                       for (++p; p < end && isalnum (*p); p++)
+                       for (++p; p < end && (my_isalnum (*p) || (*p == '.')); p++)
                                ;
                        if (p == end){
-                               set_error ("Expected an element");
-                               return FALSE;
+                               set_error ("%s", "Expected an element");
+                               goto fail;
                        }
                        element_end = p;
                        
-                       for (; p < end && isspace (*p); p++)
+                       for (; p < end && my_isspace (*p); p++)
                                ;
                        if (p == end){
-                               set_error ("Unfinished element");
-                               return FALSE;
+                               set_error ("%s", "Unfinished element");
+                               goto fail;
                        }
-                       p = parse_attributes (p, end, &names, &values, error, &full_stop);
+                       p = parse_attributes (p, end, &names, &values, error, &full_stop, context->state);
                        if (p == end){
-                               if (*error == NULL)
-                                       set_error ("Unfinished sequence");
-                               
-                               return FALSE;
-                       }
-                       if (context->parser.start_element != NULL){
-                               int l = element_end - element_start;
-                               char *ename = malloc (l + 1);
-
-                               if (ename == NULL)
-                                       return FALSE;
-                               strncpy (ename, element_start, l);
-                               ename [l] = 0;
-                               
-                               context->parser.start_element (context, ename,
-                                                              (const gchar **) names,
-                                                              (const gchar **) values,
-                                                              context->user_data, error);
-                               free (ename);
+                               if (names != NULL) {
+                                       g_strfreev (names);
+                                       g_strfreev (values);
+                               }
+                               /* Only set the error if parse_attributes did not */
+                               if (error != NULL && *error == NULL)
+                                       set_error ("%s", "Unfinished sequence");
+                               goto fail;
                        }
+                       l = (int)(element_end - element_start);
+                       ename = g_malloc (l + 1);
+                       if (ename == NULL)
+                               goto fail;
+                       strncpy (ename, element_start, l);
+                       ename [l] = 0;
+
+                       if (context->state == START_ELEMENT)
+                               if (context->parser.start_element != NULL)
+                                       context->parser.start_element (context, ename,
+                                                                      (const gchar **) names,
+                                                                      (const gchar **) values,
+                                                                      context->user_data, error);
+
                        if (names != NULL){
                                g_strfreev (names);
                                g_strfreev (values);
                        }
-                       if (*error != NULL)
-                               return FALSE;
-                       context->state = full_stop ? START : TEXT;
+
+                       if (error != NULL && *error != NULL){
+                               g_free (ename);
+                               goto fail;
+                       }
+                       
+                       if (full_stop){
+                               if (context->parser.end_element != NULL &&  context->state == START_ELEMENT){
+                                       context->parser.end_element (context, ename, context->user_data, error);
+                                       if (error != NULL && *error != NULL){
+                                               g_free (ename);
+                                               goto fail;
+                                       }
+                               }
+                               g_free (ename);
+                       } else {
+                               context->level = g_slist_prepend (context->level, ename);
+                       }
+                       
+                       context->state = TEXT;
                        break;
                } /* case START_ELEMENT */
 
                case TEXT: {
+                       if (c == '<'){
+                               context->state = FLUSH_TEXT;
+                               break;
+                       }
+                       if (context->parser.text != NULL){
+                               if (context->text == NULL)
+                                       context->text = g_string_new ("");
+                               g_string_append_c (context->text, c);
+                       }
                        break;
                }
+
+               case COMMENT:
+                       if (*p != '-')
+                               break;
+                       if (p+2 < end && (p [1] == '-') && (p [2] == '>')){
+                               context->state = TEXT;
+                               p += 2;
+                               break;
+                       }
+                       break;
                        
-               }
+               case FLUSH_TEXT:
+                       if (context->parser.text != NULL && context->text != NULL){
+                               context->parser.text (context, context->text->str, context->text->len,
+                                                     context->user_data, error);
+                               if (error != NULL && *error != NULL)
+                                       goto fail;
+                       }
+                       
+                       if (c == '/')
+                               context->state = CLOSING_ELEMENT;
+                       else {
+                               p--;
+                               context->state = START_ELEMENT;
+                       }
+                       break;
+
+               case CLOSING_ELEMENT: {
+                       GSList *current = context->level;
+                       char *text;
+
+                       if (context->level == NULL){
+                               set_error ("%s", "Too many closing tags, not enough open tags");
+                               goto fail;
+                       }
+                       
+                       text = current->data;
+                       if (context->parser.end_element != NULL){
+                               context->parser.end_element (context, text, context->user_data, error);
+                               if (error != NULL && *error != NULL){
+                                       g_free (text);
+                                       goto fail;
+                               }
+                       }
+                       g_free (text);
+
+                       while (p < end && *p != '>')
+                               p++;
+                       
+                       context->level = context->level->next;
+                       g_slist_free_1 (current);
+                       context->state = TEXT;
+                       break;
+               } /* case CLOSING_ELEMENT */
+                       
+               } /* switch */
        }
 
+
        return TRUE;
+ fail:
+       if (context->parser.error && error != NULL && *error)
+               context->parser.error (context, *error, context->user_data);
+       
+       destroy_parse_state (context);
+       return FALSE;
 }
 
+gboolean
+g_markup_parse_context_end_parse (GMarkupParseContext *context, GError **error)
+{
+       g_return_val_if_fail (context != NULL, FALSE);
+
+       /*
+        * In our case, we always signal errors during parse, not at the end;
+        * see the notes at the top of this file for details on how this
+        * could be moved here.
+        */
+       return TRUE;
+}