Enabled g_mem_set_vtable through the configure option --with-overridable-allocators...
[mono.git] / eglib / src / gmarkup.c
index 47d859aa697f16ef2f7b29b2dd3565d7299ec4f4..4e6c6641fef542ac4fb758a51d27281baf556340 100644 (file)
@@ -2,7 +2,18 @@
  * gmakrup.c: Minimal XML markup reader.
  *
  * Unlike the GLib one, this can not be restarted with more text
- * as the Mono use does not require it
+ * as the Mono use does not require it.
+ *
+ * Actually, with further thought, I think that this could be made
+ * to restart very easily.  The pos == end condition would mean
+ * "return to caller", and it would be a fatal error only when it is
+ * reached at end-of-parse time.
+ *
+ * Not that it matters to Mono, and although the change is mostly simple,
+ * there is one tricky situation: in a few places the source checks p+n,
+ * and those checks would have to become progressive instead of
+ * depending on the string being complete at that point, so we would
+ * have to introduce extra states to cope with that.
  *
  * Author:
  *   Miguel de Icaza (miguel@novell.com)
  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 #include <stdio.h>
+#include <ctype.h>
 #include <glib.h>
 
-#define set_error(msg...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, msg); } while (0);
+/*
+ * Stores a new GError in *error when `error' is non-NULL.
+ * Expects a `GError **error' variable in the enclosing scope, and needs at
+ * least one argument after the format string (use "%s" for fixed messages).
+ * There is no trailing semicolon, so `set_error (...);' is one statement.
+ */
+#define set_error(msg, ...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, msg, __VA_ARGS__); } while (0)
 
 typedef enum {
        START,
        START_ELEMENT,
        TEXT,
        FLUSH_TEXT,
-       CLOSING_ELEMENT
+       CLOSING_ELEMENT,        /* pops the innermost open element and reports end_element */
+       COMMENT,                /* inside <!-- ... -->; input is skipped until --> is seen */
+       SKIP_XML_DECLARATION    /* inside <? ... ?>; parsed like an element, start_element is not called */
 } ParseState;
 
 struct _GMarkupParseContext {
@@ -71,13 +85,52 @@ g_markup_parse_context_new (const GMarkupParser *parser,
 void
 g_markup_parse_context_free (GMarkupParseContext *context)
 {
+       GSList *l;
+       
+       g_return_if_fail (context != NULL);
+
+       if (context->user_data_dnotify != NULL)
+               (context->user_data_dnotify) (context->user_data);
+       
+       if (context->text != NULL)
+               g_string_free (context->text, TRUE);
+       for (l = context->level; l; l = l->next)
+               g_free (l->data);
+       g_slist_free (context->level);
        g_free (context);
 }
 
+static gboolean
+my_isspace (char c)
+{
+       /*
+        * Locale-independent whitespace test: only these five ASCII
+        * characters count ('\f' is not treated as whitespace here).
+        */
+       switch (c){
+       case ' ':
+       case '\t':
+       case '\r':
+       case '\n':
+       case '\v':
+               return TRUE;
+       default:
+               return FALSE;
+       }
+}
+
+static gboolean
+my_isalnum (char c)
+{
+       /* ASCII letters and decimal digits only, independent of the locale */
+       gboolean lower = (c >= 'a' && c <= 'z');
+       gboolean upper = (c >= 'A' && c <= 'Z');
+       gboolean digit = (c >= '0' && c <= '9');
+
+       return (lower || upper || digit) ? TRUE : FALSE;
+}
+
+static gboolean
+my_isalpha (char c)
+{
+       /* ASCII letters only, independent of the locale */
+       if (c >= 'a' && c <= 'z')
+               return TRUE;
+       return (c >= 'A' && c <= 'Z') ? TRUE : FALSE;
+}
+
 static const char *
 skip_space (const char *p, const char *end)
 {
-       for (; p < end && isspace (*p); p++)
+       for (; p < end && my_isspace (*p); p++) /* advance past ASCII whitespace, never beyond end; returns the stop position */
                ;
        return p;
 }
@@ -89,16 +142,17 @@ parse_value (const char *p, const char *end, char **value, GError **error)
        int l;
        
        if (*p != '"'){
-               set_error ("Expected the attribute value to start with a quote");
+               set_error ("%s", "Expected the attribute value to start with a quote");
                return end;
        }
        start = ++p;
-       for (++p; p < end && *p != '"'; p++)
+       for (; p < end && *p != '"'; p++)
+               ;
        if (p == end)
                return end;
-       l = p - start;
+       l = (int)(p - start);
        p++;
-       *value = malloc (l + 1);
+       *value = g_malloc (l + 1);
        if (*value == NULL)
                return end;
        strncpy (*value, start, l);
@@ -112,13 +166,13 @@ parse_name (const char *p, const char *end, char **value)
        const char *start = p;
        int l;
        
-       for (; p < end && isalnum (*p); p++)
+       for (; p < end && my_isalnum (*p); p++)
                ;
        if (p == end)
                return end;
 
-       l = p - start;
-       *value = malloc (l + 1);
+       l = (int)(p - start);
+       *value = g_malloc (l + 1);
        if (*value == NULL)
                return end;
        strncpy (*value, start, l);
@@ -127,7 +181,7 @@ parse_name (const char *p, const char *end, char **value)
 }
 
 static const char *
-parse_attributes (const char *p, const char *end, char ***names, char ***values, GError **error, int *full_stop)
+parse_attributes (const char *p, const char *end, char ***names, char ***values, GError **error, int *full_stop, int state)
 {
        int nnames = 0;
 
@@ -140,7 +194,12 @@ parse_attributes (const char *p, const char *end, char ***names, char ***values,
                        *full_stop = 0;
                        return p; 
                }
-               if (*p == '/' && ((p+1) < end && *p == '>')){
+               if (state == SKIP_XML_DECLARATION && *p == '?' && ((p+1) < end) && *(p+1) == '>'){
+                       *full_stop = 0;
+                       return p+1;
+               }
+               
+               if (*p == '/' && ((p+1) < end && *(p+1) == '>')){
                        *full_stop = 1;
                        return p+1;
                } else {
@@ -149,27 +208,35 @@ parse_attributes (const char *p, const char *end, char ***names, char ***values,
                        p = parse_name (p, end, &name);
                        if (p == end)
                                return p;
+
                        p = skip_space (p, end);
-                       if (p == end)
+                       if (p == end){
+                               g_free (name);
                                return p;
+                       }
                        if (*p != '='){
                                set_error ("Expected an = after the attribute name `%s'", name);
+                               g_free (name);
                                return end;
                        }
                        p++;
                        p = skip_space (p, end);
-                       if (p == end)
+                       if (p == end){
+                               g_free (name);
                                return end;
+                       }
 
                        p = parse_value (p, end, &value, error);
-                       if (p == end)
+                       if (p == end){
+                               g_free (name);
                                return p;
+                       }
 
                        ++nnames;
                        *names = g_realloc (*names, sizeof (char **) * (nnames+1));
                        *values = g_realloc (*values, sizeof (char **) * (nnames+1));
                        (*names) [nnames-1] = name;
-                       (*values) [nnames-1] = name;                    
+                       (*values) [nnames-1] = value;
                        (*names) [nnames] = NULL;
                        (*values) [nnames] = NULL;                      
                }
@@ -206,89 +273,108 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
        
        for (p = text; p < end; p++){
                char c = *p;
-               
+
                switch (context->state){
                case START:
-                       if (c == ' ' || c == '\t' || c == '\f' || c == '\n')
+                       if (c == ' ' || c == '\t' || c == '\f' || c == '\n' || (c & 0x80))
                                continue;
                        if (c == '<'){
-                               context->state = START_ELEMENT;
+                               if (p+1 < end && p [1] == '?'){
+                                       context->state = SKIP_XML_DECLARATION;
+                                       p++;
+                               } else
+                                       context->state = START_ELEMENT;
                                continue;
                        }
-                       set_error ("Expected < to start the document");
+                       set_error ("%s", "Expected < to start the document");
                        goto fail;
 
-
+               case SKIP_XML_DECLARATION:
                case START_ELEMENT: {
                        const char *element_start = p, *element_end;
                        char *ename = NULL;
                        int full_stop = 0, l;
                        gchar **names = NULL, **values = NULL;
 
-                       for (; p < end && isspace (*p); p++)
+                       for (; p < end && my_isspace (*p); p++)
                                ;
                        if (p == end){
-                               set_error ("Unfinished element");
+                               set_error ("%s", "Unfinished element");
                                goto fail;
                        }
-                       if (!(isascii (*p) && isalpha (*p))){
-                               set_error ("Expected an element name");
+
+                       if (*p == '!' && (p+2 < end) && (p [1] == '-') && (p [2] == '-')){
+                               context->state = COMMENT;
+                               p += 2;
+                               break;
+                       }
+                       
+                       if (!my_isalpha (*p)){
+                               set_error ("%s", "Expected an element name");
                                goto fail;
                        }
                        
-                       for (++p; p < end && isalnum (*p); p++)
+                       for (++p; p < end && (my_isalnum (*p) || (*p == '.')); p++)
                                ;
                        if (p == end){
-                               set_error ("Expected an element");
+                               set_error ("%s", "Expected an element");
                                goto fail;
                        }
                        element_end = p;
                        
-                       for (; p < end && isspace (*p); p++)
+                       for (; p < end && my_isspace (*p); p++)
                                ;
                        if (p == end){
-                               set_error ("Unfinished element");
+                               set_error ("%s", "Unfinished element");
                                goto fail;
                        }
-                       p = parse_attributes (p, end, &names, &values, error, &full_stop);
+                       p = parse_attributes (p, end, &names, &values, error, &full_stop, context->state);
                        if (p == end){
                                if (names != NULL) {
                                        g_strfreev (names);
                                        g_strfreev (values);
                                }
-                               
-                               set_error ("Unfinished sequence");
+                               /* Only set the error if parse_attributes did not */
+                               if (error != NULL && *error == NULL)
+                                       set_error ("%s", "Unfinished sequence");
                                goto fail;
                        }
-                       l = element_end - element_start;
-                       ename = malloc (l + 1);
+                       l = (int)(element_end - element_start);
+                       ename = g_malloc (l + 1);
                        if (ename == NULL)
                                goto fail;
                        strncpy (ename, element_start, l);
                        ename [l] = 0;
-                       
-                       if (context->parser.start_element != NULL)
-                               context->parser.start_element (context, ename,
-                                                              (const gchar **) names,
-                                                              (const gchar **) values,
-                                                              context->user_data, error);
+
+                       if (context->state == START_ELEMENT)
+                               if (context->parser.start_element != NULL)
+                                       context->parser.start_element (context, ename,
+                                                                      (const gchar **) names,
+                                                                      (const gchar **) values,
+                                                                      context->user_data, error);
 
                        if (names != NULL){
                                g_strfreev (names);
                                g_strfreev (values);
                        }
 
-                       if (*error != NULL)
+                       if (error != NULL && *error != NULL){
+                               g_free (ename);
                                goto fail;
+                       }
                        
                        if (full_stop){
-                               if (context->parser.end_element != NULL){
+                               if (context->parser.end_element != NULL &&  context->state == START_ELEMENT){
                                        context->parser.end_element (context, ename, context->user_data, error);
-                                       if (*error != NULL)
+                                       if (error != NULL && *error != NULL){
+                                               g_free (ename);
                                                goto fail;
+                                       }
                                }
-                       } else
+                               g_free (ename);
+                       } else {
                                context->level = g_slist_prepend (context->level, ename);
+                       }
                        
                        context->state = TEXT;
                        break;
@@ -307,11 +393,21 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
                        break;
                }
 
+               case COMMENT:
+                       if (*p != '-')
+                               break;
+                       if (p+2 < end && (p [1] == '-') && (p [2] == '>')){
+                               context->state = TEXT;
+                               p += 2;
+                               break;
+                       }
+                       break;
+                       
                case FLUSH_TEXT:
-                       if (context->parser.text != NULL){
+                       if (context->parser.text != NULL && context->text != NULL){
                                context->parser.text (context, context->text->str, context->text->len,
                                                      context->user_data, error);
-                               if (*error != NULL)
+                               if (error != NULL && *error != NULL)
                                        goto fail;
                        }
                        
@@ -325,21 +421,29 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
 
                case CLOSING_ELEMENT: {
                        GSList *current = context->level;
+                       char *text;
 
                        if (context->level == NULL){
-                               set_error ("Too many closing tags, not enough open tags");
+                               set_error ("%s", "Too many closing tags, not enough open tags");
                                goto fail;
                        }
                        
+                       text = current->data;
                        if (context->parser.end_element != NULL){
-                               char *text = current->data;
-                               
                                context->parser.end_element (context, text, context->user_data, error);
-                               if (*error != NULL)
+                               if (error != NULL && *error != NULL){
+                                       g_free (text);
                                        goto fail;
+                               }
                        }
+                       g_free (text);
+
+                       while (p < end && *p != '>')
+                               p++;
+                       
                        context->level = context->level->next;
-                       g_slist_free (current);
+                       g_slist_free_1 (current);
+                       context->state = TEXT;
                        break;
                } /* case CLOSING_ELEMENT */
                        
@@ -349,11 +453,22 @@ g_markup_parse_context_parse (GMarkupParseContext *context,
 
        return TRUE;
  fail:
-       if (context->parser.error)
+       if (context->parser.error && error != NULL && *error)
                context->parser.error (context, *error, context->user_data);
        
        destroy_parse_state (context);
        return FALSE;
 }
 
+gboolean
+g_markup_parse_context_end_parse (GMarkupParseContext *context, GError **error)
+{
+       g_return_val_if_fail (context != NULL, FALSE);
 
+       /*
+        * In our case, errors are always signaled during parse, not at the
+        * end, so `error' is never set here and TRUE is returned for any
+        * non-NULL context.  See the notes at the top of this file for
+        * details on how that reporting could be moved here.
+        */
+       return TRUE;
+}