* gmarkup.c: Minimal XML markup reader.
*
* Unlike the GLib one, this can not be restarted with more text
- * as the Mono use does not require it
+ * as the Mono use does not require it.
+ *
+ * Actually, on further thought, this could be made restartable
+ * quite easily: reaching pos == end would simply mean "return to
+ * the caller", and it would only be a fatal error when the parse
+ * is ended.
+ *
+ * Not that it matters to Mono. The change itself is simple, but
+ * there is one tricky situation: there are a few places in the
+ * source where we check p+n, and those checks would have to become
+ * progressive instead of depending on the string being complete at
+ * that point, so we would have to introduce extra states to cope
+ * with that.
*
* Author:
* Miguel de Icaza (miguel@novell.com)
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
+#include <ctype.h>
#include <glib.h>
-#define set_error(msg...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, msg); } while (0);
+/*
+ * set_error: stores a newly created GError in *error when the enclosing
+ * function was given an error location, and does nothing when `error' is
+ * NULL.  It relies on a variable named `error' being in scope.  `msg' is
+ * the format string handed to g_error_new () and at least one argument must
+ * follow it (use "%s" plus the text for fixed messages).
+ *
+ * The expansion deliberately has no trailing semicolon: the caller's own
+ * `;' completes the do/while (0), so the macro is a single statement and
+ * stays safe inside an unbraced if/else.
+ */
+#define set_error(msg, ...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, msg, __VA_ARGS__); } while (0)
typedef enum {
START,
START_ELEMENT,
- TEXT
+ TEXT, /* element content: characters are buffered (when a text callback is set) until the next '<' */
+ FLUSH_TEXT, /* a '<' ended a text run: hand the buffered text to the text callback, then pick a closing or an opening tag */
+ CLOSING_ELEMENT, /* after "</": report end_element for the innermost open element and skip to '>' */
+ COMMENT, /* inside "<!--": input is ignored until "-->" is seen */
+ SKIP_XML_DECLARATION /* after a leading "<?": shares the START_ELEMENT code but issues no element callbacks */
} ParseState;
struct _GMarkupParseContext {
gpointer user_data;
GDestroyNotify user_data_dnotify;
ParseState state;
+
+ /* Names of the elements that are currently open, innermost first (names are prepended), so we can issue the end_element */
+ GSList *level;
+
+ GString *text; /* character data gathered in the TEXT state; allocated on demand and passed to the text callback */
};
GMarkupParseContext *
void
g_markup_parse_context_free (GMarkupParseContext *context)
{
+	GSList *node;
+
+	g_return_if_fail (context != NULL);
+
+	/* Let the owner release its user_data before the context goes away. */
+	if (context->user_data_dnotify != NULL)
+		(context->user_data_dnotify) (context->user_data);
+
+	/* Character data that was buffered but never consumed. */
+	if (context->text != NULL)
+		g_string_free (context->text, TRUE);
+
+	/* Names of elements left open: free each string, then the list cells. */
+	node = context->level;
+	while (node != NULL){
+		g_free (node->data);
+		node = node->next;
+	}
+	g_slist_free (context->level);
g_free (context);
}
+/*
+ * my_isspace: replacement for isspace () that does not go through <ctype.h>,
+ * so it can be called with any char value, including bytes with the high
+ * bit set.
+ *
+ * Returns TRUE for space, '\t', '\n', '\v', '\f' and '\r', FALSE otherwise.
+ * '\f' is included so that the set matches what isspace () accepts in the
+ * "C" locale and what the START state of the parser already skips.
+ */
+static gboolean
+my_isspace (char c)
+{
+	switch (c){
+	case ' ':
+	case '\t':
+	case '\n':
+	case '\v':
+	case '\f':
+	case '\r':
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+/*
+ * my_isalnum: TRUE when `c' is one of a-z, A-Z or 0-9, FALSE for every
+ * other value.  Only plain range comparisons are used.
+ */
+static gboolean
+my_isalnum (char c)
+{
+	gboolean lower = (c >= 'a' && c <= 'z');
+	gboolean upper = (c >= 'A' && c <= 'Z');
+	gboolean digit = (c >= '0' && c <= '9');
+
+	return (lower || upper || digit) ? TRUE : FALSE;
+}
+
+/*
+ * my_isalpha: TRUE when `c' is one of a-z or A-Z, FALSE for every other
+ * value.  Only plain range comparisons are used.
+ */
+static gboolean
+my_isalpha (char c)
+{
+	gboolean lower = (c >= 'a' && c <= 'z');
+	gboolean upper = (c >= 'A' && c <= 'Z');
+
+	return (lower || upper) ? TRUE : FALSE;
+}
+
static const char *
skip_space (const char *p, const char *end)
{
- for (; p < end && isspace (*p); p++)
+ for (; p < end && my_isspace (*p); p++) /* step over whitespace; stops at the first other character or at end, whichever comes first */
;
return p;
}
int l;
if (*p != '"'){
- set_error ("Expected the attribute value to start with a quote");
+ set_error ("%s", "Expected the attribute value to start with a quote");
return end;
}
start = ++p;
- for (++p; p < end && *p != '"'; p++)
+ for (; p < end && *p != '"'; p++)
+ ;
if (p == end)
return end;
- l = p - start;
+ l = (int)(p - start);
p++;
- *value = malloc (l + 1);
+ *value = g_malloc (l + 1);
if (*value == NULL)
return end;
strncpy (*value, start, l);
const char *start = p;
int l;
- for (; p < end && isalnum (*p); p++)
+ for (; p < end && my_isalnum (*p); p++)
;
if (p == end)
return end;
- l = p - start;
- *value = malloc (l + 1);
+ l = (int)(p - start);
+ *value = g_malloc (l + 1);
if (*value == NULL)
return end;
strncpy (*value, start, l);
}
static const char *
-parse_attributes (const char *p, const char *end, char ***names, char ***values, GError **error, int *full_stop)
+parse_attributes (const char *p, const char *end, char ***names, char ***values, GError **error, int *full_stop, int state)
{
int nnames = 0;
*full_stop = 0;
return p;
}
- if (*p == '/' && ((p+1) < end && *p == '>')){
+ if (state == SKIP_XML_DECLARATION && *p == '?' && ((p+1) < end) && *(p+1) == '>'){
+ *full_stop = 0;
+ return p+1;
+ }
+
+ if (*p == '/' && ((p+1) < end && *(p+1) == '>')){
*full_stop = 1;
return p+1;
} else {
p = parse_name (p, end, &name);
if (p == end)
return p;
+
p = skip_space (p, end);
- if (p == end)
+ if (p == end){
+ g_free (name);
return p;
+ }
if (*p != '='){
set_error ("Expected an = after the attribute name `%s'", name);
+ g_free (name);
return end;
}
p++;
p = skip_space (p, end);
- if (p == end)
+ if (p == end){
+ g_free (name);
return end;
+ }
p = parse_value (p, end, &value, error);
- if (p == end)
+ if (p == end){
+ g_free (name);
return p;
+ }
++nnames;
*names = g_realloc (*names, sizeof (char **) * (nnames+1));
*values = g_realloc (*values, sizeof (char **) * (nnames+1));
(*names) [nnames-1] = name;
- (*values) [nnames-1] = name;
+ (*values) [nnames-1] = value;
(*names) [nnames] = NULL;
(*values) [nnames] = NULL;
}
}
}
+/*
+ * destroy_parse_state: releases the per-document state held by the context,
+ * i.e. the names of the elements that are still open and any buffered text,
+ * and resets both fields to NULL so that the context can still be freed
+ * later.  The context itself, its callbacks and its user_data are left alone.
+ *
+ * This runs on the `fail' path of g_markup_parse_context_parse ().
+ *
+ * NOTE(review): every name still linked in context->level is g_free ()d
+ * here.  The CLOSING_ELEMENT error path frees the head name itself before
+ * jumping to `fail' without unlinking it, which looks like a double free;
+ * confirm and fix at that call site.
+ */
+static void
+destroy_parse_state (GMarkupParseContext *context)
+{
+	GSList *node = context->level;
+
+	/* Free the element names first, then the list cells that held them. */
+	while (node != NULL){
+		g_free (node->data);
+		node = node->next;
+	}
+	g_slist_free (context->level);
+	context->level = NULL;
+
+	if (context->text != NULL){
+		g_string_free (context->text, TRUE);
+		context->text = NULL;
+	}
+}
+
gboolean
g_markup_parse_context_parse (GMarkupParseContext *context,
const gchar *text, gssize text_len,
for (p = text; p < end; p++){
char c = *p;
-
+
switch (context->state){
case START:
- if (c == ' ' || c == '\t' || c == '\f' || c == '\n')
+ if (c == ' ' || c == '\t' || c == '\f' || c == '\n' || (c & 0x80))
continue;
if (c == '<'){
- context->state = START_ELEMENT;
+ if (p+1 < end && p [1] == '?'){
+ context->state = SKIP_XML_DECLARATION;
+ p++;
+ } else
+ context->state = START_ELEMENT;
continue;
}
- set_error ("Expected < to start the document");
-
- return FALSE;
-
+ set_error ("%s", "Expected < to start the document");
+ goto fail;
+ case SKIP_XML_DECLARATION:
case START_ELEMENT: {
const char *element_start = p, *element_end;
- int full_stop = 0;
+ char *ename = NULL;
+ int full_stop = 0, l;
gchar **names = NULL, **values = NULL;
- if (!(isascii (*p) && isalpha (*p)))
- set_error ("Must start with a letter");
+ for (; p < end && my_isspace (*p); p++)
+ ;
+ if (p == end){
+ set_error ("%s", "Unfinished element");
+ goto fail;
+ }
+
+ if (*p == '!' && (p+2 < end) && (p [1] == '-') && (p [2] == '-')){
+ context->state = COMMENT;
+ p += 2;
+ break;
+ }
+
+ if (!my_isalpha (*p)){
+ set_error ("%s", "Expected an element name");
+ goto fail;
+ }
- for (++p; p < end && isalnum (*p); p++)
+ for (++p; p < end && (my_isalnum (*p) || (*p == '.')); p++)
;
if (p == end){
- set_error ("Expected an element");
- return FALSE;
+ set_error ("%s", "Expected an element");
+ goto fail;
}
element_end = p;
- for (; p < end && isspace (*p); p++)
+ for (; p < end && my_isspace (*p); p++)
;
if (p == end){
- set_error ("Unfinished element");
- return FALSE;
+ set_error ("%s", "Unfinished element");
+ goto fail;
}
- p = parse_attributes (p, end, &names, &values, error, &full_stop);
+ p = parse_attributes (p, end, &names, &values, error, &full_stop, context->state);
if (p == end){
- if (*error == NULL)
- set_error ("Unfinished sequence");
-
- return FALSE;
- }
- if (context->parser.start_element != NULL){
- int l = element_end - element_start;
- char *ename = malloc (l + 1);
-
- if (ename == NULL)
- return FALSE;
- strncpy (ename, element_start, l);
- ename [l] = 0;
-
- context->parser.start_element (context, ename,
- (const gchar **) names,
- (const gchar **) values,
- context->user_data, error);
- free (ename);
+ if (names != NULL) {
+ g_strfreev (names);
+ g_strfreev (values);
+ }
+ /* Only set the error if parse_attributes did not */
+ if (error != NULL && *error == NULL)
+ set_error ("%s", "Unfinished sequence");
+ goto fail;
}
+ l = (int)(element_end - element_start);
+ ename = g_malloc (l + 1);
+ if (ename == NULL)
+ goto fail;
+ strncpy (ename, element_start, l);
+ ename [l] = 0;
+
+ if (context->state == START_ELEMENT)
+ if (context->parser.start_element != NULL)
+ context->parser.start_element (context, ename,
+ (const gchar **) names,
+ (const gchar **) values,
+ context->user_data, error);
+
if (names != NULL){
g_strfreev (names);
g_strfreev (values);
}
- if (*error != NULL)
- return FALSE;
- context->state = full_stop ? START : TEXT;
+
+ if (error != NULL && *error != NULL){
+ g_free (ename);
+ goto fail;
+ }
+
+ if (full_stop){
+ if (context->parser.end_element != NULL && context->state == START_ELEMENT){
+ context->parser.end_element (context, ename, context->user_data, error);
+ if (error != NULL && *error != NULL){
+ g_free (ename);
+ goto fail;
+ }
+ }
+ g_free (ename);
+ } else {
+ context->level = g_slist_prepend (context->level, ename);
+ }
+
+ context->state = TEXT;
break;
} /* case START_ELEMENT */
case TEXT: {
+ if (c == '<'){
+ context->state = FLUSH_TEXT;
+ break;
+ }
+ if (context->parser.text != NULL){
+ if (context->text == NULL)
+ context->text = g_string_new ("");
+ g_string_append_c (context->text, c);
+ }
break;
}
+
+ case COMMENT:
+ if (*p != '-')
+ break;
+ if (p+2 < end && (p [1] == '-') && (p [2] == '>')){
+ context->state = TEXT;
+ p += 2;
+ break;
+ }
+ break;
- }
+ case FLUSH_TEXT:
+ if (context->parser.text != NULL && context->text != NULL){
+ context->parser.text (context, context->text->str, context->text->len,
+ context->user_data, error);
+ if (error != NULL && *error != NULL)
+ goto fail;
+ }
+
+ if (c == '/')
+ context->state = CLOSING_ELEMENT;
+ else {
+ p--;
+ context->state = START_ELEMENT;
+ }
+ break;
+
+ case CLOSING_ELEMENT: {
+ GSList *current = context->level;
+ char *text;
+
+ if (context->level == NULL){
+ set_error ("%s", "Too many closing tags, not enough open tags");
+ goto fail;
+ }
+
+ text = current->data;
+ if (context->parser.end_element != NULL){
+ context->parser.end_element (context, text, context->user_data, error);
+ if (error != NULL && *error != NULL){
+ g_free (text);
+ goto fail;
+ }
+ }
+ g_free (text);
+
+ while (p < end && *p != '>')
+ p++;
+
+ context->level = context->level->next;
+ g_slist_free_1 (current);
+ context->state = TEXT;
+ break;
+ } /* case CLOSING_ELEMENT */
+
+ } /* switch */
}
+
return TRUE;
+ fail:
+ if (context->parser.error && error != NULL && *error)
+ context->parser.error (context, *error, context->user_data);
+
+ destroy_parse_state (context);
+ return FALSE;
}
+gboolean
+g_markup_parse_context_end_parse (GMarkupParseContext *context, GError **error)
+{
+ g_return_val_if_fail (context != NULL, FALSE);
+
+ /*
+ * Nothing is checked here: this implementation reports every error
+ * from g_markup_parse_context_parse () itself, so once the context
+ * has been validated the call always succeeds and `error' is never
+ * written.  See the notes at the top of this file for how a
+ * restartable parser would move the final check into this function.
+ */
+ return TRUE;
+}