| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364 |
- /*
- * Copyright (C) 2008-2013 by egnite GmbH.
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the copyright holders nor the names of
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
- * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * For additional information see http://www.ethernut.de/
- *
- */
- /*
- * \file pro/uxmlstream.c
- * \brief Micro XML stream parser.
- *
- * \verbatim
- * $Id: uxmlstream.c 4917 2013-01-03 17:22:17Z haraldkipp $
- * \endverbatim
- */
- #include <sys/types.h>
- #include <sys/heap.h>
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include <memdebug.h>
- #include <pro/uxml.h>
- /*!
- * \addtogroup xgUXML
- */
- /*@{*/
- #ifndef MAX_UXMLTAG_SIZE
- /*!
- * \brief Size of the tag buffer in bytes.
- *
- * UxmlParseStream() allocates a buffer of this size and passes it to
- * UxmlReadTag(), which reserves one byte for the terminating zero.
- *
- * UxmlReadTag() stops reading as soon as the buffer is full. Unless the
- * closing bracket had already been seen, it reports an error and
- * UxmlParseStream() stops parsing. Larger tags are therefore not
- * skipped; they end the parse.
- *
- * May be overridden at build time.
- */
- #define MAX_UXMLTAG_SIZE 512
- #endif
- #ifndef MAX_UXMLTKN_SIZE
- /*!
- * \brief Size of the token buffer in bytes.
- *
- * UxmlParseStream() allocates a buffer of this size and passes it to
- * UxmlParseTag() for every tag name, attribute name and attribute value.
- *
- * Larger tokens are expected to be cut to the specified size by
- * UxmlParseTag() (implemented elsewhere, TODO confirm). This may be fine
- * for attribute values containing lengthy descriptions, but may be
- * disastrous for tag or attribute names.
- *
- * May be overridden at build time.
- */
- #define MAX_UXMLTKN_SIZE 64
- #endif
- #ifndef MAX_UXMLCONTENT_SIZE
- /*!
- * \brief Size of the content buffer in bytes.
- *
- * UxmlParseStream() allocates a buffer of this size and passes it to
- * UxmlReadContent(), unless UXML_IGNORE_CONTENT is defined.
- *
- * Content that does not fit is truncated: the leading part is kept and
- * the remaining characters up to the next tag are read and dropped.
- *
- * May be overridden at build time.
- */
- #define MAX_UXMLCONTENT_SIZE 512
- #endif
- #ifndef UXML_IGNORE_CONTENT
/*!
 * \brief Read element content up to the next opening bracket.
 *
 * Consumes characters from the stream until a '<' is found. The bracket
 * itself is consumed as well, but not stored. Leading white space is
 * skipped. Characters that do not fit into the buffer are still read
 * from the stream, but dropped.
 *
 * \param stream The stream to read from.
 * \param data   Buffer that receives the zero terminated content. May be
 *               NULL, in which case the content is read and discarded.
 * \param size   Total size of the buffer in bytes, including the space
 *               for the terminating zero. At most size - 1 characters
 *               are stored.
 *
 * \return 0 if an opening bracket was found, -1 if the end of the stream
 *         or a zero byte was reached first. In both cases the buffer
 *         contains the zero terminated characters collected so far.
 */
static int UxmlReadContent(FILE * stream, char *data, size_t size)
{
    int rc = -1;
    size_t len = 0;

    for (;;) {
        int ch = fgetc(stream);

        /* A zero byte is treated like the end of the stream. */
        if (ch == EOF || ch == 0) {
            break;
        }
        if (ch == '<') {
            rc = 0;
            break;
        }
        /* Always keep one byte free for the terminating zero. */
        if (data && len + 1 < size) {
            /* Skip white space as long as nothing has been stored. */
            if (len || !isspace(ch)) {
                data[len++] = (char) ch;
            }
        }
    }
    /* Never touch a zero sized buffer. */
    if (data && size) {
        data[len] = 0;
    }
    return rc;
}
- #endif
/*!
 * \brief Read a single tag from the stream.
 *
 * Collects characters up to and including the closing bracket. Outside
 * of quoted strings, any run of white space is stored as one single
 * space. Quoted strings are copied unchanged.
 *
 * If UXML_IGNORE_CONTENT is defined, the routine first searches for the
 * opening bracket, skipping quoted strings on its way. Otherwise it
 * expects that the opening bracket has been consumed already.
 *
 * \param stream The stream to read from.
 * \param data   Buffer that receives the zero terminated tag.
 * \param size   Size of the buffer, including the terminating zero.
 *
 * \return 0 if the closing bracket was found, -1 if the stream ended, a
 *         zero byte was read or the buffer ran full before that.
 */
static int UxmlReadTag(FILE * stream, char *data, size_t size)
{
    enum {
        TAG_DONE = 0,           /* Closing bracket seen. */
        TAG_SEEK_BRACKET = 1,   /* Searching the opening bracket. */
        TAG_SKIP_QUOTED = 2,    /* Inside a quoted string, not collecting. */
        TAG_SQUEEZE_SPACE = 3,  /* Dropping additional white space. */
        TAG_COLLECT = 4,        /* Collecting tag characters. */
        TAG_COLLECT_QUOTED = 5  /* Collecting a quoted string. */
    };
#ifdef UXML_IGNORE_CONTENT
    int mode = TAG_SEEK_BRACKET;
    char *out = NULL;
#else
    int mode = TAG_COLLECT;
    char *out = data;
#endif
    int result = -1;
    int quote = 0;

    while (mode != TAG_DONE) {
        int c = fgetc(stream);

        /* A zero byte is treated like the end of the stream. */
        if (c == EOF || c == 0) {
            break;
        }

        if (mode == TAG_SKIP_QUOTED || mode == TAG_COLLECT_QUOTED) {
            /* The matching quote returns to the state we came from. */
            if (c == quote) {
                mode = (mode == TAG_COLLECT_QUOTED) ? TAG_COLLECT : TAG_SEEK_BRACKET;
            }
        } else if (mode == TAG_SEEK_BRACKET) {
            if (c == '<') {
                /* From now on characters are stored, including this one. */
                out = data;
                mode = TAG_COLLECT;
            } else if (c == '"' || c == '\'') {
                quote = c;
                mode = TAG_SKIP_QUOTED;
            }
        } else if (mode == TAG_SQUEEZE_SPACE && isspace(c)) {
            /* Zero marks the character as not to be stored. */
            c = 0;
        } else {
            /* Regular collecting, or first character after white space. */
            mode = TAG_COLLECT;
            if (c == '>') {
                result = 0;
                mode = TAG_DONE;
            } else if (c == '"' || c == '\'') {
                quote = c;
                mode = TAG_COLLECT_QUOTED;
            } else if (isspace(c)) {
                /* Store one blank for the whole run of white space. */
                c = ' ';
                mode = TAG_SQUEEZE_SPACE;
            }
        }

        if (out != NULL && c != 0) {
            /* Give up when only the room for the terminator is left. */
            if (size <= 1) {
                break;
            }
            size--;
            *out++ = (char) c;
        }
    }
    if (out != NULL) {
        *out = 0;
    }
    return result;
}
- /*!
- * \brief Parse XML stream.
- *
- * This is the main routine of the Micro XML Stream Parser. It reads the
- * XML document from a previously opened stream and creates a
- * corresponding tree structure.
- *
- * Note, that this is a minimal and probably incomplete implementation,
- * which had been specifically created to parse the SHOUTcast radio
- * station list. On the other hand, it offers a practical XML parser
- * solution for embedded systems with very low memory resources. Unlike
- * most other implementations, it does not require to copy the whole XML
- * document into internal memory. Instead, the parser reads and interprets
- * individual tags. Furthermore, the caller may specify tag and attribute
- * filters to reduce the resulting tree size. Be aware, that because of
- * filtering the root of the tree may have siblings.
- *
- * Normally the parser will return when the end of a file is reached.
- * On TCP connections this may be either on connection close or timeout.
- * If closing and re-opening connections may create too much overhead
- * and timeouts are too slow, an EOF (ASCII 0) may be sent alternatively.
- *
- * \param stream The stream to read from.
- * \param f_tags Optional tag filter, which points to an array of tag
- * names to include. All other tags will be discarded.
- * This can be used to limit memory consumption of the
- * tree structure. Note, that this filtering may result
- * in a tree structure, which is different from the
- * structure of the original document. Set to NULL to
- * disable tag filtering.
- *
- * \param f_attr Optional attribute filter, which points to an array of
- * attribute names to include. All other attributes will be
- * discarded. Like the tag filter, it can be used to limit
- * memory consumption of the tree structure. Set to NULL
- * to disable attribute filtering.
- *
- * \return Pointer to a newly allocated UXML_NODE tree structure. NULL
- * may be returned in case of an error. The caller should use
- * UxmlTreeDestroy() to release the memory allocated by the
- * tree.
- */
- UXML_NODE *UxmlParseStream(FILE * stream, char **f_tags, char **f_attr)
- {
- char *content;
- char *tag;
- char *tkn;
- char *tp;
- #ifndef UXML_IGNORE_CONTENT
- char *cp = NULL;
- #endif
- UXML_NODE *root = NULL;
- UXML_NODE *node = NULL;
- UXML_NODE *nn;
- /* Allocate the tag buffers. */
- tag = malloc(MAX_UXMLTAG_SIZE);
- tkn = malloc(MAX_UXMLTKN_SIZE);
- #ifdef UXML_IGNORE_CONTENT
- if (tag == NULL || tkn == NULL) {
- free(tag);
- free(tkn);
- return NULL;
- }
- #else
- content = malloc(MAX_UXMLCONTENT_SIZE);
- if (tag == NULL || tkn == NULL || content == NULL) {
- free(tag);
- free(tkn);
- free(content);
- return NULL;
- }
- #endif
- for (;;) {
- if (NutHeapAvailable() < 8192) {
- break;
- }
- #ifndef UXML_IGNORE_CONTENT
- /* Read all content up to the next tag. */
- if (UxmlReadContent(stream, cp, MAX_UXMLCONTENT_SIZE)) {
- /* No more tags or error. */
- break;
- }
- if (cp) {
- if (*cp) {
- node->xmln_content = strdup(cp);
- }
- cp = NULL;
- }
- #endif
- /* Read the next tag. */
- if (UxmlReadTag(stream, tag, MAX_UXMLTAG_SIZE)) {
- /* No more tags or error. */
- break;
- }
- /* Skip declaration. */
- if (*tag == '?') {
- continue;
- }
- /* Parse the tag. */
- if ((tp = UxmlParseTag(tag, tkn, MAX_UXMLTKN_SIZE)) != NULL) {
- if (isalpha((unsigned char)*tkn) && UxmlFilterMatch(tkn, f_tags)) {
- /* Save pointer to tp because needed to determine self closing tag */
- char *old_tp = tp;
- /*
- * New node.
- */
- if ((nn = UxmlNodeCreate(tkn)) == NULL) {
- break;
- }
- if (root == NULL) {
- /* Root entry. */
- root = nn;
- node = nn;
- } else if (node == NULL) {
- /* No active node. Add root siblings. */
- node = UxmlTreeAddSibling(root, nn);
- } else {
- /* New node is a child of the currently active one. */
- node = UxmlTreeAddChild(node, nn);
- }
- /* Parse the attributes. */
- for (;;) {
- if ((tp = UxmlParseTag(tp, tkn, MAX_UXMLTKN_SIZE)) == NULL || *tkn == '>') {
- /* End of this tag or error. */
- break;
- }
- if (isalpha((unsigned char)*tkn) && UxmlFilterMatch(tkn, f_attr)) {
- char *name = strdup(tkn);
- if (name) {
- if ((tp = UxmlParseTag(tp, tkn, MAX_UXMLTKN_SIZE)) == NULL || *tkn != '=') {
- free(name);
- break;
- }
- if ((tp = UxmlParseTag(tp, tkn, MAX_UXMLTKN_SIZE)) == NULL || *tkn == '>') {
- free(name);
- break;
- }
- UxmlNodeAddAttrib(node, name, tkn);
- free(name);
- }
- }
- }
- /* Check if tag is self closing */
- if (node && strlen(old_tp) > 1 && old_tp[strlen(old_tp) - 2]=='/') {
- node = node->xmln_parent;
- } else {
- cp = content;
- }
- } else if (*tkn == '/') {
- /*
- * End of the active node.
- */
- tp = UxmlParseTag(tp, tkn, MAX_UXMLTKN_SIZE);
- if (tp && node && strcasecmp(node->xmln_name, tkn) == 0) {
- node = node->xmln_parent;
- }
- }
- }
- }
- /* Clean up. */
- free(tag);
- free(tkn);
- return root;
- }
- /*@}*/
|