uxmlstream.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. /*
  2. * Copyright (C) 2008-2013 by egnite GmbH.
  3. *
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. * 3. Neither the name of the copyright holders nor the names of
  16. * contributors may be used to endorse or promote products derived
  17. * from this software without specific prior written permission.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  22. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  23. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  24. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  25. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  26. * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  27. * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  28. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
  29. * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30. * SUCH DAMAGE.
  31. *
  32. * For additional information see http://www.ethernut.de/
  33. *
  34. */
  35. /*
  36. * \file pro/uxmlstream.c
  37. * \brief Micro XML stream parser.
  38. *
  39. * \verbatim
  40. * $Id: uxmlstream.c 4917 2013-01-03 17:22:17Z haraldkipp $
  41. * \endverbatim
  42. */
  43. #include <sys/types.h>
  44. #include <sys/heap.h>
  45. #include <stdlib.h>
  46. #include <string.h>
  47. #include <ctype.h>
  48. #include <memdebug.h>
  49. #include <pro/uxml.h>
  50. /*!
  51. * \addtogroup xgUXML
  52. */
  53. /*@{*/
  54. #ifndef MAX_UXMLTAG_SIZE
  55. /*!
  56. * \brief Size of the buffer that collects a single tag, in bytes.
  57. *
  58. * A tag that does not fit normally makes UxmlReadTag() fail, which ends parsing.
  59. */
  60. #define MAX_UXMLTAG_SIZE 512
  61. #endif
  62. #ifndef MAX_UXMLTKN_SIZE
  63. /*!
  64. * \brief Maximum token size.
  65. *
  66. * Larger tokens will be cut to the specified size. This may be fine for
  67. * attribute values containing lengthy descriptions, but may be disastrous
  68. * for tag or attribute names.
  69. */
  70. #define MAX_UXMLTKN_SIZE 64
  71. #endif
  72. #ifndef MAX_UXMLCONTENT_SIZE
  73. /*!
  74. * \brief Size of the buffer that collects the text between two tags, in bytes.
  75. *
  76. * Excess text is still read from the stream, but it is not stored.
  77. */
  78. #define MAX_UXMLCONTENT_SIZE 512
  79. #endif
  80. #ifndef UXML_IGNORE_CONTENT
  81. static int UxmlReadContent(FILE * stream, char *data, size_t size)
  82. {
  83. int rc = -1;
  84. int ch;
  85. int len = 0;
  86. while (1) {
  87. ch = fgetc(stream);
  88. if (ch == EOF || ch == 0) {
  89. break;
  90. }
  91. if (ch == '<') {
  92. rc = 0;
  93. break;
  94. }
  95. if (data && len < size) {
  96. if (len || !isspace(ch)) {
  97. data[len++] = ch;
  98. }
  99. }
  100. }
  101. if (data) {
  102. data[len] = 0;
  103. }
  104. return rc;
  105. }
  106. #endif
  107. static int UxmlReadTag(FILE * stream, char *data, size_t size)
  108. {
  109. int rc = -1;
  110. int ch;
  111. int qc = 0;
  112. #ifdef UXML_IGNORE_CONTENT
  113. int state = 1;
  114. char *dp = NULL;
  115. #else
  116. int state = 4;
  117. char *dp = data;
  118. #endif
  119. while (state) {
  120. ch = fgetc(stream);
  121. if (ch == EOF || ch == 0) {
  122. break;
  123. }
  124. switch (state) {
  125. case 1:
  126. /* Searching first bracket. */
  127. if (ch == '<') {
  128. /* Opening bracket found. Start collecting. */
  129. dp = data;
  130. state = 4;
  131. } else if (ch == '"' || ch == '\'') {
  132. /* Quote found. Skip quoted string. */
  133. qc = ch;
  134. state++;
  135. }
  136. break;
  137. case 2:
  138. /* Skipping quoted string. */
  139. case 5:
  140. /* Collecting quoted string. */
  141. if (ch == qc) {
  142. /* End quote found. */
  143. state--;
  144. }
  145. break;
  146. case 3:
  147. /* Compressing spaces. */
  148. if (isspace(ch)) {
  149. ch = 0;
  150. break;
  151. }
  152. state = 4;
  153. /* Fall through. */
  154. case 4:
  155. /* Collecting data. */
  156. if (ch == '>') {
  157. rc = 0;
  158. state = 0;
  159. } else if (ch == '"' || ch == '\'') {
  160. qc = ch;
  161. state++;
  162. } else if (isspace(ch)) {
  163. ch = ' ';
  164. state = 3;
  165. }
  166. break;
  167. }
  168. if (dp && ch) {
  169. if (size > 1) {
  170. size--;
  171. *dp++ = ch;
  172. } else {
  173. break;
  174. }
  175. }
  176. }
  177. if (dp) {
  178. *dp = 0;
  179. }
  180. return rc;
  181. }
  182. /*!
  183. * \brief Parse XML stream.
  184. *
  185. * This is the main routine of the Micro XML Stream Parser. It reads the
  186. * XML document from a previously opened stream and creates a
  187. * corresponding tree structure.
  188. *
  189. * Note, that this is a minimal and probably incomplete implementation,
  190. * which had been specifically created to parse the SHOUTcast radio
  191. * station list. On the other hand, it offers a practical XML parser
  192. * solution for embedded systems with very low memory resources. Unlike
  193. * most other implementations, it does not require to copy the whole XML
  194. * document into internal memory. Instead, the parser reads and interprets
  195. * individual tags. Furthermore, the caller may specify tag and attribute
  196. * filters to reduce the resulting tree size. Be aware, that because of
  197. * filtering the root of the tree may have siblings.
  198. *
  199. * Normally the parser will return when the end of a file is reached.
  200. * On TCP connections this may be either on connection close or timeout.
  201. * If closing and re-opening connections may create too much overhead
  202. * and timeouts are too slow, an EOF (ASCII 0) may be sent alternatively.
  203. *
  204. * \param stream The stream to read from.
  205. * \param f_tags Optional tag filter, which points to an array of tag
  206. * names to include. All other tags will be discarded.
  207. * This can be used to limit memory consumption of the
  208. * tree structure. Note, that this filtering may result
  209. * in a tree structure, which is different from the
  210. * structure of the original document. Set to NULL to
  211. * disable tag filtering.
  212. *
  213. * \param f_attr Optional attribute filter, which points to an array of
  214. * attribute names to include. All other attributes will be
  215. * discarded. Like the tag filter, it can be used to limit
  216. * memory consumption of the tree structure. Set to NULL
  217. * to disable attribute filtering.
  218. *
  219. * \return Pointer to a newly allocated UXML_NODE tree structure. NULL
  220. * may be returned in case of an error. The caller should use
  221. * UxmlTreeDestroy() to release the memory allocated by the
  222. * tree.
  223. */
  224. UXML_NODE *UxmlParseStream(FILE * stream, char **f_tags, char **f_attr)
  225. {
  226. char *content;
  227. char *tag;
  228. char *tkn;
  229. char *tp;
  230. #ifndef UXML_IGNORE_CONTENT
  231. char *cp = NULL;
  232. #endif
  233. UXML_NODE *root = NULL;
  234. UXML_NODE *node = NULL;
  235. UXML_NODE *nn;
  236. /* Allocate the tag buffers. */
  237. tag = malloc(MAX_UXMLTAG_SIZE);
  238. tkn = malloc(MAX_UXMLTKN_SIZE);
  239. #ifdef UXML_IGNORE_CONTENT
  240. if (tag == NULL || tkn == NULL) {
  241. free(tag);
  242. free(tkn);
  243. return NULL;
  244. }
  245. #else
  246. content = malloc(MAX_UXMLCONTENT_SIZE);
  247. if (tag == NULL || tkn == NULL || content == NULL) {
  248. free(tag);
  249. free(tkn);
  250. free(content);
  251. return NULL;
  252. }
  253. #endif
  254. for (;;) {
  255. if (NutHeapAvailable() < 8192) {
  256. break;
  257. }
  258. #ifndef UXML_IGNORE_CONTENT
  259. /* Read all content up to the next tag. */
  260. if (UxmlReadContent(stream, cp, MAX_UXMLCONTENT_SIZE)) {
  261. /* No more tags or error. */
  262. break;
  263. }
  264. if (cp) {
  265. if (*cp) {
  266. node->xmln_content = strdup(cp);
  267. }
  268. cp = NULL;
  269. }
  270. #endif
  271. /* Read the next tag. */
  272. if (UxmlReadTag(stream, tag, MAX_UXMLTAG_SIZE)) {
  273. /* No more tags or error. */
  274. break;
  275. }
  276. /* Skip declaration. */
  277. if (*tag == '?') {
  278. continue;
  279. }
  280. /* Parse the tag. */
  281. if ((tp = UxmlParseTag(tag, tkn, MAX_UXMLTKN_SIZE)) != NULL) {
  282. if (isalpha((unsigned char)*tkn) && UxmlFilterMatch(tkn, f_tags)) {
  283. /* Save pointer to tp because needed to determine self closing tag */
  284. char *old_tp = tp;
  285. /*
  286. * New node.
  287. */
  288. if ((nn = UxmlNodeCreate(tkn)) == NULL) {
  289. break;
  290. }
  291. if (root == NULL) {
  292. /* Root entry. */
  293. root = nn;
  294. node = nn;
  295. } else if (node == NULL) {
  296. /* No active node. Add root siblings. */
  297. node = UxmlTreeAddSibling(root, nn);
  298. } else {
  299. /* New node is a child of the currently active one. */
  300. node = UxmlTreeAddChild(node, nn);
  301. }
  302. /* Parse the attributes. */
  303. for (;;) {
  304. if ((tp = UxmlParseTag(tp, tkn, MAX_UXMLTKN_SIZE)) == NULL || *tkn == '>') {
  305. /* End of this tag or error. */
  306. break;
  307. }
  308. if (isalpha((unsigned char)*tkn) && UxmlFilterMatch(tkn, f_attr)) {
  309. char *name = strdup(tkn);
  310. if (name) {
  311. if ((tp = UxmlParseTag(tp, tkn, MAX_UXMLTKN_SIZE)) == NULL || *tkn != '=') {
  312. free(name);
  313. break;
  314. }
  315. if ((tp = UxmlParseTag(tp, tkn, MAX_UXMLTKN_SIZE)) == NULL || *tkn == '>') {
  316. free(name);
  317. break;
  318. }
  319. UxmlNodeAddAttrib(node, name, tkn);
  320. free(name);
  321. }
  322. }
  323. }
  324. /* Check if tag is self closing */
  325. if (node && strlen(old_tp) > 1 && old_tp[strlen(old_tp) - 2]=='/') {
  326. node = node->xmln_parent;
  327. } else {
  328. cp = content;
  329. }
  330. } else if (*tkn == '/') {
  331. /*
  332. * End of the active node.
  333. */
  334. tp = UxmlParseTag(tp, tkn, MAX_UXMLTKN_SIZE);
  335. if (tp && node && strcasecmp(node->xmln_name, tkn) == 0) {
  336. node = node->xmln_parent;
  337. }
  338. }
  339. }
  340. }
  341. /* Clean up. */
  342. free(tag);
  343. free(tkn);
  344. return root;
  345. }
  346. /*@}*/