From aaae19eb8462c784daab0cf9afddc934fdbd1b75 Mon Sep 17 00:00:00 2001 From: defanor Date: Fri, 8 Sep 2023 17:23:54 +0300 Subject: Support libexpat as an alternative XML parser --- src/Makefile.am | 18 +-- src/rexmpp.c | 135 +++++++++----------- src/rexmpp.h | 8 +- src/rexmpp_jingle.c | 16 +-- src/rexmpp_roster.c | 2 - src/rexmpp_tls.c | 11 +- src/rexmpp_xml.c | 276 ++++++++++++++++++----------------------- src/rexmpp_xml.h | 56 ++++++--- src/rexmpp_xml.rs | 26 ++-- src/rexmpp_xml_parser.c | 318 ++++++++++++++++++++++++++++++++++++++++++++++++ src/rexmpp_xml_parser.h | 103 ++++++++++++++++ 11 files changed, 676 insertions(+), 293 deletions(-) create mode 100644 src/rexmpp_xml_parser.c create mode 100644 src/rexmpp_xml_parser.h (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index 5a5cbc3..eea4f8f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,11 +1,4 @@ -AM_CFLAGS = -Werror -Wall -Wextra -pedantic -std=gnu99 \ - -Wno-pointer-sign - -# -Wno-pointer-sign is used to suppress libxml2-related warnings. -# Since we only care about UTF-8, and in almost all cases just its -# ASCII subset (comparing or setting fixed namespaces, element names, -# etc), it shouldn't matter. Later it would be nice to abstract XML -# manipulations anyway, to allow libexpat as an alternative. 
+AM_CFLAGS = -Werror -Wall -Wextra -pedantic -std=gnu99 lib_LTLIBRARIES = librexmpp.la @@ -22,18 +15,19 @@ librexmpp_la_SOURCES = rexmpp_roster.h rexmpp_roster.c \ rexmpp_base64.h rexmpp_base64.c \ rexmpp_sasl.h rexmpp_sasl.c \ rexmpp_xml.h rexmpp_xml.c \ - rexmpp_utf8.h + rexmpp_utf8.h \ + rexmpp_xml_parser.h rexmpp_xml_parser.c include_HEADERS = config.h rexmpp_roster.h rexmpp_tcp.h rexmpp_socks.h rexmpp.h \ rexmpp_dns.h rexmpp_tls.h rexmpp_jid.h rexmpp_openpgp.h rexmpp_console.h \ rexmpp_pubsub.h rexmpp_http_upload.h rexmpp_jingle.h rexmpp_base64.h \ - rexmpp_sasl.h rexmpp_xml.h -librexmpp_la_CFLAGS = $(AM_CFLAGS) $(LIBXML_CFLAGS) \ + rexmpp_sasl.h rexmpp_xml.h rexmpp_utf8.h rexmpp_xml_parser.h +librexmpp_la_CFLAGS = $(AM_CFLAGS) $(LIBXML2_CFLAGS) $(EXPAT_CFLAGS) \ $(GNUTLS_CFLAGS) $(LIBDANE_CFLAGS) $(OPENSSL_CFLAGS) \ $(GSASL_CFLAGS) $(UNBOUND_CFLAGS) $(CARES_CFLAGS) $(GPGME_CFLAGS) \ $(ICU_I18N_CFLAGS) $(LIBGCRYPT_CFLAGS) $(CURL_CFLAGS) \ $(NICE_CFLAGS) $(GLIB_CFLAGS) $(SRTP_CFLAGS) -librexmpp_la_LIBADD = $(LIBXML_LIBS) \ +librexmpp_la_LIBADD = $(LIBXML2_LIBS) $(EXPAT_LIBS) \ $(GNUTLS_LIBS) $(LIBDANE_LIBS) $(OPENSSL_LIBS) \ $(GSASL_LIBS) $(UNBOUND_LIBS) $(CARES_LIBS) $(GPGME_LIBS) $(ICU_I18N_LIBS) \ $(LIBGCRYPT_LIBS) $(CURL_LIBS) $(NICE_LIBS) $(GLIB_LIBS) $(SRTP_LIBS) diff --git a/src/rexmpp.c b/src/rexmpp.c index bdbfb94..886f091 100644 --- a/src/rexmpp.c +++ b/src/rexmpp.c @@ -18,8 +18,6 @@ #include "config.h" #include -#include -#include #include #ifdef HAVE_GPGME #include @@ -89,21 +87,13 @@ const char *rexmpp_strerror (rexmpp_err_t error) { } void rexmpp_sax_start_elem_ns (rexmpp_t *s, - const char *localname, - const char *prefix, - const char *URI, - int nb_namespaces, - const char **namespaces, - int nb_attributes, - int nb_defaulted, - const char **attributes); - -void rexmpp_sax_end_elem_ns(rexmpp_t *s, - const char *localname, - const char *prefix, - const char *URI); - -void rexmpp_sax_characters (rexmpp_t *s, const char * ch, int len); + const 
char *name, + const char *namespace, + rexmpp_xml_attr_t *attributes); + +void rexmpp_sax_end_elem_ns(rexmpp_t *s); + +void rexmpp_sax_characters (rexmpp_t *s, const char * ch, size_t len); void rexmpp_log (rexmpp_t *s, int priority, const char *format, ...) { @@ -479,17 +469,17 @@ rexmpp_xml_t *rexmpp_disco_info (rexmpp_t *s) { return s->disco_info; } +struct rexmpp_xml_parser_handlers sax = { + (rexmpp_xml_parser_element_start)rexmpp_sax_start_elem_ns, + (rexmpp_xml_parser_element_end)rexmpp_sax_end_elem_ns, + (rexmpp_xml_parser_characters)rexmpp_sax_characters +}; + rexmpp_err_t rexmpp_init (rexmpp_t *s, const char *jid, log_function_t log_func) { int err; - xmlSAXHandler sax = { - .initialized = XML_SAX2_MAGIC, - .characters = (charactersSAXFunc)rexmpp_sax_characters, - .startElementNs = (startElementNsSAX2Func)rexmpp_sax_start_elem_ns, - .endElementNs = (endElementNsSAX2Func)rexmpp_sax_end_elem_ns, - }; s->tcp_state = REXMPP_TCP_NONE; s->resolver_state = REXMPP_RESOLVER_NONE; @@ -625,7 +615,7 @@ rexmpp_err_t rexmpp_init (rexmpp_t *s, gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); } - s->xml_parser = xmlCreatePushParserCtxt(&sax, s, "", 0, NULL); + s->xml_parser = rexmpp_xml_parser_new(&sax, s); if (s->xml_parser == NULL) { rexmpp_log(s, LOG_CRIT, "Failed to create an XML parser context."); @@ -633,13 +623,13 @@ rexmpp_err_t rexmpp_init (rexmpp_t *s, } if (rexmpp_dns_ctx_init(s)) { - xmlFreeParserCtxt(s->xml_parser); + rexmpp_xml_parser_free(s->xml_parser); return REXMPP_E_DNS; } if (rexmpp_tls_init(s)) { rexmpp_dns_ctx_deinit(s); - xmlFreeParserCtxt(s->xml_parser); + rexmpp_xml_parser_free(s->xml_parser); return REXMPP_E_TLS; } @@ -647,7 +637,7 @@ rexmpp_err_t rexmpp_init (rexmpp_t *s, if (err) { rexmpp_tls_deinit(s); rexmpp_dns_ctx_deinit(s); - xmlFreeParserCtxt(s->xml_parser); + rexmpp_xml_parser_free(s->xml_parser); return REXMPP_E_SASL; } @@ -655,7 +645,7 @@ rexmpp_err_t rexmpp_init (rexmpp_t *s, rexmpp_sasl_ctx_deinit(s); rexmpp_tls_deinit(s); 
rexmpp_dns_ctx_deinit(s); - xmlFreeParserCtxt(s->xml_parser); + rexmpp_xml_parser_free(s->xml_parser); } #ifdef HAVE_GPGME @@ -668,7 +658,7 @@ rexmpp_err_t rexmpp_init (rexmpp_t *s, rexmpp_tls_deinit(s); rexmpp_dns_ctx_deinit(s); rexmpp_jingle_stop(s); - xmlFreeParserCtxt(s->xml_parser); + rexmpp_xml_parser_free(s->xml_parser); return REXMPP_E_PGP; } #else @@ -774,7 +764,7 @@ void rexmpp_done (rexmpp_t *s) { rexmpp_sasl_ctx_deinit(s); rexmpp_tls_deinit(s); rexmpp_dns_ctx_deinit(s); - xmlFreeParserCtxt(s->xml_parser); + rexmpp_xml_parser_free(s->xml_parser); if (s->jingle_rtp_description != NULL) { rexmpp_xml_free(s->jingle_rtp_description); s->jingle_rtp_description = NULL; @@ -941,7 +931,7 @@ rexmpp_err_t rexmpp_send_continue (rexmpp_t *s) s->send_buffer = NULL; if (s->send_queue != NULL) { rexmpp_xml_t *node = s->send_queue; - unsigned char *buf = rexmpp_xml_serialize(node, 0); + char *buf = rexmpp_xml_serialize(node, 0); ret = rexmpp_send_start(s, buf, strlen(buf)); free(buf); if (ret != REXMPP_SUCCESS) { @@ -1034,7 +1024,7 @@ rexmpp_err_t rexmpp_send (rexmpp_t *s, rexmpp_xml_t *node) } if (s->send_buffer == NULL) { - unsigned char *buf = rexmpp_xml_serialize(node, 0); + char *buf = rexmpp_xml_serialize(node, 0); ret = rexmpp_send_raw(s, buf, strlen(buf)); free(buf); rexmpp_xml_free(node); @@ -1207,7 +1197,8 @@ rexmpp_err_t rexmpp_process_element (rexmpp_t *s, rexmpp_xml_t *elem); rexmpp_err_t rexmpp_recv (rexmpp_t *s) { char chunk_raw[4096], *chunk; - ssize_t chunk_raw_len, chunk_len; + size_t chunk_len; + ssize_t chunk_raw_len; int sasl_err; rexmpp_tls_err_t recv_err; rexmpp_err_t err = REXMPP_SUCCESS; @@ -1234,7 +1225,7 @@ rexmpp_err_t rexmpp_recv (rexmpp_t *s) { chunk = chunk_raw; chunk_len = chunk_raw_len; } - xmlParseChunk(s->xml_parser, chunk, chunk_len, 0); + rexmpp_xml_parser_feed(s->xml_parser, chunk, chunk_len); if (chunk != chunk_raw && chunk != NULL) { free(chunk); } @@ -1399,7 +1390,7 @@ rexmpp_process_tls_conn_err (rexmpp_t *s, return 
rexmpp_stream_open(s); } else { /* A STARTTLS connection, restart the stream. */ - xmlCtxtResetPush(s->xml_parser, "", 0, "", "utf-8"); + s->xml_parser = rexmpp_xml_parser_reset(s->xml_parser); return rexmpp_stream_open(s); } } else { @@ -1414,7 +1405,7 @@ rexmpp_err_t rexmpp_connected_to_server (rexmpp_t *s) { "Connected to the server, the used address record was %s", s->server_socket_dns_secure ? "secure" : "not secure"); s->reconnect_number = 0; - xmlCtxtResetPush(s->xml_parser, "", 0, "", "utf-8"); + s->xml_parser = rexmpp_xml_parser_reset(s->xml_parser); if (s->tls_state == REXMPP_TLS_AWAITING_DIRECT) { return rexmpp_process_tls_conn_err(s, rexmpp_tls_connect(s)); } else { @@ -2135,7 +2126,7 @@ rexmpp_err_t rexmpp_process_element (rexmpp_t *s, rexmpp_xml_t *elem) { return REXMPP_E_SASL; } s->sasl_state = REXMPP_SASL_ACTIVE; - xmlCtxtResetPush(s->xml_parser, "", 0, "", "utf-8"); + s->xml_parser = rexmpp_xml_parser_reset(s->xml_parser); return rexmpp_stream_open(s); } else if (rexmpp_xml_match(elem, "urn:ietf:params:xml:ns:xmpp-sasl", "failure")) { @@ -2221,37 +2212,37 @@ rexmpp_err_t rexmpp_process_element (rexmpp_t *s, rexmpp_xml_t *elem) { } -void rexmpp_sax_characters (rexmpp_t *s, const char *ch, int len) +/* These SAX handlers are similar to those in rexmpp_xml.c, might be + nice to reuse them. 
*/ +void rexmpp_sax_characters (rexmpp_t *s, const char *ch, size_t len) { if (s->current_element != NULL) { - rexmpp_xml_t *text_node = rexmpp_xml_new_text_len(ch, len); - if (text_node != NULL) { - text_node->next = s->current_element->alt.elem.children; - s->current_element->alt.elem.children = text_node; + rexmpp_xml_t *last_node = s->current_element->alt.elem.children; + if (last_node != NULL && last_node->type == REXMPP_XML_TEXT) { + /* The last child is textual as well, just extend it */ + size_t last_len = strlen(last_node->alt.text); + last_node->alt.text = realloc(last_node->alt.text, last_len + len + 1); + strncpy(last_node->alt.text + last_len, ch, len); + last_node->alt.text[last_len + len] = '\0'; + } else { + rexmpp_xml_t *text_node = rexmpp_xml_new_text_len(ch, len); + if (text_node != NULL) { + text_node->next = s->current_element->alt.elem.children; + s->current_element->alt.elem.children = text_node; + } } } } void rexmpp_sax_start_elem_ns (rexmpp_t *s, - const char *localname, - const char *prefix, - const char *URI, - int nb_namespaces, - const char **namespaces, - int nb_attributes, - int nb_defaulted, - const char **attributes) + const char *name, + const char *namespace, + rexmpp_xml_attr_t *attributes) { - /* Not checking namespaces beyond URI. 
*/ - (void)nb_namespaces; - (void)namespaces; - (void)nb_defaulted; - (void)prefix; - - int i; if (s->stream_state == REXMPP_STREAM_OPENING && - strcmp(localname, "stream") == 0 && - strcmp(URI, "http://etherx.jabber.org/streams") == 0) { + s->current_element == NULL && + strcmp(name, "stream") == 0 && + strcmp(namespace, "http://etherx.jabber.org/streams") == 0) { rexmpp_log(s, LOG_DEBUG, "stream start"); s->stream_state = REXMPP_STREAM_NEGOTIATION; return; @@ -2259,37 +2250,23 @@ void rexmpp_sax_start_elem_ns (rexmpp_t *s, if (s->stream_state != REXMPP_STREAM_OPENING) { if (s->current_element == NULL) { - s->current_element = rexmpp_xml_new_elem(localname, URI); + s->current_element = rexmpp_xml_new_elem(name, namespace); s->current_element_root = s->current_element; } else { - rexmpp_xml_t *node = rexmpp_xml_new_elem(localname, URI); + rexmpp_xml_t *node = rexmpp_xml_new_elem(name, namespace); node->next = s->current_element->alt.elem.children; s->current_element->alt.elem.children = node; s->current_element = node; } - for (i = 0; i < nb_attributes; i++) { - size_t attr_len = attributes[i * 5 + 4] - attributes[i * 5 + 3]; - char *attr_val = malloc(attr_len + 1); - attr_val[attr_len] = '\0'; - strncpy(attr_val, attributes[i * 5 + 3], attr_len); - rexmpp_xml_add_attr_ns(s->current_element, - attributes[i * 5], - NULL, attr_val); - free(attr_val); - } + s->current_element->alt.elem.attributes = attributes; } } -void rexmpp_sax_end_elem_ns (rexmpp_t *s, - const char *localname, - const char *prefix, - const char *URI) +void rexmpp_sax_end_elem_ns (rexmpp_t *s) { - (void)prefix; /* Not interested in prefix here. 
*/ if ((s->stream_state == REXMPP_STREAM_CLOSING || s->stream_state == REXMPP_STREAM_ERROR) && - strcmp(localname, "stream") == 0 && - strcmp(URI, "http://etherx.jabber.org/streams") == 0) { + s->current_element == NULL) { rexmpp_log(s, LOG_DEBUG, "stream end"); if (s->sasl_state == REXMPP_SASL_ACTIVE) { rexmpp_sasl_ctx_cleanup(s); @@ -2318,7 +2295,7 @@ void rexmpp_sax_end_elem_ns (rexmpp_t *s, } else { /* Done parsing this element; reverse all the lists of children and queue it. */ - s->current_element = rexmpp_xml_reverse_all(s->current_element); + rexmpp_xml_reverse_children(s->current_element); if (s->input_queue == NULL) { s->input_queue = s->current_element; s->input_queue_last = s->current_element; diff --git a/src/rexmpp.h b/src/rexmpp.h index 4cde101..a3df757 100644 --- a/src/rexmpp.h +++ b/src/rexmpp.h @@ -13,7 +13,6 @@ #include "config.h" -#include #ifdef HAVE_GPGME #include #endif @@ -188,6 +187,7 @@ enum tls_pol { typedef enum rexmpp_err rexmpp_err_t; #include "rexmpp_xml.h" +#include "rexmpp_xml_parser.h" #include "rexmpp_tcp.h" #include "rexmpp_socks.h" #include "rexmpp_dns.h" @@ -362,8 +362,8 @@ struct rexmpp NULL if there is anything in the send queue). Not appending data to it, see send_queue for queuing. */ char *send_buffer; - ssize_t send_buffer_len; - ssize_t send_buffer_sent; + size_t send_buffer_len; + size_t send_buffer_sent; /* A queue of XML elements to send. */ rexmpp_xml_t *send_queue; @@ -374,7 +374,7 @@ struct rexmpp /* XML parser context, and current element pointer for building XML nodes with a SAX2 parser interface. */ - xmlParserCtxtPtr xml_parser; + rexmpp_xml_parser_ctx_t xml_parser; /* The children are stored in reverse order during building. 
*/ rexmpp_xml_t *current_element_root; diff --git a/src/rexmpp_jingle.c b/src/rexmpp_jingle.c index c91c35d..0351f1d 100644 --- a/src/rexmpp_jingle.c +++ b/src/rexmpp_jingle.c @@ -454,7 +454,7 @@ rexmpp_jingle_send_file (rexmpp_t *s, char *hash_base64 = NULL; size_t hash_base64_len = 0; - rexmpp_base64_to(gcry_md_read(hd, GCRY_MD_SHA256), + rexmpp_base64_to((char*)gcry_md_read(hd, GCRY_MD_SHA256), gcry_md_get_algo_dlen(GCRY_MD_SHA256), &hash_base64, &hash_base64_len); @@ -467,7 +467,7 @@ rexmpp_jingle_send_file (rexmpp_t *s, hash_base64 = NULL; hash_base64_len = 0; - rexmpp_base64_to(gcry_md_read(hd, GCRY_MD_SHA3_256), + rexmpp_base64_to((char*)gcry_md_read(hd, GCRY_MD_SHA3_256), gcry_md_get_algo_dlen(GCRY_MD_SHA3_256), &hash_base64, &hash_base64_len); @@ -740,7 +740,7 @@ rexmpp_jingle_candidate_gathering_done_cb (NiceAgent *agent, rexmpp_jingle_session_t *sess = data; gnutls_x509_crt_t *cert_list; - int cert_list_size = 0; + unsigned int cert_list_size = 0; /* We'll need a certificate a bit later, but checking it before allocating other things. */ int err = gnutls_certificate_get_x509_crt(sess->s->tls->dtls_cred, 0, @@ -755,7 +755,7 @@ rexmpp_jingle_candidate_gathering_done_cb (NiceAgent *agent, char fp[32], fp_str[97]; size_t fp_size = 32; gnutls_x509_crt_get_fingerprint(cert_list[0], GNUTLS_DIG_SHA256, fp, &fp_size); - int i; + unsigned int i; for (i = 0; i < 32; i++) { snprintf(fp_str + i * 3, 4, "%02X:", fp[i] & 0xFF); } @@ -1166,15 +1166,15 @@ rexmpp_jingle_ice_recv_cb (NiceAgent *agent, guint stream_id, guint component_id } uint16_t port_out = comp->udp_port_out; if (component_id == 1) { - err = srtp_unprotect(srtp_in, buf, &len); + err = srtp_unprotect(srtp_in, buf, (int*)&len); if (err == srtp_err_status_auth_fail && comp->session->rtcp_mux) { /* Try to demultiplex. Maybe there's a better way to do it, but this will do for now. 
*/ - err = srtp_unprotect_rtcp(srtp_in, buf, &len); + err = srtp_unprotect_rtcp(srtp_in, buf, (int*)&len); port_out = comp->session->component[0].udp_port_out; } } else { - err = srtp_unprotect_rtcp(srtp_in, buf, &len); + err = srtp_unprotect_rtcp(srtp_in, buf, (int*)&len); } if (err) { rexmpp_log(comp->s, LOG_ERR, "SRT(C)P unprotect error %d on component %d", @@ -1818,7 +1818,7 @@ rexmpp_jingle_run (rexmpp_t *s, char key_mat[4096]; int err; gnutls_datum_t client_key, client_salt, server_key, server_salt; - char client_sess_key[SRTP_AES_ICM_128_KEY_LEN_WSALT * 2], + unsigned char client_sess_key[SRTP_AES_ICM_128_KEY_LEN_WSALT * 2], server_sess_key[SRTP_AES_ICM_128_KEY_LEN_WSALT * 2]; for (sess = s->jingle->sessions; sess != NULL; sess = sess->next) { char input[4096 + SRTP_MAX_TRAILER_LEN]; diff --git a/src/rexmpp_roster.c b/src/rexmpp_roster.c index 6bc05fc..0048848 100644 --- a/src/rexmpp_roster.c +++ b/src/rexmpp_roster.c @@ -10,8 +10,6 @@ #include "rexmpp_xml.h" #include #include -#include -#include rexmpp_xml_t * rexmpp_roster_find_item (rexmpp_t *s, diff --git a/src/rexmpp_tls.c b/src/rexmpp_tls.c index 96c042a..e1de5ea 100644 --- a/src/rexmpp_tls.c +++ b/src/rexmpp_tls.c @@ -139,7 +139,8 @@ rexmpp_tls_err_t rexmpp_tls_connect (rexmpp_t *s) { #if defined(USE_GNUTLS) if (s->tls_state != REXMPP_TLS_HANDSHAKE) { - gnutls_datum_t xmpp_client_protocol = {"xmpp-client", strlen("xmpp-client")}; + gnutls_datum_t xmpp_client_protocol = + {(unsigned char*)"xmpp-client", strlen("xmpp-client")}; rexmpp_log(s, LOG_DEBUG, "starting TLS"); gnutls_init(&s->tls->gnutls_session, GNUTLS_CLIENT); gnutls_session_set_ptr(s->tls->gnutls_session, s); @@ -172,7 +173,7 @@ rexmpp_tls_connect (rexmpp_t *s) { rexmpp_log(s, LOG_DEBUG, "Waiting for TLS handshake to complete"); return REXMPP_TLS_E_AGAIN; } else if (ret == 0) { - int status; + unsigned int status; int srv_is_secure = 0; if (s->stream_state == REXMPP_STREAM_NONE && @@ -329,7 +330,8 @@ rexmpp_tls_send (rexmpp_t *s, void *data, 
size_t data_size, ssize_t *written) } #elif defined(USE_OPENSSL) *written = -1; - int ret = SSL_write_ex(s->tls->openssl_conn, data, data_size, written); + int ret = SSL_write_ex(s->tls->openssl_conn, data, data_size, + (size_t*)written); if (ret > 0) { return REXMPP_TLS_SUCCESS; } else { @@ -360,7 +362,8 @@ rexmpp_tls_recv (rexmpp_t *s, void *data, size_t data_size, ssize_t *received) { } #elif defined(USE_OPENSSL) *received = -1; - int ret = SSL_read_ex(s->tls->openssl_conn, data, data_size, received); + int ret = SSL_read_ex(s->tls->openssl_conn, data, data_size, + (size_t*)received); if (ret > 0) { return REXMPP_TLS_SUCCESS; } else { diff --git a/src/rexmpp_xml.c b/src/rexmpp_xml.c index 862a783..4907d4e 100644 --- a/src/rexmpp_xml.c +++ b/src/rexmpp_xml.c @@ -8,8 +8,6 @@ #include #include -#include -#include #include "rexmpp.h" #include "rexmpp_utf8.h" #include "rexmpp_xml.h" @@ -119,135 +117,7 @@ rexmpp_xml_t *rexmpp_xml_clone_list (rexmpp_xml_t *node) { } return first; } -#endif - -rexmpp_xml_t *rexmpp_xml_from_libxml2 (xmlNodePtr from) { - if (from == NULL) { - return NULL; - } - - rexmpp_xml_t *to = NULL; - if (from->type == XML_ELEMENT_NODE) { - to = malloc(sizeof(rexmpp_xml_t)); - - /* Type */ - to->type = REXMPP_XML_ELEMENT; - - /* Name and namespace */ - to->alt.elem.qname.name = strdup(from->name); - if (from->nsDef != NULL && from->nsDef->href != NULL) { - to->alt.elem.qname.namespace = strdup(from->nsDef->href); - } else { - to->alt.elem.qname.namespace = NULL; - } - - /* Attributes */ - to->alt.elem.attributes = NULL; - struct _xmlAttr *from_attr; - rexmpp_xml_attr_t **to_next_attr = &(to->alt.elem.attributes); - for (from_attr = from->properties; - from_attr != NULL; - from_attr = from_attr->next) - { - rexmpp_xml_attr_t *to_attr = - malloc(sizeof(rexmpp_xml_attr_t)); - to_attr->qname.name = strdup(from_attr->name); - to_attr->qname.namespace = NULL; - if (from_attr->ns != NULL && from_attr->ns->href != NULL) { - to_attr->qname.namespace = 
strdup(from_attr->ns->href); - to_attr->value = - xmlGetNsProp(from, to_attr->qname.name, to_attr->qname.namespace); - } else { - to_attr->value = xmlGetProp(from, to_attr->qname.name); - } - to_attr->next = NULL; - - *to_next_attr = to_attr; - to_next_attr = &(to_attr->next); - } - - /* Children */ - to->alt.elem.children = NULL; - xmlNodePtr from_child; - rexmpp_xml_t **to_next_child = &(to->alt.elem.children); - for (from_child = from->children; - from_child != NULL; - from_child = from_child->next) - { - rexmpp_xml_t *next_child = rexmpp_xml_from_libxml2(from_child); - if (next_child != NULL) { - *to_next_child = next_child; - to_next_child = &(next_child->next); - } - } - - /* Next */ - to->next = NULL; - - } else if (from->type == XML_TEXT_NODE) { - to = malloc(sizeof(rexmpp_xml_t)); - to->type = REXMPP_XML_TEXT; - to->alt.text = xmlNodeGetContent(from); - to->next = NULL; - } - return to; -} - -rexmpp_xml_t *rexmpp_xml_from_libxml2_list (xmlNodePtr from) { - if (from == NULL) { - return NULL; - } - rexmpp_xml_t *to = rexmpp_xml_from_libxml2(from); - if (from->next != NULL) { - to->next = rexmpp_xml_from_libxml2_list(from->next); - } - return to; -} - -xmlNodePtr rexmpp_xml_to_libxml2 (rexmpp_xml_t *from) { - if (from == NULL) { - return NULL; - } - - if (from->type == REXMPP_XML_TEXT) { - xmlNodePtr to = xmlNewText(from->alt.text); - to->next = rexmpp_xml_to_libxml2(from->next); - return to; - } - - /* Name and namespace */ - xmlNodePtr to = xmlNewNode(NULL, from->alt.elem.qname.name); - if (from->alt.elem.qname.namespace != NULL) { - xmlNewNs(to, from->alt.elem.qname.namespace, NULL); - } - - /* Attributes */ - rexmpp_xml_attr_t *attr = from->alt.elem.attributes; - while (attr != NULL) { - /* TODO: Would be nice to take namespaces into account, though - they are currently not used for attributes. 
*/ - xmlNewProp(to, attr->qname.name, attr->value); - attr = attr->next; - } - - /* Children */ - rexmpp_xml_t *child = from->alt.elem.children; - while (child != NULL) { - xmlAddChild(to, rexmpp_xml_to_libxml2(child)); - child = child->next; - } - return to; -} - -xmlNodePtr rexmpp_xml_to_libxml2_list (rexmpp_xml_t *from) { - xmlNodePtr to = rexmpp_xml_to_libxml2(from); - if (from->next != NULL) { - xmlAddNextSibling(to, rexmpp_xml_to_libxml2_list(from->next)); - } - return to; -} -#ifndef USE_RUST rexmpp_xml_t *rexmpp_xml_new_text (const char *str) { rexmpp_xml_t *node = malloc(sizeof(rexmpp_xml_t)); node->type = REXMPP_XML_TEXT; @@ -590,32 +460,126 @@ rexmpp_xml_add_id (rexmpp_t *s, } #endif -xmlNodePtr rexmpp_xml_parse_libxml2 (const char *str, int str_len) { - xmlNodePtr elem = NULL; - xmlDocPtr doc = xmlReadMemory(str, str_len, "", "utf-8", XML_PARSE_NONET); - if (doc != NULL) { - elem = xmlCopyNode(xmlDocGetRootElement(doc), 1); - xmlFreeDoc(doc); +/* These SAX handlers are similar to those from rexmpp.c, and perhaps + can be reused. 
*/ +void rexmpp_xml_parse_sax_characters (struct rexmpp_xml_builder *builder, + const char *ch, + size_t len) +{ + if (builder->current != NULL) { + rexmpp_xml_t *last_node = builder->current->alt.elem.children; + if (last_node != NULL && last_node->type == REXMPP_XML_TEXT) { + /* The last child is textual as well, just extend it */ + size_t last_len = strlen(last_node->alt.text); + last_node->alt.text = realloc(last_node->alt.text, last_len + len + 1); + strncpy(last_node->alt.text + last_len, ch, len); + last_node->alt.text[last_len + len] = '\0'; + } else { + rexmpp_xml_t *text_node = rexmpp_xml_new_text_len(ch, len); + if (text_node != NULL) { + text_node->next = builder->current->alt.elem.children; + builder->current->alt.elem.children = text_node; + } + } + } +} + +void rexmpp_xml_parse_sax_start_elem_ns (struct rexmpp_xml_builder *builder, + const char *name, + const char *namespace, + rexmpp_xml_attr_t *attributes) +{ + if (builder->current == NULL && builder->root == NULL) { + /* Just started */ + builder->current = rexmpp_xml_new_elem(name, namespace); + builder->root = builder->current; + } else if (builder->current != NULL) { + /* Parsing is in progress */ + rexmpp_xml_t *node = rexmpp_xml_new_elem(name, namespace); + node->next = builder->current->alt.elem.children; + builder->current->alt.elem.children = node; + builder->current = node; + } else { + /* The parsind is over, but we are receiving these events + still. Just free the attribute lists, ignore the rest. */ + rexmpp_xml_attribute_free_list(attributes); } - return elem; + builder->current->alt.elem.attributes = attributes; } +void rexmpp_xml_parse_sax_end_elem_ns (struct rexmpp_xml_builder *builder) +{ + if (builder->current != builder->root) { + /* Find the parent, set it as current element. 
*/ + rexmpp_xml_t *parent = builder->root; + while (parent->alt.elem.children != builder->current) { + parent = parent->alt.elem.children; + } + builder->current = parent; + } else { + /* Done parsing this element; reverse all the lists of children. */ + builder->current = NULL; + rexmpp_xml_reverse_children(builder->root); + } +} + +struct rexmpp_xml_parser_handlers builder_sax = { + (rexmpp_xml_parser_element_start)rexmpp_xml_parse_sax_start_elem_ns, + (rexmpp_xml_parser_element_end)rexmpp_xml_parse_sax_end_elem_ns, + (rexmpp_xml_parser_characters)rexmpp_xml_parse_sax_characters +}; + rexmpp_xml_t *rexmpp_xml_parse (const char *str, int str_len) { - xmlNodePtr node_lxml2 = rexmpp_xml_parse_libxml2(str, str_len); - if (node_lxml2 != NULL) { - rexmpp_xml_t *node = rexmpp_xml_from_libxml2(node_lxml2); - xmlFreeNode(node_lxml2); - return node; + struct rexmpp_xml_builder builder = { NULL, NULL }; + rexmpp_xml_parser_ctx_t parser = + rexmpp_xml_parser_new(&builder_sax, &builder); + rexmpp_xml_parser_feed(parser, str, str_len); + rexmpp_xml_parser_free(parser); + if (builder.current != NULL) { + /* The parsing is not complete. */ + rexmpp_xml_free(builder.root); + return NULL; } - return NULL; + return builder.root; +} + +rexmpp_xml_t *rexmpp_xml_read_fd (FILE *fd) { + struct rexmpp_xml_builder builder = { NULL, NULL }; + rexmpp_xml_parser_ctx_t parser = + rexmpp_xml_parser_new(&builder_sax, &builder); + if (parser == NULL) { + return NULL; + } + + char *buf; + size_t len = 0; + do { + len = getline(&buf, &len, fd); + if (len > 0) { + rexmpp_xml_parser_feed(parser, buf, len); + } + } while (len > 0 && + ! (builder.root != NULL && builder.current == NULL) ); + + rexmpp_xml_parser_free(parser); + + if (builder.current != NULL) { + /* The parsing is not complete. 
*/ + rexmpp_xml_free(builder.root); + return NULL; + } + + return builder.root; } rexmpp_xml_t *rexmpp_xml_read_file (const char *path) { - xmlDocPtr doc = xmlReadFile(path, "utf-8", XML_PARSE_NONET); - xmlNodePtr lxml2 = xmlDocGetRootElement(doc); - rexmpp_xml_t *ret = rexmpp_xml_from_libxml2(lxml2); - xmlFreeDoc(doc); - return ret; + FILE *fd = fopen(path, "r"); + if (fd == NULL) { + return NULL; + } + rexmpp_xml_t *node = rexmpp_xml_read_fd(fd); + fclose(fd); + return node; } #ifndef USE_RUST @@ -806,7 +770,7 @@ char *rexmpp_xml_text_child (rexmpp_xml_t *node) { return rexmpp_xml_text(rexmpp_xml_children(node)); } -rexmpp_xml_t *rexmpp_xml_reverse (rexmpp_xml_t *node) { +rexmpp_xml_t *rexmpp_xml_reverse_list (rexmpp_xml_t *node) { rexmpp_xml_t *next, *prev = NULL; while (node != NULL) { next = node->next; @@ -817,15 +781,17 @@ rexmpp_xml_t *rexmpp_xml_reverse (rexmpp_xml_t *node) { return prev; } -rexmpp_xml_t *rexmpp_xml_reverse_all (rexmpp_xml_t *node) { - node = rexmpp_xml_reverse(node); +void rexmpp_xml_reverse_children (rexmpp_xml_t *node) { + if (node == NULL || node->type != REXMPP_XML_ELEMENT) { + return; + } + node->alt.elem.children = rexmpp_xml_reverse_list(node->alt.elem.children); rexmpp_xml_t *cur; - for (cur = node; cur != NULL; cur = cur->next) { - if (cur->type == REXMPP_XML_ELEMENT) { - cur->alt.elem.children = rexmpp_xml_reverse_all(cur->alt.elem.children); + for (cur = node->alt.elem.children; cur != NULL; cur = cur->next) { + if (cur->type == REXMPP_XML_ELEMENT && cur->alt.elem.children != NULL) { + rexmpp_xml_reverse_children(cur); } } - return node; } #endif diff --git a/src/rexmpp_xml.h b/src/rexmpp_xml.h index 972c75c..d5a9af1 100644 --- a/src/rexmpp_xml.h +++ b/src/rexmpp_xml.h @@ -9,8 +9,6 @@ #ifndef REXMPP_XML_H #define REXMPP_XML_H -#include - typedef struct rexmpp_xml_qname rexmpp_xml_qname_t; typedef struct rexmpp_xml_attribute rexmpp_xml_attr_t; typedef struct rexmpp_xml_node rexmpp_xml_t; @@ -46,6 +44,10 @@ struct rexmpp_xml_node 
{ rexmpp_xml_t *next; }; +struct rexmpp_xml_builder { + rexmpp_xml_t *current; + rexmpp_xml_t *root; +}; void rexmpp_xml_qname_free (rexmpp_xml_qname_t *qname); void rexmpp_xml_attribute_free (rexmpp_xml_attr_t *attr); @@ -71,22 +73,6 @@ rexmpp_xml_t *rexmpp_xml_clone (rexmpp_xml_t *node); */ rexmpp_xml_t *rexmpp_xml_clone_list (rexmpp_xml_t *node); -/** - @brief Creates a single ::rexmpp_xml_t XML node out of libxml2's - xmlNode, without siblings. -*/ -rexmpp_xml_t *rexmpp_xml_from_libxml2 (xmlNodePtr from); - -/** - @brief Creates a ::rexmpp_xml_t XML node out of libxml2's xmlNode, - with siblings. -*/ -rexmpp_xml_t *rexmpp_xml_from_libxml2_list (xmlNodePtr from); - -xmlNodePtr rexmpp_xml_to_libxml2 (rexmpp_xml_t *from); - -xmlNodePtr rexmpp_xml_to_libxml2_list (rexmpp_xml_t *from); - /** @brief Creates a textual ::rexmpp_xml_t XML node (with type = ::REXMPP_XML_TEXT). @@ -243,10 +229,40 @@ int rexmpp_xml_eq (rexmpp_xml_t *n1, rexmpp_xml_t *n2); */ rexmpp_xml_t *rexmpp_xml_parse (const char *str, int str_len); +/** + @brief Reads XML from a file stream, reading the stream line by + line. + @param[in] fd A file stream + @returns Parsed XML, or NULL on failure. +*/ +rexmpp_xml_t *rexmpp_xml_read_fd (FILE *fd); + +/** + @brief Reads XML from a file + @param[in] path A file path + @returns Parsed XML, or NULL on failure. +*/ rexmpp_xml_t *rexmpp_xml_read_file (const char *path); + +/** + @brief Writes XML into a file + @param[in] path A file path + @param[in] node XML to write + @returns 0 on success, -1 on failure. 
+*/ int rexmpp_xml_write_file (const char *path, rexmpp_xml_t* node); -rexmpp_xml_t *rexmpp_xml_reverse (rexmpp_xml_t *node); -rexmpp_xml_t *rexmpp_xml_reverse_all (rexmpp_xml_t *node); +/** + @brief Reverses a linked list of XML nodes + @param[in,out] node The head of the list to reverse + @returns The new head of the list +*/ +rexmpp_xml_t *rexmpp_xml_reverse_list (rexmpp_xml_t *node); + +/** + @brief Recursively reverses children of an XML element + @param[in,out] node The root XML element +*/ +void rexmpp_xml_reverse_children (rexmpp_xml_t *node); #endif diff --git a/src/rexmpp_xml.rs b/src/rexmpp_xml.rs index 1f9f4d6..717501d 100644 --- a/src/rexmpp_xml.rs +++ b/src/rexmpp_xml.rs @@ -975,8 +975,8 @@ fn rexmpp_xml_text_child (node: *mut RexmppXML) #[no_mangle] extern "C" -fn rexmpp_xml_reverse (mut node: *mut RexmppXML) - -> *mut RexmppXML { +fn rexmpp_xml_reverse_list (mut node: *mut RexmppXML) + -> *mut RexmppXML { let mut next; let mut prev = ptr::null_mut(); while node != ptr::null_mut() { @@ -992,14 +992,22 @@ fn rexmpp_xml_reverse (mut node: *mut RexmppXML) #[no_mangle] extern "C" -fn rexmpp_xml_reverse_all (node: *mut RexmppXML) - -> *mut RexmppXML { - let mut cur = node; - while cur != ptr::null_mut() { - unsafe { - if (*cur).node_type == NodeType::Element { +fn rexmpp_xml_reverse_children (node: *mut RexmppXML) + -> *mut RexmppXML { + unsafe { + if node == ptr::null_mut() || (*node).node_type != NodeType::Element { + return node; + } + (*node).alt.elem.children = + rexmpp_xml_reverse_list((*node).alt.elem.children);; + + let mut cur = node; + while cur != ptr::null_mut() { + if (*cur).node_type == NodeType::Element && + (*cur).alt.elem.children != ptr::null_mut() + { (*cur).alt.elem.children = - rexmpp_xml_reverse_all((*cur).alt.elem.children); + rexmpp_xml_reverse_children((*cur).alt.elem.children); } cur = (*cur).next; } diff --git a/src/rexmpp_xml_parser.c b/src/rexmpp_xml_parser.c new file mode 100644 index 0000000..d30d630 --- /dev/null +++ 
b/src/rexmpp_xml_parser.c @@ -0,0 +1,318 @@ +/** + @file rexmpp_xml_parser.c + @brief XML parsing for rexmpp + @author defanor + @date 2023 + @copyright MIT license. +*/ + +#include "rexmpp.h" +#include "rexmpp_xml.h" +#include "rexmpp_xml_parser.h" +#include "config.h" + +#if defined(USE_LIBXML2) + +void rexmpp_xml_sax_characters (rexmpp_xml_parser_ctx_t ctx, + const char *ch, + int len) +{ + ctx->handlers->text(ctx->user_data, ch, len); +} + +void rexmpp_xml_sax_elem_start (rexmpp_xml_parser_ctx_t ctx, + const char *localname, + const char *prefix, + const char *URI, + int nb_namespaces, + const char **namespaces, + int nb_attributes, + int nb_defaulted, + const char **attributes) +{ + (void)prefix; + (void)nb_namespaces; + (void)namespaces; + (void)nb_defaulted; + rexmpp_xml_attr_t *attrs = NULL; + int i; + for (i = nb_attributes - 1; i >= 0; i--) { + size_t attr_len = attributes[i * 5 + 4] - attributes[i * 5 + 3]; + char *attr_val = malloc(attr_len + 1); + attr_val[attr_len] = '\0'; + strncpy(attr_val, attributes[i * 5 + 3], attr_len); + rexmpp_xml_attr_t *attr = + rexmpp_xml_attr_new(attributes[i * 5], NULL, attr_val); + free(attr_val); + attr->next = attrs; + attrs = attr; + } + + ctx->handlers->elem_start(ctx->user_data, localname, URI, attrs); +} + +void rexmpp_xml_sax_elem_end (rexmpp_xml_parser_ctx_t ctx, + const char *localname, + const char *prefix, + const char *URI) +{ + (void)localname; + (void)prefix; + (void)URI; + ctx->handlers->elem_end(ctx->user_data); +} + +xmlSAXHandler rexmpp_xml_parser_sax = { + .initialized = XML_SAX2_MAGIC, + .characters = (charactersSAXFunc)rexmpp_xml_sax_characters, + .startElementNs = (startElementNsSAX2Func)rexmpp_xml_sax_elem_start, + .endElementNs = (endElementNsSAX2Func)rexmpp_xml_sax_elem_end, +}; + + +/* rexmpp_xml_t *rexmpp_xml_from_libxml2 (xmlNodePtr from) { */ +/* if (from == NULL) { */ +/* return NULL; */ +/* } */ + +/* rexmpp_xml_t *to = NULL; */ +/* if (from->type == XML_ELEMENT_NODE) { */ +/* to = 
malloc(sizeof(rexmpp_xml_t)); */ + +/* /\* Type *\/ */ +/* to->type = REXMPP_XML_ELEMENT; */ + +/* /\* Name and namespace *\/ */ +/* to->alt.elem.qname.name = strdup(from->name); */ +/* if (from->nsDef != NULL && from->nsDef->href != NULL) { */ +/* to->alt.elem.qname.namespace = strdup(from->nsDef->href); */ +/* } else { */ +/* to->alt.elem.qname.namespace = NULL; */ +/* } */ + +/* /\* Attributes *\/ */ +/* to->alt.elem.attributes = NULL; */ +/* struct _xmlAttr *from_attr; */ +/* rexmpp_xml_attr_t **to_next_attr = &(to->alt.elem.attributes); */ +/* for (from_attr = from->properties; */ +/* from_attr != NULL; */ +/* from_attr = from_attr->next) */ +/* { */ +/* rexmpp_xml_attr_t *to_attr = */ +/* malloc(sizeof(rexmpp_xml_attr_t)); */ +/* to_attr->qname.name = strdup(from_attr->name); */ +/* to_attr->qname.namespace = NULL; */ +/* if (from_attr->ns != NULL && from_attr->ns->href != NULL) { */ +/* to_attr->qname.namespace = strdup(from_attr->ns->href); */ +/* to_attr->value = */ +/* xmlGetNsProp(from, to_attr->qname.name, to_attr->qname.namespace); */ +/* } else { */ +/* to_attr->value = xmlGetProp(from, to_attr->qname.name); */ +/* } */ +/* to_attr->next = NULL; */ + +/* *to_next_attr = to_attr; */ +/* to_next_attr = &(to_attr->next); */ +/* } */ + +/* /\* Children *\/ */ +/* to->alt.elem.children = NULL; */ +/* xmlNodePtr from_child; */ +/* rexmpp_xml_t **to_next_child = &(to->alt.elem.children); */ +/* for (from_child = from->children; */ +/* from_child != NULL; */ +/* from_child = from_child->next) */ +/* { */ +/* rexmpp_xml_t *next_child = rexmpp_xml_from_libxml2(from_child); */ +/* if (next_child != NULL) { */ +/* *to_next_child = next_child; */ +/* to_next_child = &(next_child->next); */ +/* } */ +/* } */ + +/* /\* Next *\/ */ +/* to->next = NULL; */ + +/* } else if (from->type == XML_TEXT_NODE) { */ +/* to = malloc(sizeof(rexmpp_xml_t)); */ +/* to->type = REXMPP_XML_TEXT; */ +/* to->alt.text = xmlNodeGetContent(from); */ +/* to->next = NULL; */ +/* } */ +/* 
return to; */ +/* } */ + +/* rexmpp_xml_t *rexmpp_xml_from_libxml2_list (xmlNodePtr from) { */ +/* if (from == NULL) { */ +/* return NULL; */ +/* } */ +/* rexmpp_xml_t *to = rexmpp_xml_from_libxml2(from); */ +/* if (from->next != NULL) { */ +/* to->next = rexmpp_xml_from_libxml2_list(from->next); */ +/* } */ +/* return to; */ +/* } */ + +/* xmlNodePtr rexmpp_xml_to_libxml2 (rexmpp_xml_t *from) { */ +/* if (from == NULL) { */ +/* return NULL; */ +/* } */ + +/* if (from->type == REXMPP_XML_TEXT) { */ +/* xmlNodePtr to = xmlNewText(from->alt.text); */ +/* to->next = rexmpp_xml_to_libxml2(from->next); */ +/* return to; */ +/* } */ + +/* /\* Name and namespace *\/ */ +/* xmlNodePtr to = xmlNewNode(NULL, from->alt.elem.qname.name); */ +/* if (from->alt.elem.qname.namespace != NULL) { */ +/* xmlNewNs(to, from->alt.elem.qname.namespace, NULL); */ +/* } */ + +/* /\* Attributes *\/ */ +/* rexmpp_xml_attr_t *attr = from->alt.elem.attributes; */ +/* while (attr != NULL) { */ +/* /\* TODO: Would be nice to take namespaces into account, though */ +/* they are currently not used for attributes. 
*\/ */
+/*     xmlNewProp(to, attr->qname.name, attr->value); */
+/*     attr = attr->next; */
+/*   } */
+
+/*   /\* Children *\/ */
+/*   rexmpp_xml_t *child = from->alt.elem.children; */
+/*   while (child != NULL) { */
+/*     xmlAddChild(to, rexmpp_xml_to_libxml2(child)); */
+/*     child = child->next; */
+/*   } */
+/*   return to; */
+/* } */
+
+/* xmlNodePtr rexmpp_xml_to_libxml2_list (rexmpp_xml_t *from) { */
+/*   xmlNodePtr to = rexmpp_xml_to_libxml2(from); */
+/*   if (from->next != NULL) { */
+/*     xmlAddNextSibling(to, rexmpp_xml_to_libxml2_list(from->next)); */
+/*   } */
+/*   return to; */
+/* } */
+
+#elif defined(USE_EXPAT)
+
+/* Separator expat is asked to put between a namespace URI and a local
+   name. The 0xFF byte never occurs in UTF-8 text, so it cannot be
+   confused with a part of either. */
+#define REXMPP_XML_EXPAT_NS_SEP '\xff'
+
+/**
+   @brief expat start element callback
+   @param[in] ctx The parser context, registered as expat user data
+   @param[in] el Element name, "URI<sep>local" if it is in a namespace
+   @param[in] attributes NULL-terminated array of alternating attribute
+   names and values
+
+   Splits the expanded element name into a namespace and a local name,
+   builds a ::rexmpp_xml_attr_t list in document order, and passes
+   those to the user-provided elem_start handler. As in the libxml2
+   backend, attributes are recorded by local name, without a
+   namespace. If the element name cannot be duplicated, parsing is
+   stopped, since skipping the event would leave start and end events
+   unbalanced.
+*/
+void XMLCALL
+rexmpp_xml_sax_elem_start (rexmpp_xml_parser_ctx_t ctx,
+                           const char *el,
+                           const char **attributes)
+{
+  char *buf = strdup(el);
+  if (buf == NULL) {
+    XML_StopParser(ctx->xml_parser, XML_FALSE);
+    return;
+  }
+  /* Split at the first separator; without one there is no namespace. */
+  char *name = buf, *namespace = NULL;
+  char *sep = strchr(buf, REXMPP_XML_EXPAT_NS_SEP);
+  if (sep != NULL) {
+    *sep = '\0';
+    namespace = buf;
+    name = sep + 1;
+  }
+
+  rexmpp_xml_attr_t *attrs = NULL;
+  /* Append at the tail to keep the attributes in document order. */
+  rexmpp_xml_attr_t **tail = &attrs;
+  size_t i;
+  for (i = 0; attributes[i] != NULL; i += 2) {
+    /* Prefixed attribute names are expanded by expat too; keep just
+       the local part. */
+    const char *attr_name = strchr(attributes[i], REXMPP_XML_EXPAT_NS_SEP);
+    attr_name = (attr_name != NULL) ? attr_name + 1 : attributes[i];
+    rexmpp_xml_attr_t *attr =
+      rexmpp_xml_attr_new(attr_name, NULL, attributes[i + 1]);
+    if (attr == NULL) {
+      /* In case the constructor reports an allocation failure. */
+      continue;
+    }
+    attr->next = NULL;
+    *tail = attr;
+    tail = &(attr->next);
+  }
+
+  ctx->handlers->elem_start(ctx->user_data, name, namespace, attrs);
+  free(buf);
+}
+
+/**
+   @brief expat end element callback
+   @param[in] ctx The parser context, registered as expat user data
+   @param[in] name Element name (unused)
+*/
+void XMLCALL
+rexmpp_xml_sax_elem_end(rexmpp_xml_parser_ctx_t ctx,
+                        const XML_Char *name)
+{
+  (void)name;
+  ctx->handlers->elem_end(ctx->user_data);
+}
+
+/**
+   @brief expat character data callback
+   @param[in] ctx The parser context, registered as expat user data
+   @param[in] ch Character data (not NUL-terminated)
+   @param[in] len Length of the character data
+*/
+void XMLCALL
+rexmpp_xml_sax_characters (rexmpp_xml_parser_ctx_t ctx,
+                           const XML_Char *ch,
+                           int len)
+{
+  ctx->handlers->text(ctx->user_data, ch, len);
+}
+
+/* Binds the context and the callbacks above to an expat parser. Needed
+   after creation, and again after XML_ParserReset, which clears
+   them. */
+static void
+rexmpp_xml_parser_expat_setup (XML_Parser p, rexmpp_xml_parser_ctx_t ctx)
+{
+  XML_SetUserData(p, ctx);
+  XML_SetStartElementHandler(p, (XML_StartElementHandler)
+                             rexmpp_xml_sax_elem_start);
+  XML_SetEndElementHandler(p, (XML_EndElementHandler)
+                           rexmpp_xml_sax_elem_end);
+  XML_SetCharacterDataHandler(p, (XML_CharacterDataHandler)
+                              rexmpp_xml_sax_characters);
+}
+
+#endif
+
+
+
+/* Allocates a context and the underlying parser for the selected
+   backend. Returns NULL if either allocation fails. */
+rexmpp_xml_parser_ctx_t
+rexmpp_xml_parser_new (rexmpp_xml_parser_handlers_t handlers,
+                       void *data)
+{
+  rexmpp_xml_parser_ctx_t ctx = malloc(sizeof *ctx);
+  if (ctx == NULL) {
+    return NULL;
+  }
+#if defined(USE_LIBXML2)
+  xmlParserCtxtPtr p =
+    xmlCreatePushParserCtxt(&rexmpp_xml_parser_sax, ctx, "", 0, NULL);
+#elif defined(USE_EXPAT)
+  XML_Parser p = XML_ParserCreateNS("utf-8", REXMPP_XML_EXPAT_NS_SEP);
+#endif
+  /* Check before touching the parser: it must not be configured if
+     its creation has failed. */
+  if (p == NULL) {
+    free(ctx);
+    return NULL;
+  }
+#if defined(USE_LIBXML2)
+  /* The handlers and the user data were bound at creation. */
+#elif defined(USE_EXPAT)
+  rexmpp_xml_parser_expat_setup(p, ctx);
+#endif
+
+  ctx->xml_parser = p;
+  ctx->handlers = handlers;
+  ctx->user_data = data;
+  return ctx;
+}
+
+/* Frees the underlying parser and the context. A NULL context is
+   ignored, as with free(). */
+void rexmpp_xml_parser_free (rexmpp_xml_parser_ctx_t ctx) {
+  if (ctx == NULL) {
+    return;
+  }
+#if defined(USE_LIBXML2)
+  xmlFreeParserCtxt(ctx->xml_parser);
+#elif defined(USE_EXPAT)
+  XML_ParserFree(ctx->xml_parser);
+#endif
+  free(ctx);
+}
+
+/* Prepares the parser for a new document, keeping the handlers and
+   the user data. Returns the same context pointer. */
+rexmpp_xml_parser_ctx_t rexmpp_xml_parser_reset (rexmpp_xml_parser_ctx_t ctx) {
+#if defined(USE_LIBXML2)
+  xmlCtxtResetPush(ctx->xml_parser, "", 0, "", "utf-8");
+#elif defined(USE_EXPAT)
+  XML_ParserReset(ctx->xml_parser, "utf-8");
+  rexmpp_xml_parser_expat_setup(ctx->xml_parser, ctx);
+#endif
+  return ctx;
+}
+
+/* Pushes a non-final chunk into the parser. The status returned by
+   the backend is discarded, so parsing errors are not reported here.
+   NOTE(review): both backends take the length as an int, so len is
+   narrowed on these calls. */
+void
+rexmpp_xml_parser_feed (rexmpp_xml_parser_ctx_t ctx,
+                        const char *chunk,
+                        size_t len)
+{
+#if defined(USE_LIBXML2)
+  xmlParseChunk(ctx->xml_parser, chunk, len, 0);
+#elif defined(USE_EXPAT)
+  XML_Parse(ctx->xml_parser, chunk, len, 0);
+#endif
+}
diff --git a/src/rexmpp_xml_parser.h b/src/rexmpp_xml_parser.h
new file mode 100644
index 0000000..07464a2
--- /dev/null
+++ b/src/rexmpp_xml_parser.h
@@ -0,0 +1,103 @@
+/**
+   @file rexmpp_xml_parser.h
+   @brief XML parsing for rexmpp
+   @author defanor
+   @date 2023
+   @copyright MIT license.
+*/
+
+#ifndef REXMPP_XML_PARSER_H
+#define REXMPP_XML_PARSER_H
+
+/* USE_LIBXML2 and USE_EXPAT are expected to come from config.h, so it
+   has to be included before they are tested. */
+#include "config.h"
+
+#include <stddef.h>
+
+#if defined(USE_LIBXML2)
+  #include <libxml/parser.h>
+#elif defined(USE_EXPAT)
+  #include <expat.h>
+#endif
+
+/* For rexmpp_xml_attr_t. */
+#include "rexmpp_xml.h"
+
+/**
+   @brief Element start handler
+   @param[in] data User-provided data
+   @param[in] name Local name of the element
+   @param[in] namespace Namespace of the element, may be NULL
+   @param[in] attributes A linked list of the element's attributes
+*/
+typedef void (*rexmpp_xml_parser_element_start) (void *data,
+                                                 const char *name,
+                                                 const char *namespace,
+                                                 rexmpp_xml_attr_t *attributes);
+
+/**
+   @brief Element end handler
+   @param[in] data User-provided data
+*/
+typedef void (*rexmpp_xml_parser_element_end) (void *data);
+
+/**
+   @brief Character data handler
+   @param[in] data User-provided data
+   @param[in] ch Character data, not NUL-terminated
+   @param[in] len Length of the character data
+*/
+typedef void (*rexmpp_xml_parser_characters) (void *data,
+                                              const char *ch,
+                                              size_t len);
+
+/** @brief A set of SAX-like parser event handlers */
+struct rexmpp_xml_parser_handlers {
+  rexmpp_xml_parser_element_start elem_start;
+  rexmpp_xml_parser_element_end elem_end;
+  rexmpp_xml_parser_characters text;
+};
+
+
+typedef struct rexmpp_xml_parser_ctx* rexmpp_xml_parser_ctx_t;
+typedef struct rexmpp_xml_parser_handlers* rexmpp_xml_parser_handlers_t;
+
+/**
+   @brief XML parser context: the underlying parser of the selected
+   backend, the event handlers, and the data to pass to them
+*/
+struct rexmpp_xml_parser_ctx {
+#if defined(USE_LIBXML2)
+  xmlParserCtxtPtr xml_parser;
+#elif defined(USE_EXPAT)
+  XML_Parser xml_parser;
+#endif
+  rexmpp_xml_parser_handlers_t handlers;
+  void *user_data;
+};
+
+/**
+   @brief Allocates a new XML parser context
+   @param[in] handlers SAX-like parser event handlers
+   @param[in] data User-provided data to pass to the handlers
+   @returns A parser context pointer, or NULL on failure.
+*/
+rexmpp_xml_parser_ctx_t
+rexmpp_xml_parser_new (rexmpp_xml_parser_handlers_t handlers,
+                       void *data);
+
+/**
+   @brief Frees an XML parser context
+   @param[in] ctx An XML parser context
+
+   Releases both the underlying parser and the context itself; the
+   context must not be used afterwards.
+*/
+void rexmpp_xml_parser_free (rexmpp_xml_parser_ctx_t ctx);
+
+/**
+   @brief Feeds data to parse into an XML parser
+   @param[in] ctx An XML parser context
+   @param[in] chunk A chunk of data to parse
+   @param[in] len Length of the data chunk
+
+   The data is treated as a non-final chunk of the document. Handlers
+   are invoked from within this call as elements and text are
+   recognized. No parsing status is returned.
+*/
+void
+rexmpp_xml_parser_feed (rexmpp_xml_parser_ctx_t ctx,
+                        const char *chunk,
+                        size_t len);
+
+/**
+   @brief Resets a parser context
+   @param[in] ctx An XML parser context
+   @returns A new pointer, since it may change during a reset
+
+   Prepares the context for parsing a new document. Callers should
+   continue with the returned pointer rather than with @p ctx.
+*/
+rexmpp_xml_parser_ctx_t rexmpp_xml_parser_reset (rexmpp_xml_parser_ctx_t ctx);
+
+
+/* #if defined(USE_LIBXML2) */
+/* /\** */
+/*    @brief Creates a single ::rexmpp_xml_t XML node out of libxml2's */
+/*    xmlNode, without siblings. */
+/* *\/ */
+/* rexmpp_xml_t *rexmpp_xml_from_libxml2 (xmlNodePtr from); */
+
+/* /\** */
+/*    @brief Creates a ::rexmpp_xml_t XML node out of libxml2's xmlNode, */
+/*    with siblings. */
+/* *\/ */
+/* rexmpp_xml_t *rexmpp_xml_from_libxml2_list (xmlNodePtr from); */
+
+/* xmlNodePtr rexmpp_xml_to_libxml2 (rexmpp_xml_t *from); */
+
+/* xmlNodePtr rexmpp_xml_to_libxml2_list (rexmpp_xml_t *from); */
+/* #endif */
+
+#endif
-- 
cgit v1.2.3