Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,28 @@ else
fi
AC_SUBST(FEATURE_REGEXP)

# XML parsing (optional feature, disabled unless --enable-xml is given).
# Only the values "yes" and "no" are accepted; anything else aborts configure.
AC_ARG_ENABLE(xml,
[AS_HELP_STRING([--enable-xml],[Enable XML parsing @<:@default=no@:>@])],
[case "${enableval}" in
yes) enable_xml="yes" ;;
no) enable_xml="no" ;;
*) AC_MSG_ERROR(bad value ${enableval} for --enable-xml) ;;
esac],
[enable_xml="no"]
)
# Automake conditional used by the Makefiles to enable the XML test cases.
AM_CONDITIONAL(ENABLE_XML, test x$enable_xml = xyes)
if test "$enable_xml" = "yes"; then
# Locate libxml2 through pkg-config: the module name libxml2 is tried first,
# and libxml-2.0 is tried as a fallback when the first lookup fails.
# The result is exported as LIBXML2_CFLAGS and LIBXML2_LIBS.
PKG_CHECK_MODULES(LIBXML2, libxml2,,
[PKG_CHECK_MODULES(LIBXML2, libxml-2.0,,)]
)
# FEATURE_XML is the preprocessor symbol the C sources test with #ifdef.
AC_DEFINE(FEATURE_XML, 1, [XML parsing support enabled.])
FEATURE_XML=1
else
FEATURE_XML=0
fi
# Also make FEATURE_XML (1 or 0) available as a substitution variable.
AC_SUBST(FEATURE_XML)

# debug mode settings
AC_ARG_ENABLE(debug,
[AS_HELP_STRING([--enable-debug],[Enable debug mode @<:@default=no@:>@])],
Expand Down Expand Up @@ -189,6 +211,7 @@ echo "*****************************************************"
echo "liblognorm will be compiled with the following settings:"
echo
echo "Regex enabled: $enable_regexp"
echo "XML enabled: $enable_xml"
echo "Advanced Statistics enabled: $enable_advstats"
echo "Testbench enabled: $enable_testbench"
echo "Valgrind enabled: $enable_valgrind"
Expand Down
4 changes: 2 additions & 2 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ liblognorm_la_SOURCES += \
v1_ptree.c \
v1_samp.c

liblognorm_la_CPPFLAGS = $(JSON_C_CFLAGS) $(WARN_CFLAGS) $(LIBESTR_CFLAGS) $(PCRE_CFLAGS)
liblognorm_la_LIBADD = $(rt_libs) $(JSON_C_LIBS) $(LIBESTR_LIBS) $(PCRE_LIBS) -lestr
liblognorm_la_CPPFLAGS = $(JSON_C_CFLAGS) $(WARN_CFLAGS) $(LIBESTR_CFLAGS) $(PCRE_CFLAGS) $(LIBXML2_CFLAGS)
liblognorm_la_LIBADD = $(rt_libs) $(JSON_C_LIBS) $(LIBESTR_LIBS) $(PCRE_LIBS) $(LIBXML2_LIBS) -lestr
# info on version-info:
# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
# Note: v2 now starts at version 5, as v1 previously also had 4
Expand Down
105 changes: 105 additions & 0 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@
#include <errno.h>
#endif

#ifdef FEATURE_XML
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
#endif


/* how should output values be formatted? */
enum FMT_MODE {
Expand Down Expand Up @@ -75,6 +80,41 @@ hParseInt(const unsigned char **buf, size_t *lenBuf)
return i;
}


#ifdef FEATURE_XML
/* Credits to https://github.com/katie-snow/xml2json-c
This code is under GPL-3.0 License
*/
/**
 * Recursively convert a chain of sibling XML nodes into members of a
 * JSON object.
 *
 * For each element node in the sibling list starting at anode:
 *  - an element without child elements becomes a JSON string that holds
 *    the element's text content;
 *  - an element with child elements becomes a nested JSON object, which
 *    is filled by recursing into the element's children.
 * Nodes that are not elements (text, comments, ...) are skipped, and XML
 * attributes are not converted.
 *
 * Note: members are added with json_object_object_add(), so when several
 * sibling elements share one name only the last one is kept.
 *
 * @param anode first node of the sibling list; may be NULL (nothing is added)
 * @param jobj  JSON object that receives the members; it takes ownership of
 *              every value added here. If NULL, the function does nothing.
 */
static inline void
xml2jsonc_convert_elements(xmlNode *anode, json_object *jobj)
{
	xmlNode *node;

	if (jobj == NULL)
		return;

	for (node = anode; node != NULL; node = node->next) {
		const char *name;

		if (node->type != XML_ELEMENT_NODE)
			continue;
		name = (const char *)node->name;

		if (xmlChildElementCount(node) == 0) {
			/* leaf element: store its text content as a JSON string */
			xmlChar *content = xmlNodeGetContent(node);
			json_object *jstr = json_object_new_string(
				content == NULL ? "" : (const char *)content);

			/* xmlNodeGetContent() hands out a copy the caller must release */
			if (content != NULL)
				xmlFree(content);
			if (jstr != NULL)
				json_object_object_add(jobj, name, jstr);
		} else {
			/* element with sub-elements: nested JSON object */
			json_object *child = json_object_new_object();

			if (child == NULL)
				continue;
			/* jobj takes over the only reference to child; taking an
			 * additional reference here would leak the subtree. */
			json_object_object_add(jobj, name, child);
			xml2jsonc_convert_elements(node->children, child);
		}
	}
}
#endif /* #ifdef FEATURE_XML */

/* parser _parse interface
*
* All parsers receive
Expand Down Expand Up @@ -2325,6 +2365,71 @@ PARSER_Parse(v2IPTables)
return r;
}

#ifdef FEATURE_XML
/**
 * Parse XML. This parser tries to interpret the data starting at the
 * current offset as an XML document and, on success, returns it
 * converted to a JSON object.
 *
 * The XML document is assumed to extend up to and including the LAST
 * '>' found in the remaining message text; that span is handed to
 * libxml2 and must be a well-formed document on its own. Anything after
 * that last '>' (e.g. trailing whitespace) is not consumed by this
 * parser and is left for the rest of the rule to match.
 *
 * The parser does not match (returns the initial value of r) when
 *  - there is no '>' in the remaining text, or
 *  - libxml2 rejects the span as not well-formed.
 *
 * NOTE(review): the search for '>' uses strrchr() and therefore assumes
 * that npb->str is NUL-terminated - confirm against the callers.
 *
 * added 2021-02-01 by jeremie.jourdin@advens.fr
 */
PARSER_Parse(XML)
	const char *const start = (const char *) npb->str + *offs;
	const char *last;
	size_t xmlLen;
	int xmlLenInt;
	xmlDocPtr doc = NULL;
	xmlNodePtr root_element;
	json_object *json = NULL;

	/* Find the last occurrence of '>'; without one this cannot be XML. */
	last = strrchr(start, '>');
	if(last == NULL)
		goto done;

	/* Length of the candidate document, including the closing '>'.
	 * xmlReadMemory() takes an int, so refuse lengths that do not fit. */
	xmlLen = (size_t) (last - start) + 1;
	xmlLenInt = (int) xmlLen;
	if(xmlLenInt <= 0 || (size_t) xmlLenInt != xmlLen)
		goto done;

	/* Parse straight from the message buffer (no copy needed). Log data
	 * is untrusted and frequently not XML at all, so keep libxml2 from
	 * printing diagnostics and from touching the network. */
	doc = xmlReadMemory(start, xmlLenInt, NULL, NULL,
		XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET);

	/* Invalid XML string */
	if(doc == NULL)
		goto done;

	/* Now convert XML document into JSON document */
	root_element = xmlDocGetRootElement(doc);
	json = json_object_new_object();
	CHKN(json);
	xml2jsonc_convert_elements(root_element, json);

	/* parsing OK */
	*parsed = xmlLen;
	r = 0;

	if(value == NULL) {
		json_object_put(json);
	} else {
		*value = json;
	}

done:
	if(doc != NULL)
		xmlFreeDoc(doc);
	/* xmlCleanupParser() is intentionally NOT called here: it tears down
	 * libxml2's global state and is meant to be called once at process
	 * exit by the application, never per parse from within a library. */
	return r;
}
#endif /* #ifdef FEATURE_XML */



/**
* Parse JSON. This parser tries to find JSON data inside a message.
* If it finds valid JSON, it will extract it. Extra data after the
Expand Down
3 changes: 3 additions & 0 deletions src/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ PARSERDEF_NO_DATA(MAC48);
PARSERDEF_NO_DATA(CEF);
PARSERDEF(CheckpointLEA);
PARSERDEF(NameValue);
#ifdef FEATURE_XML
PARSERDEF_NO_DATA(XML);
#endif

#undef PARSERDEF_NO_DATA

Expand Down
5 changes: 4 additions & 1 deletion src/pdag.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,10 @@ static struct ln_parser_info parser_lookup_table[] = {
PARSER_ENTRY("string-to", StringTo, 32),
PARSER_ENTRY("char-to", CharTo, 32),
PARSER_ENTRY("char-sep", CharSeparated, 32),
PARSER_ENTRY("string", String, 32)
PARSER_ENTRY("string", String, 32),
#ifdef FEATURE_XML
PARSER_ENTRY_NO_DATA("xml", XML, 4),
#endif
};
#define NPARSERS (sizeof(parser_lookup_table)/sizeof(struct ln_parser_info))
#define DFLT_USR_PARSER_PRIO 30000 /**< default priority if user has not specified it */
Expand Down
1 change: 1 addition & 0 deletions src/v1_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ hParseInt(const unsigned char **buf, size_t *lenBuf)
return i;
}


/* parsers for the primitive types
*
* All parsers receive
Expand Down
8 changes: 8 additions & 0 deletions tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ REGEXP_TESTS = \
field_tokenized_with_regex.sh \
field_regex_while_regex_support_is_disabled.sh

XML_TESTS = \
field_xml.sh \
field_xml_jsoncnf.sh

EXTRA_DIST = exec.sh \
$(TESTS_SHELLSCRIPTS) \
$(REGEXP_TESTS) \
Expand All @@ -167,3 +171,7 @@ EXTRA_DIST = exec.sh \
if ENABLE_REGEXP
TESTS += $(REGEXP_TESTS)
endif

if ENABLE_XML
TESTS += $(XML_TESTS)
endif
36 changes: 36 additions & 0 deletions tests/field_xml.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
# added 2021-11-14 by Theo Bertin
# This file is part of the liblognorm project, released under ASL 2.0
#
# Tests the "xml" field parser using the classic %name:type% rule syntax.
. $srcdir/exec.sh

test_def $0 "XML field"
add_rule 'version=2'
add_rule 'rule=:%field:xml%'

# A root element containing only text becomes a string member named
# after the element.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note>'
assert_output_json_eq '{ "field": { "note": "This is a simple note"} }'

# A root element with child elements becomes a nested object with one
# string member per child.
execute '<?xml version="1.0" encoding="UTF-8"?><note><one>first note</one><two>second note</two></note>'
assert_output_json_eq '{ "field": { "note": { "one": "first note", "two": "second note" } } }'

#
# Things that MUST NOT work
#
# Data after the XML document: the rule has nothing to match the
# leftover space, so it is reported as unparsed data.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note> ' # note the trailing space
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note<\/note> ", "unparsed-data": " " }'

# Root element is never closed: the whole message stays unparsed.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note'
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note" }'

# Closing tag does not match the opening tag: the whole message stays unparsed.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note2>'
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>" }'


cleanup_tmp_files

36 changes: 36 additions & 0 deletions tests/field_xml_jsoncnf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
# added 2021-11-14 by Theo Bertin
# This file is part of the liblognorm project, released under ASL 2.0
#
# Tests the "xml" field parser when the field is declared with the
# JSON-style field configuration syntax.
. $srcdir/exec.sh

test_def $0 "XML field"
add_rule 'version=2'
add_rule 'rule=:%{"name":"field", "type":"xml"}%'

# A root element containing only text becomes a string member named
# after the element.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note>'
assert_output_json_eq '{ "field": { "note": "This is a simple note"} }'

# A root element with child elements becomes a nested object with one
# string member per child.
execute '<?xml version="1.0" encoding="UTF-8"?><note><one>first note</one><two>second note</two></note>'
assert_output_json_eq '{ "field": { "note": { "one": "first note", "two": "second note" } } }'

#
# Things that MUST NOT work
#
# Data after the XML document: the rule has nothing to match the
# leftover space, so it is reported as unparsed data.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note> ' # note the trailing space
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note<\/note> ", "unparsed-data": " " }'

# Root element is never closed: the whole message stays unparsed.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note'
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note" }'

# Closing tag does not match the opening tag: the whole message stays unparsed.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note2>'
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>" }'


cleanup_tmp_files