From 8073c4aa6a047a5b344af16487c2f2c7f8004733 Mon Sep 17 00:00:00 2001 From: Juliana Fajardini Date: Fri, 7 Mar 2025 07:37:04 -0300 Subject: [PATCH 1/7] readme: update goals --- README | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/README b/README index 90bce81c..e8b05d49 100644 --- a/README +++ b/README @@ -5,31 +5,15 @@ Copyright 2010-2013 Qualys, Inc. ============================================================================ LibHTP is a security-aware parser for the HTTP protocol and the related bits -and pieces. The goals of the project, in the order of importance, are as -follows: +and pieces. The goal of the project is mainly to support the Suricata use case. +Other use cases might not fully be supported, and we encourage you to cover these. - 1. Completeness of coverage; LibHTP must be able to parse virtually all - traffic that is found in practice. - - 2. Permissive parsing; LibHTP must never fail to parse a stream that would - be parsed by some other web server. - - 3. Awareness of evasion techniques; LibHTP must be able to detect and - effectively deal with various evasion techniques, producing, where - practical, identical or practically identical results as the web - server processing the same traffic stream. - - 4. Performance; The performance must be adequate for the desired tasks. - Completeness and security are often detrimental to performance. Our - idea of handling the conflicting requirements is to put the library - user in control, allowing him to choose the most desired library - characteristic. | STATUS LIBHTP IS VERY YOUNG AT THIS POINT. IT WILL BE SOME TIME BEFORE | IT CAN BE CONSIDER COMPLETE. AT THE MOMENT, THE FOCUS OF DEVELOPMENT | IS ON ACHIEVING THE FIRST TWO GOALS. -See the LICENSE, COPYING and NOTICE files distributed with this work for +See the LICENSE, COPYING, and NOTICE files distributed with this work for information regarding licensing, copying and copyright ownership. 
From 5e9db6f1f62943b6abcb91d8ebf60d5d44e681e4 Mon Sep 17 00:00:00 2001 From: Juliana Fajardini Date: Fri, 7 Mar 2025 07:39:22 -0300 Subject: [PATCH 2/7] readme: update status --- README | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README b/README index e8b05d49..5172245d 100644 --- a/README +++ b/README @@ -8,10 +8,10 @@ LibHTP is a security-aware parser for the HTTP protocol and the related bits and pieces. The goal of the project is mainly to support the Suricata use case. Other use cases might not fully be supported, and we encourage you to cover these. - - | STATUS LIBHTP IS VERY YOUNG AT THIS POINT. IT WILL BE SOME TIME BEFORE - | IT CAN BE CONSIDER COMPLETE. AT THE MOMENT, THE FOCUS OF DEVELOPMENT - | IS ON ACHIEVING THE FIRST TWO GOALS. + | STATUS + | + | We are currently in the process of migrating LibHTP to a Rust version and thus + | support will be discontinued. See the LICENSE, COPYING, and NOTICE files distributed with this work for information regarding licensing, copying and copyright ownership. From 226580d502ae98c148aaecc4846f78694b5e253c Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Tue, 11 Mar 2025 16:45:35 +0100 Subject: [PATCH 3/7] decompressors: do not take data after end --- htp/htp_core.h | 5 ++++- htp/htp_decompressors.c | 20 +++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/htp/htp_core.h b/htp/htp_core.h index d07f9913..c2eeb160 100644 --- a/htp/htp_core.h +++ b/htp/htp_core.h @@ -212,7 +212,10 @@ enum htp_content_encoding_t { HTP_COMPRESSION_DEFLATE = 3, /** LZMA compression. */ - HTP_COMPRESSION_LZMA = 4 + HTP_COMPRESSION_LZMA = 4, + + /** No more data. 
*/ + HTP_COMPRESSION_OVER = 5 }; /** diff --git a/htp/htp_decompressors.c b/htp/htp_decompressors.c index 02ebae3e..d66b6e82 100644 --- a/htp/htp_decompressors.c +++ b/htp/htp_decompressors.c @@ -203,6 +203,8 @@ htp_status_t htp_gzip_decompressor_decompress(htp_decompressor_t *drec1, htp_tx_ } return HTP_OK; + } else if (drec->zlib_initialized == HTP_COMPRESSION_OVER) { + return HTP_ERROR; } if (d->data == NULL) { @@ -316,15 +318,8 @@ htp_status_t htp_gzip_decompressor_decompress(htp_decompressor_t *drec1, htp_tx_ // no initialization means previous error on stream return HTP_ERROR; } - if (GZIP_BUF_SIZE > drec->stream.avail_out) { - if (rc == Z_DATA_ERROR && drec->restart == 0) { - // There is data even if there is an error - // So use this data and log a warning - htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "GZip decompressor: inflate failed with %d", rc); - rc = Z_STREAM_END; - } - } - if (rc == Z_STREAM_END) { + int error_after_data = (rc == Z_DATA_ERROR && drec->restart == 0 && GZIP_BUF_SIZE > drec->stream.avail_out); + if (rc == Z_STREAM_END || error_after_data) { // How many bytes do we have? size_t len = GZIP_BUF_SIZE - drec->stream.avail_out; @@ -351,6 +346,13 @@ htp_status_t htp_gzip_decompressor_decompress(htp_decompressor_t *drec1, htp_tx_ drec->stream.next_out = drec->buffer; // TODO Handle trailer. 
+ if (error_after_data) { + // There is data even if there is an error + // So use this data and log a warning + htp_log(d->tx->connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "GZip decompressor: inflate failed with %d", rc); + drec->zlib_initialized = HTP_COMPRESSION_OVER; + return HTP_ERROR; + } return HTP_OK; } else if (rc != Z_OK) { From 12795a9415c001917ee4913de889641dafa0bdad Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Thu, 13 Mar 2025 09:05:38 +0100 Subject: [PATCH 4/7] response: end decompressors in chunked content When a response ends in the middle of a chunk, signal it to the decompressors to let them finalize the data they got so far, as is done with content-length. Do this for all states of chunked content. --- htp/htp_response.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/htp/htp_response.c b/htp/htp_response.c index abb4d590..68e9f374 100644 --- a/htp/htp_response.c +++ b/htp/htp_response.c @@ -289,6 +289,12 @@ static void htp_connp_res_clear_buffer(htp_connp_t *connp) { htp_status_t htp_connp_RES_BODY_CHUNKED_DATA_END(htp_connp_t *connp) { // TODO We shouldn't really see anything apart from CR and LF, // so we should warn about anything else. + if (connp->out_status == HTP_STREAM_CLOSED) { + connp->out_state = htp_connp_RES_FINALIZE; + // Sends close signal to decompressors + htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, NULL, 0); + return rc; + } for (;;) { OUT_NEXT_BYTE_OR_RETURN(connp); @@ -402,6 +408,13 @@ static inline int data_probe_chunk_length(htp_connp_t *connp) { * @returns HTP_OK on state change, HTP_ERROR on error, or HTP_DATA when more data is needed. 
*/ htp_status_t htp_connp_RES_BODY_CHUNKED_LENGTH(htp_connp_t *connp) { + if (connp->out_status == HTP_STREAM_CLOSED) { + connp->out_state = htp_connp_RES_FINALIZE; + // Sends close signal to decompressors + htp_status_t rc = htp_tx_res_process_body_data_ex(connp->out_tx, NULL, 0); + return rc; + } + for (;;) { OUT_COPY_BYTE_OR_RETURN(connp); From c3683364c4a5d3c984f6337ce3a199491d5aca44 Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Thu, 13 Mar 2025 10:20:10 +0100 Subject: [PATCH 5/7] chunks: abort asap on invalid chunk length --- htp/htp_response.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/htp/htp_response.c b/htp/htp_response.c index 68e9f374..374219ff 100644 --- a/htp/htp_response.c +++ b/htp/htp_response.c @@ -375,11 +375,6 @@ static inline int is_chunked_ctl_char(const unsigned char c) { * @returns 1 if it looks valid, 0 if it looks invalid */ static inline int data_probe_chunk_length(htp_connp_t *connp) { - if (connp->out_current_read_offset - connp->out_current_consume_offset < 8) { - // not enough data so far, consider valid still - return 1; - } - unsigned char *data = connp->out_current_data + connp->out_current_consume_offset; size_t len = connp->out_current_read_offset - connp->out_current_consume_offset; From 91c0334be4d18d74744b020232be9d1db71516d5 Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Thu, 13 Mar 2025 16:04:03 +0100 Subject: [PATCH 6/7] chunks: probe validity if data was not buffered If a chunk length line was split across two packets, we already buffered the beginning of the line (the end of the first packet) and checked its validity. So do not probe it again when the second packet arrives. --- htp/htp_response.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htp/htp_response.c b/htp/htp_response.c index 374219ff..7cef96ec 100644 --- a/htp/htp_response.c +++ b/htp/htp_response.c @@ -415,7 +415,7 @@ htp_status_t htp_connp_RES_BODY_CHUNKED_LENGTH(htp_connp_t *connp) { // Have we reached the end of the line? 
Or is this not chunked after all? if (connp->out_next_byte == LF || - (!is_chunked_ctl_char((unsigned char) connp->out_next_byte) && !data_probe_chunk_length(connp))) { + (!is_chunked_ctl_char((unsigned char) connp->out_next_byte) && !data_probe_chunk_length(connp) && connp->out_buf == NULL)) { unsigned char *data; size_t len; From 437f75797f5446a90c94519391ea5256b96f8f4c Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Thu, 13 Mar 2025 16:24:28 +0100 Subject: [PATCH 7/7] response: do not error on gap finishing content-length Passing a gap to a decompressor closes it. If that gap also finishes the known content-length, we then try to close the decompressor a second time and fail. --- htp/htp_transaction.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/htp/htp_transaction.c b/htp/htp_transaction.c index 7220459d..9cde4155 100644 --- a/htp/htp_transaction.c +++ b/htp/htp_transaction.c @@ -972,8 +972,13 @@ htp_status_t htp_tx_res_process_body_data_ex(htp_tx_t *tx, const void *data, siz case HTP_COMPRESSION_DEFLATE: case HTP_COMPRESSION_LZMA: // In severe memory stress these could be NULL - if (tx->connp->out_decompressor == NULL) + if (tx->connp->out_decompressor == NULL) { + if (data == NULL) { + // we were already stopped on a gap finishing CL + return HTP_OK; + } return HTP_ERROR; + } struct timeval after; gettimeofday(&tx->connp->out_decompressor->time_before, NULL);