使用ESP32解压gzip压缩的HTTP响应

在使用和风天气API获取天气数据时,发现其返回的数据默认经过了gzip压缩。本文介绍如何在ESP32上对其进行解压缩。

和风天气提供了免费的API用于获取天气信息,且个人开发者只需要注册账号即可使用,非常方便。但其为了节约流量,返回的响应默认启用了gzip压缩,且自2022年3月1日开始无法更改。为了使用这一好用的服务,同时提前对未来越来越广泛的gzip压缩响应做准备,有必要研究如何在ESP32上解压gzip压缩的响应。

要解压HTTP响应中的gzip,需要inflate算法的解压函数。ESP32内置的ROM里面其实已经包含了用于gzip压缩与解压缩的miniz,下面是ESP32-C3的ROM中关于miniz函数的声明。

/***************************************
 Group miniz
 ***************************************/

/* Functions */
/*
 * Each assignment gives one miniz symbol a fixed absolute address (per the
 * surrounding article, inside the ESP32-C3 ROM), so a call to that name
 * resolves to the listed address instead of to freshly compiled code.
 * The list holds only mz_adler32/mz_crc32/mz_free plus the tdefl_* and
 * tinfl_* entry points; no zlib-style inflate()/inflateInit() symbol
 * appears here.
 */
mz_adler32 = 0x400000c0;
mz_crc32 = 0x400000c4;
mz_free = 0x400000c8;
tdefl_compress = 0x400000cc;
tdefl_compress_buffer = 0x400000d0;
tdefl_compress_mem_to_heap = 0x400000d4;
tdefl_compress_mem_to_mem = 0x400000d8;
tdefl_compress_mem_to_output = 0x400000dc;
tdefl_get_adler32 = 0x400000e0;
tdefl_get_prev_return_status = 0x400000e4;
tdefl_init = 0x400000e8;
tdefl_write_image_to_png_file_in_memory = 0x400000ec;
tdefl_write_image_to_png_file_in_memory_ex = 0x400000f0;
tinfl_decompress = 0x400000f4;
tinfl_decompress_mem_to_callback = 0x400000f8;
tinfl_decompress_mem_to_heap = 0x400000fc;
tinfl_decompress_mem_to_mem = 0x40000100;

由于内置的miniz在编译时打开了MINIZ_NO_ZLIB_APIS宏,因此ROM中的库只提供了顶层的压缩与解压缩函数,没有提供inflate函数用来解压z_stream流。因此,内置的miniz无法被用于解压HTTP响应中的gzip数据。

好在,我们还有zlib可供使用。zlib提供了inflate函数,可以解压gzip压缩后的HTTP响应中的z_stream流。只需要将其移植到ESP32上,即可解压数据。

首先,前往官网下载zlib的源码。下载好后,在ESP-IDF工程中的components目录中新建一个zlib文件夹,并将源码解压到components/zlib/zlib中。

技巧
将源码解压到components/zlib/zlib中的原因是,可以在上一级目录中添加需要的文件进入ESP-IDF工程,而不需要对zlib源码目录做出任何修改,因此可以很方便地将zlib的Git仓库作为子仓库包含进自己的工程中。

然后,我们需要编写一个CMakeLists.txt文件,将zlib源码中的部分源文件加入IDF工程。由于zlib中的某些文件是为Linux环境下准备的,若将这些文件加入编译则会报错,因此只需要在components/zlib/CMakeLists.txt文件中加入下列内容来添加这7个源文件即可:

# Register zlib as an ESP-IDF component. Only the checksum (adler32, crc32),
# inflate (infback, inffast, inflate, inftrees) and utility (zutil) sources
# from the zlib/ sub-directory are compiled; zlib/ is also the directory
# exported for #include lookups.
set(zlib_component_sources
    "zlib/adler32.c"
    "zlib/crc32.c"
    "zlib/infback.c"
    "zlib/inffast.c"
    "zlib/inflate.c"
    "zlib/inftrees.c"
    "zlib/zutil.c"
)

idf_component_register(
    SRCS ${zlib_component_sources}
    INCLUDE_DIRS "zlib"
)

ESP-IDF工程的路径看起来像这样:

├─components
│  ├─......
│  └─zlib
│      ├─CMakeLists.txt
│      └─zlib
│          ├─adler32.c
│          ├─crc32.c
│          ├─infback.c
│          ├─inffast.c
│          ├─inflate.c
│          ├─inftrees.c
│          ├─zutil.c
│          ├─CMakeLists.txt
│          └─......
└─...

添加源码之后,直接编译即可将zlib库加入工程。

接下来,只需要编写代码调用zlib中的函数,即可解压gzip压缩后的HTTP响应。

#include "zlib.h"
#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"

/**
 * Inflate a gzip- or zlib-wrapped buffer into out_buf and NUL-terminate it.
 *
 * @param in_buf        Compressed input data.
 * @param in_size       Number of bytes available at in_buf.
 * @param out_buf       Destination for the decompressed bytes.
 * @param out_size      Receives the number of decompressed bytes written
 *                      (the terminator is not counted). Only written when
 *                      Z_OK is returned.
 * @param out_buf_size  Capacity of out_buf in bytes; one byte is reserved for
 *                      the '\0' terminator.
 * @return Z_OK on success, otherwise the zlib error code reported by
 *         inflateInit2(), inflate() or inflateEnd().
 *
 * As before, output that does not fit into out_buf_size-1 bytes, or input
 * that ends before the end of the stream, is not an error: whatever was
 * decoded is kept and Z_OK is returned.
 */
static int network_gzip_decompress(void *in_buf, size_t in_size, void *out_buf, size_t *out_size, size_t out_buf_size)
{
    /* 15 selects the largest (32 KiB) window; adding 32 makes zlib detect a
     * gzip or zlib header automatically. */
    enum { WINDOW_BITS_AUTO_HEADER = 15 + 32 };
    /* z_stream keeps avail_in/avail_out as unsigned int, so a single
     * inflate() call is never handed more than that many bytes. */
    const size_t max_chunk = (size_t)~0u;
    /* Keep one byte for the terminator; a zero-sized buffer holds nothing
     * (the original computed out_buf_size-1, which wraps around for 0). */
    const size_t out_limit = (out_buf_size > 0) ? out_buf_size - 1 : 0;

    z_stream d_stream = {0}; /* decompression stream; zalloc/zfree/opaque stay NULL */
    d_stream.next_in  = in_buf;
    d_stream.next_out = out_buf;

    int err = inflateInit2(&d_stream, WINDOW_BITS_AUTO_HEADER);
    if (err != Z_OK) {
        return err;
    }

    /* Hand zlib everything that is left on both sides instead of one byte at
     * a time; the loop only repeats if a size exceeded max_chunk. */
    while (d_stream.total_out < out_limit && d_stream.total_in < in_size) {
        const size_t in_left  = in_size - d_stream.total_in;
        const size_t out_left = out_limit - d_stream.total_out;

        d_stream.avail_in  = (unsigned int)(in_left < max_chunk ? in_left : max_chunk);
        d_stream.avail_out = (unsigned int)(out_left < max_chunk ? out_left : max_chunk);

        err = inflate(&d_stream, Z_NO_FLUSH);
        if (err != Z_OK) {
            break; /* Z_STREAM_END or a real error; both are sorted out below */
        }
    }

    /* Always release the inflate state, including after a decode error
     * (the original returned early and leaked it). */
    const int end_err = inflateEnd(&d_stream);

    if (err != Z_OK && err != Z_STREAM_END) {
        return err;
    }
    if (end_err != Z_OK) {
        return end_err;
    }

    *out_size = d_stream.total_out;
    if (out_buf_size > 0) {
        ((char *)out_buf)[*out_size] = '\0';
    }

    return Z_OK;
}

下面一段代码提供了请求API,判断其是否被gzip压缩并解压的函数例子:

/*
 * Find header `name` (spelled with its trailing colon) case-insensitively in
 * a NUL-terminated header block. Returns a pointer to the first character of
 * its value with leading blanks skipped, or NULL when the header is absent.
 */
static const char *network_http_header_value(const char *headers, const char *name)
{
    const char *value = strcasestr(headers, name);
    if (value == NULL) {
        return NULL;
    }
    value += strlen(name);
    while (*value == ' ' || *value == '\t') {
        value++;
    }
    return value;
}

/*
 * Parse a raw HTTP response held in `response` (NUL-terminated, writable) and
 * store its body in out_buf, inflating it first when Content-Encoding names
 * gzip. Returns the number of body bytes stored, or (size_t)-1 on failure.
 */
static size_t network_http_extract_body(char *response, void *out_buf, size_t out_buf_size)
{
    const size_t failure = (size_t)-1;
    static const char header_terminator[] = "\r\n\r\n";

    /* Locate the body; without the blank line there is nothing to parse. */
    char *header_end = strstr(response, header_terminator);
    if (header_end == NULL) {
        ESP_LOGE(TAG, "malformed http response: end of headers not found");
        return failure;
    }
    char *resp_body = header_end + strlen(header_terminator);
    const size_t header_len = (size_t)(resp_body - response);
    /* Cut the buffer at the end of the headers so the header lookups below
     * can never match text inside the body. */
    *header_end = '\0';

    /* Body length: only responses carrying Content-Length are supported. */
    const char *length_value = network_http_header_value(response, "Content-Length:");
    if (length_value == NULL) {
        ESP_LOGE(TAG, "unsupported chunked transfer encoding");
        return failure;
    }
    if (*length_value < '0' || *length_value > '9') {
        ESP_LOGE(TAG, "invalid Content-Length header");
        return failure;
    }
    size_t resp_length = (size_t)strtoul(length_value, NULL, 10);

    /* Check whether the body is gzip-compressed: look at the first token of
     * the Content-Encoding value, copied with an explicit bound. */
    uint8_t gzip_encoded = 0;
    const char *encoding_value = network_http_header_value(response, "Content-Encoding:");
    if (encoding_value != NULL) {
        char content_encoding[16];
        size_t token_len = strcspn(encoding_value, " \t\r\n");
        if (token_len >= sizeof content_encoding) {
            token_len = sizeof content_encoding - 1;
        }
        memcpy(content_encoding, encoding_value, token_len);
        content_encoding[token_len] = '\0';
        if (strcasestr(content_encoding, "gzip")) {
            gzip_encoded = 1;
        }
    }

    /* Check whether the announced body is longer than what the buffer holds. */
    ESP_LOGI(TAG, "https response length: %d bytes", (int)resp_length);
    if (resp_length > out_buf_size - header_len) {
        resp_length = out_buf_size - header_len;
        ESP_LOGW(TAG, "response too long, shrinking to %d bytes", (int)resp_length);
        if (gzip_encoded) {
            ESP_LOGE(TAG, "gzip decode is not possible on shrinked buffer");
            return failure;
        }
    }

    size_t out_size = 0;
    if (gzip_encoded) { /* gzip-compressed body */
        ESP_LOGD(TAG, "gzip encoded response, decompressing...");
        /* network_gzip_decompress() reports zlib codes; success is 0, the
         * same value as ESP_OK. */
        int ret = network_gzip_decompress(resp_body, resp_length, out_buf, &out_size, out_buf_size);
        if (ret != ESP_OK) {
            ESP_LOGE(TAG, "gzip data decompression failed, code=%d", ret);
            return failure;
        }
        ESP_LOGD(TAG, "response size after decompression: %d bytes", (int)out_size);
    } else { /* uncompressed body */
        memcpy(out_buf, resp_body, resp_length);
        /* header_len is at least 4, so resp_length < out_buf_size here and
         * the terminator always fits. */
        ((char *)out_buf)[resp_length] = '\0';
        out_size = resp_length;
    }
    return out_size;
}

/**
 * Perform an HTTPS GET request and store the (decompressed) response body in
 * out_buf.
 *
 * @param out_buf        Destination buffer for the response body.
 * @param out_buf_size   Capacity of out_buf; a scratch buffer of the same size
 *                       is allocated for the raw response.
 * @param host           Server host name.
 * @param url            Request path placed in the GET line.
 * @param ca_cert_begin  First byte of the CA certificate.
 * @param ca_cert_end    One past the last byte of the CA certificate.
 * @return Number of body bytes written to out_buf, or (size_t)-1 on any
 *         failure (allocation, transfer, malformed or unsupported response,
 *         decompression).
 */
size_t network_https_request(void *out_buf, size_t out_buf_size, const char *host, const char *url, const void *ca_cert_begin, const void* ca_cert_end)
{
    size_t out_size = (size_t)-1;

    if (out_buf == NULL || out_buf_size == 0) {
        return out_size;
    }

    esp_tls_cfg_t tls_cfg_ca_cert = {
        .cacert_buf = ca_cert_begin,
        /* subtract as byte pointers: arithmetic on void * is not standard C */
        .cacert_bytes = (const unsigned char *)ca_cert_end - (const unsigned char *)ca_cert_begin,
    };

    /* Allocate the scratch buffer for the raw response. */
    char *http_response = malloc(out_buf_size);
    if (http_response == NULL) {
        ESP_LOGE(TAG, "error while allocting memory for https request");
        return out_size;
    }

    /* Send the HTTPS request; a return of 0 means nothing usable was received. */
    if (network_tls_transfer(&tls_cfg_ca_cert, http_response, out_buf_size, host, url) != 0) {
        out_size = network_http_extract_body(http_response, out_buf, out_buf_size);
    }

    free(http_response);
    return out_size;
}

其中,network_tls_transfer()函数的实现如下:

/*
 * printf-style template of the GET request: the first %s receives the request
 * path, the second %s the host name.
 * "Connection: close" asks the server to close the connection once the
 * response has been sent. The receiver in this file only treats a response as
 * complete when a read returns 0 (connection closed), so without this header
 * an HTTP/1.1 server may keep the connection open and stall the read.
 */
static const char HTTPS_REQUEST[] = "GET %s HTTP/1.1\r\n"
                                    "Host: %s\r\n"
                                    "User-Agent: ESP32 HTTP Client/1.0\r\n"
                                    "Connection: close\r\n"
                                    "\r\n";

/**
 * Open a TLS connection to host:443, send a GET request for url and read the
 * raw response (headers and body) into out_buf.
 *
 * @param cfg           TLS configuration passed to esp_tls_conn_new_sync().
 * @param out_buf       Buffer used first to build the request and then to hold
 *                      the response; always NUL-terminated on return when
 *                      out_buf_size > 0.
 * @param out_buf_size  Capacity of out_buf; one byte is kept for the terminator.
 * @param host          Server host name.
 * @param url           Request path.
 * @return Number of response bytes stored, or 0 on any failure (including a
 *         read error, in which case partially received data is discarded).
 */
static size_t network_tls_transfer(esp_tls_cfg_t *cfg, void *out_buf, size_t out_buf_size, const char *host, const char *url)
{
    char *buf = out_buf; /* byte view: arithmetic on void * is not standard C */
    size_t out_size = 0;

    if (buf == NULL || out_buf_size == 0) {
        return 0; /* no room even for the terminator */
    }

    /* Create a new TLS connection. */
    struct esp_tls *tls = esp_tls_init();
    if (tls == NULL) {
        ESP_LOGE(TAG, "error while initialize tls connection");
        goto exit;
    }

    /* esp_tls_conn_new_sync() returns 1 on success and -1 on failure; the
     * previous "== 0" test let a failed connection through as a success. */
    if (esp_tls_conn_new_sync(host, strlen(host), 443, cfg, tls) != 1) {
        ESP_LOGE(TAG, "error while creating tls connection");
        goto exit;
    }
    ESP_LOGI(TAG, "connected to %s, sending request...", host);

    /* Build the HTTP request in the caller's buffer. */
    int request_len = snprintf(buf, out_buf_size, HTTPS_REQUEST, url, host);
    if (request_len < 0 || (size_t)request_len >= out_buf_size) {
        /* A truncated request must not be sent: request_len would point past
         * what was actually written into the buffer. */
        ESP_LOGE(TAG, "request does not fit into the buffer");
        goto exit;
    }
    const size_t request_size = (size_t)request_len;

    /* Send the request over the TLS connection. */
    size_t written = 0;
    while (written < request_size) {
        ssize_t ret = esp_tls_conn_write(tls, buf + written, request_size - written);
        if (ret >= 0) { /* success: ret is the number of bytes actually written */
            written += (size_t)ret;
        } else if (ret != ESP_TLS_ERR_SSL_WANT_READ && ret != ESP_TLS_ERR_SSL_WANT_WRITE) { /* send failed */
            ESP_LOGE(TAG, "error while sending request");
            goto exit;
        }
    }

    ESP_LOGI(TAG, "request sent, reading response...");
    /* Receive the response over the TLS connection. */
    const size_t capacity = out_buf_size - 1; /* space left after reserving the terminator */
    size_t received = 0;
    while (received < capacity) {
        ssize_t ret = esp_tls_conn_read(tls, buf + received, capacity - received);
        if (ret == ESP_TLS_ERR_SSL_WANT_WRITE || ret == ESP_TLS_ERR_SSL_WANT_READ) {
            continue;
        }
        if (ret < 0) { /* receive error: out_size stays 0, data is discarded */
            ESP_LOGE(TAG, "error while reading request");
            goto exit;
        }
        if (ret == 0) { /* peer closed the connection: response complete */
            out_size = received;
            ESP_LOGI(TAG, "connection closed, %d bytes read", (int)out_size);
            goto exit;
        }
        received += (size_t)ret;
    }

    /* The buffer filled up before the peer closed the connection; keep what
     * fits instead of issuing a zero-length read. */
    out_size = received;
    ESP_LOGW(TAG, "response buffer full, %d bytes read", (int)out_size);

exit:
    if (tls != NULL) {
        esp_tls_conn_delete(tls);
    }
    buf[out_size] = '\0';
    return out_size;
}

程序在ESP32-C3上的实际运行效果如下。从日志中可以看到,gzip压缩将416字节的响应压缩到了304字节,确实可以节约许多流量。

https://img.yuanze.wang/posts/esp32-unzip-gzip-http-response/log.png
运行日志