diff options
| -rw-r--r-- | lib/include/net.h | 58 |
| -rw-r--r-- | lib/net.c | 280 |
2 files changed, 307 insertions, 31 deletions
diff --git a/lib/include/net.h b/lib/include/net.h index d5f1482..5ee7fc6 100644 --- a/lib/include/net.h +++ b/lib/include/net.h @@ -3,6 +3,62 @@ #include "arena.h" -int vl_net_ensure_cached(vl_arena *arena, const char *url, const char *target_path); +enum { + /* The operation completed successfully */ + NET_OK = 0, + + /* There was some kind of error relating to I/O on the local system */ + NET_EIO = -1, + + /* There was some kind of network error downloading the file. */ + NET_ENETWORK = -2, + + /* The server gave us a bad status code while downloading the file. */ + NET_ESTATUS = -3, + + /* An unrecoverable integrity error occurred. */ + NET_EINTEGRITY = -4, + + /* An unspecified error occurred. */ + NET_EUNSPEC = -5, + + /* An unspecified (remote) error occurred. */ + NET_EREMOTEUNSPEC = -6 +}; + +enum { + VERIFY_SIZE = 1u, + VERIFY_SHA1 = 2u +}; + +/* Ensures the latest version of the file available at 'url' is downloaded to 'target_path'. + * Will avoid downloading the file if it hasn't changed. + * + * Never returns NET_EINTEGRITY. */ +int vl_net_ensure_cached(vl_arena *scratch, const char *url, const char *target_path); + +/* Will verify that the file at 'target_path' hasn't been tampered with since being updated + * by vl_net_ensure_cached. + * + * Returns NET_EIO if the file doesn't exist or couldn't be read. + * Returns NET_EINTEGRITY if the file on disk doesn't match. */ +int vl_net_verify_cached(vl_arena *scratch, const char *target_path); + +/* Downloads a file for which you know the integrity information (SHA1 and/or size) from 'url' to + * 'target_path'. Will not download the file if the file already exists with correct SHA1 and size + * (as specified). + * + * Pass variadic arguments in the following order: + * - size_t size (if VERIFY_SIZE is set) + * - const uint8_t *hash (if VERIFY_SHA1 is set) + * + * Returns NET_EINTEGRITY if the file at 'url' doesn't match the provided integrity information. 
*/ +int vl_net_ensure_verified(const char *url, const char *target_path, unsigned flags, ...); + +/* See vl_net_ensure_verified for information on the variadic argument list. + * + * Returns NET_EINTEGRITY if the file at 'target_path' doesn't match the provided integrity information. + * Returns NET_EIO if the downloaded file is not readable or could not be verified due to an I/O issue. */ +int vl_net_verify(const char *target_path, unsigned flags, ...); #endif @@ -9,9 +9,39 @@ #include <jansson.h> #include <errno.h> #include <string.h> +#include <stdarg.h> +#include <unistd.h> /* TODO: also support etag/if-not-match rather than just last-modified/if-not-modified */ +static int translate_curlcode(CURLcode code) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch-enum" + switch (code) { +#pragma GCC diagnostic pop + case CURLE_COULDNT_RESOLVE_PROXY: + case CURLE_COULDNT_RESOLVE_HOST: + case CURLE_COULDNT_CONNECT: + case CURLE_OPERATION_TIMEDOUT: + case CURLE_SEND_ERROR: + case CURLE_RECV_ERROR: + return NET_ENETWORK; + case CURLE_WEIRD_SERVER_REPLY: + case CURLE_PARTIAL_FILE: + case CURLE_TOO_MANY_REDIRECTS: + case CURLE_GOT_NOTHING: + return NET_EREMOTEUNSPEC; + case CURLE_HTTP_RETURNED_ERROR: + case CURLE_REMOTE_FILE_NOT_FOUND: + return NET_ESTATUS; + case CURLE_READ_ERROR: + case CURLE_WRITE_ERROR: + return NET_EIO; + default: + return NET_EUNSPEC; + } +} + /* memory usage is about 3 * strlen(path) -- what da heeeellll */ static char *derive_meta_path(vl_arena *arena, const char *path) { @@ -22,34 +52,40 @@ static char *derive_meta_path(vl_arena *arena, const char *path) #define NET_BUFSIZE (4096) -static int hash_file(const char *fname, vl_sha1 ohash) +static int hash_file(const char *fname, vl_sha1 ohash, size_t *osz) { FILE *file; unsigned char data[NET_BUFSIZE]; - size_t nread; - int ret = -1; + size_t nread, total = 0; + int ret; vl_sha1_st hashst; file = fopen(fname, "rb"); if (!file) { vl_debug("failed to hash file %s: %s", fname, 
strerror(errno)); - return -1; + return NET_EIO; } vl_sha1_init(&hashst); while ((nread = fread(data, 1, NET_BUFSIZE, file)) > 0) { vl_sha1_update(&hashst, data, nread); + total += nread; } if (ferror(file)) { vl_debug("failed to read file %s :(", fname); + ret = NET_EIO; goto cleanup; } vl_sha1_finalize(&hashst, ohash); - ret = 0; + if (osz) { + *osz = total; + } + + ret = NET_OK; cleanup: fclose(file); @@ -59,7 +95,8 @@ cleanup: static int check_cache_consistent(vl_arena *arena, const char *path, const char *meta_path, struct curl_slist **headers) { json_error_t jerr; - int ret = -1; + int ret = NET_EUNSPEC; + int ret2; json_t *j = json_load_file(meta_path, 0, &jerr); char *header = NULL; @@ -75,7 +112,6 @@ static int check_cache_consistent(vl_arena *arena, const char *path, const char size_t sha1_len; if (json_unpack_ex(j, &jerr, JSON_STRICT, "{s:s, s:s%}", "lm", &lm, "sha1", &sha1_hex, &sha1_len) < 0) { - vl_debug("failed to unpack json meta %s: %s", meta_path, jerr.text); goto cleanup; } @@ -90,7 +126,8 @@ static int check_cache_consistent(vl_arena *arena, const char *path, const char goto cleanup; } - if (hash_file(path, hashgot) < 0) { + if ((ret2 = hash_file(path, hashgot, NULL)) < 0) { + ret = ret2; goto cleanup; } @@ -113,7 +150,7 @@ static int check_cache_consistent(vl_arena *arena, const char *path, const char header = vl_arena_sprintf(arena, "If-Modified-Since: %s", lm); *headers = curl_slist_append(*headers, header); - ret = 0; + ret = NET_OK; cleanup: if (j) json_decref(j); @@ -158,6 +195,7 @@ struct write_ctx const char *opath; FILE *ofile; vl_sha1_st sha1_state; + size_t total_read; }; static size_t handle_write(char *ptr, size_t sz, size_t nmemb, void *user) @@ -170,6 +208,11 @@ static size_t handle_write(char *ptr, size_t sz, size_t nmemb, void *user) * fopen for writing truncates the file, so we can't open the file in the main function. 
*/ if (!ctx->ofile) { + if (!ctx->opath) { + vl_debug("downloaded file has either an open file handle nor a file path??"); + return CURL_WRITEFUNC_ERROR; + } + ctx->ofile = fopen(ctx->opath, "wb"); if (!ctx->ofile) { vl_warn("Failed to open output file: fopen(%s, wb): %s", ctx->opath, strerror(errno)); @@ -177,19 +220,28 @@ static size_t handle_write(char *ptr, size_t sz, size_t nmemb, void *user) } } + ctx->total_read += sz * nmemb; vl_sha1_update(&ctx->sha1_state, ptr, sz * nmemb); return fwrite(ptr, sz, nmemb, ctx->ofile); } +#define CHECK(_ex, _exs, _cv, _ebv, _cu, _rv) do { \ + if ((_cv = (_ex)) != CURLE_OK) { \ + vl_warn("%s failed: %s (%s)", _exs, curl_easy_strerror(_cv), _ebv); \ + _rv = translate_curlcode(_cv); \ + goto _cu; \ + } \ +} while (0) + int vl_net_ensure_cached(vl_arena *arena, const char *url, const char *target_path) { char *meta_path = derive_meta_path(arena, target_path); - int ret = -1; + int ret = NET_EUNSPEC; char errbuf[CURL_ERROR_SIZE]; CURLcode ecode; CURL *easy = NULL; struct curl_slist *headers = NULL; - struct write_ctx wrctx = { .opath = target_path, .ofile = NULL }; + struct write_ctx wrctx = { .opath = target_path, .ofile = NULL, .total_read = 0 }; vl_sha1 ohash; long response_code; @@ -210,42 +262,39 @@ int vl_net_ensure_cached(vl_arena *arena, const char *url, const char *target_pa goto cleanup; } -#define CHECK(_ex) do { \ - if ((ecode = (_ex)) != CURLE_OK) { \ - vl_warn("%s failed: %s (%s)", #_ex, curl_easy_strerror(ecode), errbuf); \ - goto cleanup; \ - } \ -} while (0) +#define C(_ex) CHECK(_ex, #_ex, ecode, errbuf, cleanup, ret) - CHECK(curl_easy_setopt(easy, CURLOPT_URL, url)); - CHECK(curl_easy_setopt(easy, CURLOPT_USERAGENT, VL_USER_AGENT)); - CHECK(curl_easy_setopt(easy, CURLOPT_HTTPGET, 1L)); - CHECK(curl_easy_setopt(easy, CURLOPT_FOLLOWLOCATION, CURLFOLLOW_ALL)); - CHECK(curl_easy_setopt(easy, CURLOPT_TIMEOUT, 60)); + C(curl_easy_setopt(easy, CURLOPT_URL, url)); + C(curl_easy_setopt(easy, CURLOPT_USERAGENT, 
VL_USER_AGENT)); + C(curl_easy_setopt(easy, CURLOPT_HTTPGET, 1L)); + C(curl_easy_setopt(easy, CURLOPT_FOLLOWLOCATION, CURLFOLLOW_ALL)); + C(curl_easy_setopt(easy, CURLOPT_TIMEOUT, 60L)); + C(curl_easy_setopt(easy, CURLOPT_FAILONERROR, 1L)); if (headers) { - CHECK(curl_easy_setopt(easy, CURLOPT_HTTPHEADER, headers)); + C(curl_easy_setopt(easy, CURLOPT_HTTPHEADER, headers)); } vl_sha1_init(&wrctx.sha1_state); - CHECK(curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, &handle_write)); - CHECK(curl_easy_setopt(easy, CURLOPT_WRITEDATA, &wrctx)); + C(curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, &handle_write)); + C(curl_easy_setopt(easy, CURLOPT_WRITEDATA, &wrctx)); - CHECK(curl_easy_perform(easy)); + C(curl_easy_perform(easy)); - CHECK(curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &response_code)); + C(curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &response_code)); -#undef CHECK +#undef C if (response_code == 304) { vl_info("Downloaded file %s not modified.", target_path); - ret = 0; + ret = NET_OK; goto cleanup; } else if (response_code / 100 == 2) { vl_trace("download %s success: %ld", url, response_code); } else { vl_warn("Bad HTTP response code %ld downloading %s", response_code, url); + ret = NET_ESTATUS; goto cleanup; } @@ -257,18 +306,189 @@ int vl_net_ensure_cached(vl_arena *arena, const char *url, const char *target_pa vl_sha1_finalize(&wrctx.sha1_state, ohash); write_transfer_meta(easy, ohash, meta_path); - ret = 0; + ret = NET_OK; cleanup: if (easy) curl_easy_cleanup(easy); if (headers) curl_slist_free_all(headers); if (wrctx.ofile) { + fclose(wrctx.ofile); if (ret < 0 && remove(target_path) < 0) { vl_debug("... 
failed to clean up after failed download: unlink(%s): %s", target_path, strerror(errno)); } + } + + return ret; +} + +static int cmp_integrity(const vl_sha1 hash_actual, size_t sz_actual, unsigned flags, va_list ap) +{ + if (flags & VERIFY_SIZE) { + size_t sz_exp = va_arg(ap, size_t); + if (sz_exp != sz_actual) { + vl_debug("Size mismatch: expected %zu bytes, got %zu bytes.", sz_exp, sz_actual); + return NET_EINTEGRITY; + } + } + + if (flags & VERIFY_SHA1) { + const uint8_t *sha1_exp = va_arg(ap, const uint8_t *); + if (memcmp(sha1_exp, hash_actual, sizeof(vl_sha1))) { +#ifdef LOG_DEBUG_ENABLED + char hex_exp[VL_SHA1_DIGEST_HEX_STRLEN + 1]; + char hex_got[VL_SHA1_DIGEST_HEX_STRLEN + 1]; + hex_exp[VL_SHA1_DIGEST_HEX_STRLEN] = '\0'; + hex_got[VL_SHA1_DIGEST_HEX_STRLEN] = '\0'; + + vl_sha1_encode(sha1_exp, hex_exp); + vl_sha1_encode(hash_actual, hex_got); + + vl_debug("SHA1 mismatch: expected %s, got %s.", hex_exp, hex_got); +#endif + return NET_EINTEGRITY; + } + } + + return NET_OK; +} + +static int verifyv(const char *target_path, unsigned flags, va_list ap) +{ + vl_sha1 hash; + size_t sz = 0; + int ret = NET_EUNSPEC; + + if (flags == 0) { + /* no need to open the file -- we don't care about its size or anything */ + if (access(target_path, R_OK) < 0) { + return NET_EIO; + } + return NET_OK; + } + + if ((ret = hash_file(target_path, hash, &sz)) != NET_OK) { + return ret; + } + + return cmp_integrity(hash, sz, flags, ap); +} + +int vl_net_verify(const char *target_path, unsigned flags, ...) 
+{ + int ret = 0; + va_list ap; + + va_start(ap, flags); + ret = verifyv(target_path, flags, ap); + va_end(ap); + + return ret; +} + +static int download_verified(const char *url, const char *target, unsigned flags, va_list ap) +{ + CURL *easy = NULL; + char errbuf[CURL_ERROR_SIZE]; + int ret = NET_EUNSPEC; + struct write_ctx wrctx = { .opath = NULL, .ofile = NULL }; + long response_code = 0; + vl_sha1 ohash; + CURLcode ecode; + + vl_debug("Downloading %s to %s", url, target); + + easy = curl_easy_init(); + if (!easy) { + vl_warn("Failed to create easy handle for %s", url); + goto cleanup; + } + + errbuf[0] = '\0'; + + if ((ecode = curl_easy_setopt(easy, CURLOPT_ERRORBUFFER, errbuf)) != CURLE_OK) { + vl_warn("curl_easy_setopt(CURLOPT_ERRORBUFFER) failed: %s", curl_easy_strerror(ecode)); + goto cleanup; + } + +#define C(_ex) CHECK(_ex, #_ex, ecode, errbuf, cleanup, ret) + + C(curl_easy_setopt(easy, CURLOPT_URL, url)); + C(curl_easy_setopt(easy, CURLOPT_USERAGENT, VL_USER_AGENT)); + C(curl_easy_setopt(easy, CURLOPT_HTTPGET, 1L)); + C(curl_easy_setopt(easy, CURLOPT_FOLLOWLOCATION, CURLFOLLOW_ALL)); + C(curl_easy_setopt(easy, CURLOPT_TIMEOUT, 60L)); + C(curl_easy_setopt(easy, CURLOPT_FAILONERROR, 1L)); + + vl_sha1_init(&wrctx.sha1_state); + + wrctx.ofile = fopen(target, "wb"); + if (!wrctx.ofile) { + vl_warn("Failed to open output file %s: fopen: %s", target, strerror(errno)); + ret = NET_EIO; + goto cleanup; + } + + C(curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, &handle_write)); + C(curl_easy_setopt(easy, CURLOPT_WRITEDATA, &wrctx)); + + C(curl_easy_perform(easy)); + fclose(wrctx.ofile); + wrctx.ofile = NULL; + + C(curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &response_code)); + +#undef C + + if (response_code / 100 == 2) { + vl_trace("download %s success: %ld", url, response_code); + } else { /* shouldn't ever happen (CURLOPT_FAILONERROR) */ + vl_warn("Bad HTTP status code %ld downloading %s", response_code, url); + ret = NET_ESTATUS; + goto cleanup; + } + + 
vl_sha1_finalize(&wrctx.sha1_state, ohash); + + ret = cmp_integrity(ohash, wrctx.total_read, flags, ap); + + if (ret == NET_EINTEGRITY) { + if (remove(target) < 0) { + vl_warn("Failed to remove %s with bad integrity: remove: %s", target, strerror(errno)); + } + } +cleanup: + if (easy) curl_easy_cleanup(easy); + if (wrctx.ofile) { fclose(wrctx.ofile); + if (ret != NET_OK && remove(target) < 0) { + vl_debug("...failed to clean up after failed download: remove(%s): %s", target, strerror(errno)); + } + } + + return ret; +} + +int vl_net_ensure_verified(const char *url, const char *target_path, unsigned flags, ...) +{ + int ret; + va_list ap; + + vl_trace("ensure verified %s to %s", url, target_path); + + va_start(ap, flags); + ret = verifyv(target_path, flags, ap); + va_end(ap); + + if (ret == NET_OK) { + vl_debug("No need to download %s - integrity matches.", url); + return NET_OK; } + va_start(ap, flags); + ret = download_verified(url, target_path, flags, ap); + va_end(ap); + return ret; } + |
