#include "log.h" #include "arena.h" #include "macros.h" #include "sha1.h" #include "net.h" #include /* for dirname/basename */ #include #include #include #include #include #include /* TODO: also support etag/if-not-match rather than just last-modified/if-not-modified */ static int translate_curlcode(CURLcode code) { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wswitch-enum" switch (code) { #pragma GCC diagnostic pop case CURLE_COULDNT_RESOLVE_PROXY: case CURLE_COULDNT_RESOLVE_HOST: case CURLE_COULDNT_CONNECT: case CURLE_OPERATION_TIMEDOUT: case CURLE_SEND_ERROR: case CURLE_RECV_ERROR: return NET_ENETWORK; case CURLE_WEIRD_SERVER_REPLY: case CURLE_PARTIAL_FILE: case CURLE_TOO_MANY_REDIRECTS: case CURLE_GOT_NOTHING: return NET_EREMOTEUNSPEC; case CURLE_HTTP_RETURNED_ERROR: case CURLE_REMOTE_FILE_NOT_FOUND: return NET_ESTATUS; case CURLE_READ_ERROR: case CURLE_WRITE_ERROR: return NET_EIO; default: return NET_EUNSPEC; } } /* memory usage is about 3 * strlen(path) -- what da heeeellll */ static char *derive_meta_path(vl_arena *arena, const char *path) { char *base = basename(vl_arena_strdup(arena, path)); char *dname = dirname(vl_arena_strdup(arena, path)); return vl_arena_sprintf(arena, "%s/.%s.meta", dname, base); } #define NET_BUFSIZE (4096) static int hash_file(const char *fname, vl_sha1 ohash, size_t *osz) { FILE *file; unsigned char data[NET_BUFSIZE]; size_t nread, total = 0; int ret; vl_sha1_st hashst; file = fopen(fname, "rb"); if (!file) { vl_debug("failed to hash file %s: %s", fname, strerror(errno)); return NET_EIO; } vl_sha1_init(&hashst); while ((nread = fread(data, 1, NET_BUFSIZE, file)) > 0) { vl_sha1_update(&hashst, data, nread); total += nread; } if (ferror(file)) { vl_debug("failed to read file %s :(", fname); ret = NET_EIO; goto cleanup; } vl_sha1_finalize(&hashst, ohash); if (osz) { *osz = total; } ret = NET_OK; cleanup: fclose(file); return ret; } static int check_cache_consistent(vl_arena *arena, const char *path, const char *meta_path, struct curl_slist **headers) { json_error_t jerr; int ret = NET_EUNSPEC; int ret2; json_t *j = json_load_file(meta_path, 0, &jerr); char *header = NULL; vl_trace("checking meta file %s", meta_path); if (!j) { vl_debug("failed to load json meta %s(%d:%d:%d): %s", jerr.source, jerr.line, jerr.column, jerr.position, jerr.text); goto cleanup; } const char *lm; const char *sha1_hex; size_t sha1_len; if (json_unpack_ex(j, &jerr, JSON_STRICT, "{s:s, s:s%}", "lm", &lm, "sha1", &sha1_hex, &sha1_len) < 0) { goto cleanup; } if (sha1_len != VL_SHA1_DIGEST_HEX_STRLEN) { vl_debug("failed to read json meta %s: invalid sha1 digest (is %zu chars long, expected %u)", meta_path, sha1_len, VL_SHA1_DIGEST_HEX_STRLEN); goto cleanup; } vl_sha1 hashgot, hashexp; if (vl_sha1_decode(hashexp, sha1_hex) < 0) { vl_debug("failed to read json meta %s: invalid sha1 digest (bad format)", meta_path); goto cleanup; } if ((ret2 = hash_file(path, hashgot, NULL)) < 0) { ret = ret2; goto cleanup; } if (memcmp(hashexp, hashgot, sizeof(vl_sha1)) != 0) { #ifdef LOG_DEBUG_ENABLED char hash_exp_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1]; char hash_got_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1]; hash_exp_hex[VL_SHA1_DIGEST_HEX_STRLEN] = 0; hash_got_hex[VL_SHA1_DIGEST_HEX_STRLEN] = 0; vl_sha1_encode(hashexp, hash_exp_hex); vl_sha1_encode(hashgot, hash_got_hex); vl_debug("file %s tampered on disk (sha1 expect %s, got %s)", path, hash_exp_hex, hash_got_hex); #endif goto cleanup; } header = vl_arena_sprintf(arena, "If-Modified-Since: %s", lm); *headers = curl_slist_append(*headers, 
static int check_cache_consistent(vl_arena *arena, const char *path, const char *meta_path,
                                  struct curl_slist **headers)
{
    json_error_t jerr;
    int ret = NET_EUNSPEC;
    int ret2;
    json_t *j = json_load_file(meta_path, 0, &jerr);
    char *header = NULL;

    vl_trace("checking meta file %s", meta_path);
    if (!j) {
        vl_debug("failed to load json meta %s(%d:%d:%d): %s", jerr.source, jerr.line,
                 jerr.column, jerr.position, jerr.text);
        goto cleanup;
    }

    const char *lm;
    const char *sha1_hex;
    size_t sha1_len;
    if (json_unpack_ex(j, &jerr, JSON_STRICT, "{s:s, s:s%}", "lm", &lm, "sha1", &sha1_hex,
                       &sha1_len) < 0) {
        goto cleanup;
    }
    if (sha1_len != VL_SHA1_DIGEST_HEX_STRLEN) {
        vl_debug("failed to read json meta %s: invalid sha1 digest (is %zu chars long, expected %u)",
                 meta_path, sha1_len, VL_SHA1_DIGEST_HEX_STRLEN);
        goto cleanup;
    }

    vl_sha1 hashgot, hashexp;
    if (vl_sha1_decode(hashexp, sha1_hex) < 0) {
        vl_debug("failed to read json meta %s: invalid sha1 digest (bad format)", meta_path);
        goto cleanup;
    }
    if ((ret2 = hash_file(path, hashgot, NULL)) < 0) {
        ret = ret2;
        goto cleanup;
    }
    if (memcmp(hashexp, hashgot, sizeof(vl_sha1)) != 0) {
#ifdef LOG_DEBUG_ENABLED
        char hash_exp_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1];
        char hash_got_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1];
        hash_exp_hex[VL_SHA1_DIGEST_HEX_STRLEN] = 0;
        hash_got_hex[VL_SHA1_DIGEST_HEX_STRLEN] = 0;
        vl_sha1_encode(hashexp, hash_exp_hex);
        vl_sha1_encode(hashgot, hash_got_hex);
        vl_debug("file %s tampered on disk (sha1 expect %s, got %s)", path, hash_exp_hex,
                 hash_got_hex);
#endif
        goto cleanup;
    }

    header = vl_arena_sprintf(arena, "If-Modified-Since: %s", lm);
    *headers = curl_slist_append(*headers, header);
    ret = NET_OK;
cleanup:
    if (j)
        json_decref(j);
    return ret;
}

static void write_transfer_meta(CURL *easy, vl_sha1 ohash, const char *meta_path)
{
    json_t *metaj = NULL;
    struct curl_header *hdr;
    CURLHcode hcode;
    char hash_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1];

    vl_trace("writing transfer meta to %s", meta_path);
    hash_hex[VL_SHA1_DIGEST_HEX_STRLEN] = '\0';
    hcode = curl_easy_header(easy, "Last-Modified", 0, CURLH_HEADER, -1, &hdr);
    if (hcode != CURLHE_OK) {
        vl_debug("Not writing meta %s: curl_easy_header(Last-Modified): %u", meta_path, hcode);
        goto cleanup;
    }
    vl_sha1_encode(ohash, hash_hex);
    metaj = json_pack("{s:s, s:s}", "lm", hdr->value, "sha1", hash_hex);
    if (!metaj) {
        vl_debug("Not writing meta %s: json_pack returned NULL (weird)", meta_path);
        goto cleanup;
    }
    if (json_dump_file(metaj, meta_path, JSON_COMPACT) < 0) {
        vl_debug("Failed writing meta to %s: json_dump_file", meta_path);
        goto cleanup;
    }
cleanup:
    if (metaj)
        json_decref(metaj);
}

struct write_ctx {
    const char *opath;
    FILE *ofile;
    vl_sha1_st sha1_state;
    size_t total_read;
};

static size_t handle_write(char *ptr, size_t sz, size_t nmemb, void *user)
{
    struct write_ctx *ctx = user;

    if (sz * nmemb == 0)
        return 0;

    /* Note that the output file is opened lazily, because we might receive a 304 (Not Modified)
     * response with an empty body (in such a case, this function could be called with nmemb == 0).
     * fopen for writing truncates the file, so we can't open the file in the main function. */
    if (!ctx->ofile) {
        if (!ctx->opath) {
            vl_debug("downloaded file has neither an open file handle nor a file path??");
            return CURL_WRITEFUNC_ERROR;
        }
        ctx->ofile = fopen(ctx->opath, "wb");
        if (!ctx->ofile) {
            vl_warn("Failed to open output file: fopen(%s, wb): %s", ctx->opath, strerror(errno));
            return CURL_WRITEFUNC_ERROR;
        }
    }
    ctx->total_read += sz * nmemb;
    vl_sha1_update(&ctx->sha1_state, ptr, sz * nmemb);
    return fwrite(ptr, sz, nmemb, ctx->ofile);
}

/* Evaluate a curl expression; on failure, log its stringified form plus the error buffer,
 * translate the CURLcode into a NET_* value and jump to the cleanup label. */
#define CHECK(_ex, _exs, _cv, _ebv, _cu, _rv) do { \
        if ((_cv = (_ex)) != CURLE_OK) { \
            vl_warn("%s failed: %s (%s)", _exs, curl_easy_strerror(_cv), _ebv); \
            _rv = translate_curlcode(_cv); \
            goto _cu; \
        } \
    } while (0)
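
/* Rough flow of vl_net_ensure_cached(): derive the hidden ".<name>.meta" path next to
 * target_path; if the cached file still matches the SHA1 recorded there, attach an
 * If-Modified-Since header so the server can answer 304.  On 304 the cached copy is kept
 * as-is; on 2xx the file is rewritten and a fresh Last-Modified/SHA1 pair is recorded via
 * write_transfer_meta(). */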
int vl_net_ensure_cached(vl_arena *arena, const char *url, const char *target_path)
{
    char *meta_path = derive_meta_path(arena, target_path);
    int ret = NET_EUNSPEC;
    char errbuf[CURL_ERROR_SIZE];
    CURLcode ecode;
    CURL *easy = NULL;
    struct curl_slist *headers = NULL;
    struct write_ctx wrctx = { .opath = target_path, .ofile = NULL, .total_read = 0 };
    vl_sha1 ohash;
    long response_code;

    vl_trace("Downloading cached file from %s to %s (%s)", url, target_path, meta_path);
    check_cache_consistent(arena, target_path, meta_path, &headers);

    easy = curl_easy_init();
    if (!easy) {
        vl_warn("Failed to set up CURL handle to download %s!", url);
        goto cleanup;
    }
    errbuf[0] = '\0';
    if ((ecode = curl_easy_setopt(easy, CURLOPT_ERRORBUFFER, errbuf)) != CURLE_OK) {
        vl_warn("curl_easy_setopt(CURLOPT_ERRORBUFFER) failed: %s", curl_easy_strerror(ecode));
        goto cleanup;
    }

#define C(_ex) CHECK(_ex, #_ex, ecode, errbuf, cleanup, ret)
    C(curl_easy_setopt(easy, CURLOPT_URL, url));
    C(curl_easy_setopt(easy, CURLOPT_USERAGENT, VL_USER_AGENT));
    C(curl_easy_setopt(easy, CURLOPT_HTTPGET, 1L));
    C(curl_easy_setopt(easy, CURLOPT_FOLLOWLOCATION, CURLFOLLOW_ALL));
    C(curl_easy_setopt(easy, CURLOPT_TIMEOUT, 60L));
    C(curl_easy_setopt(easy, CURLOPT_FAILONERROR, 1L));
    if (headers) {
        C(curl_easy_setopt(easy, CURLOPT_HTTPHEADER, headers));
    }
    vl_sha1_init(&wrctx.sha1_state);
    C(curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, &handle_write));
    C(curl_easy_setopt(easy, CURLOPT_WRITEDATA, &wrctx));
    C(curl_easy_perform(easy));
    C(curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &response_code));
#undef C

    if (response_code == 304) {
        vl_info("Downloaded file %s not modified.", target_path);
        ret = NET_OK;
        goto cleanup;
    } else if (response_code / 100 == 2) {
        vl_trace("download %s success: %ld", url, response_code);
    } else {
        vl_warn("Bad HTTP response code %ld downloading %s", response_code, url);
        ret = NET_ESTATUS;
        goto cleanup;
    }

    if (wrctx.ofile) {
        fclose(wrctx.ofile);
        wrctx.ofile = NULL;
    }
    vl_sha1_finalize(&wrctx.sha1_state, ohash);
    write_transfer_meta(easy, ohash, meta_path);
    ret = NET_OK;
cleanup:
    if (easy)
        curl_easy_cleanup(easy);
    if (headers)
        curl_slist_free_all(headers);
    if (wrctx.ofile) {
        fclose(wrctx.ofile);
        if (ret < 0 && remove(target_path) < 0) {
            vl_debug("... failed to clean up after failed download: unlink(%s): %s", target_path,
                     strerror(errno));
        }
    }
    return ret;
}

static int cmp_integrity(const vl_sha1 hash_actual, size_t sz_actual, unsigned flags, va_list ap)
{
    if (flags & VERIFY_SIZE) {
        size_t sz_exp = va_arg(ap, size_t);
        if (sz_exp != sz_actual) {
            vl_debug("Size mismatch: expected %zu bytes, got %zu bytes.", sz_exp, sz_actual);
            return NET_EINTEGRITY;
        }
    }
    if (flags & VERIFY_SHA1) {
        const uint8_t *sha1_exp = va_arg(ap, const uint8_t *);
        if (memcmp(sha1_exp, hash_actual, sizeof(vl_sha1))) {
#ifdef LOG_DEBUG_ENABLED
            char hex_exp[VL_SHA1_DIGEST_HEX_STRLEN + 1];
            char hex_got[VL_SHA1_DIGEST_HEX_STRLEN + 1];
            hex_exp[VL_SHA1_DIGEST_HEX_STRLEN] = '\0';
            hex_got[VL_SHA1_DIGEST_HEX_STRLEN] = '\0';
            vl_sha1_encode(sha1_exp, hex_exp);
            vl_sha1_encode(hash_actual, hex_got);
            vl_debug("SHA1 mismatch: expected %s, got %s.", hex_exp, hex_got);
#endif
            return NET_EINTEGRITY;
        }
    }
    return NET_OK;
}

static int verifyv(const char *target_path, unsigned flags, va_list ap)
{
    vl_sha1 hash;
    size_t sz = 0;
    int ret = NET_EUNSPEC;

    if (flags == 0) {
        /* no need to open the file -- we don't care about its size or anything */
        if (access(target_path, R_OK) < 0) {
            return NET_EIO;
        }
        return NET_OK;
    }
    if ((ret = hash_file(target_path, hash, &sz)) != NET_OK) {
        return ret;
    }
    return cmp_integrity(hash, sz, flags, ap);
}
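
/* Example use of vl_net_verify (a sketch -- the path, size and digest are hypothetical
 * placeholders, not taken from any real caller):
 *
 *   vl_sha1 expected = { ... raw digest bytes ... };
 *   if (vl_net_verify("/tmp/pkg.tar.gz", VERIFY_SIZE | VERIFY_SHA1,
 *                     (size_t)123456, expected) != NET_OK) {
 *       ... re-download or report corruption ...
 *   }
 *
 * When both flags are given, the size_t argument must come before the digest pointer,
 * matching the order in which cmp_integrity() consumes the va_list. */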
int vl_net_verify(const char *target_path, unsigned flags, ...)
{
    int ret = 0;
    va_list ap;

    va_start(ap, flags);
    ret = verifyv(target_path, flags, ap);
    va_end(ap);
    return ret;
}

static int download_verified(const char *url, const char *target, unsigned flags, va_list ap)
{
    CURL *easy = NULL;
    char errbuf[CURL_ERROR_SIZE];
    int ret = NET_EUNSPEC;
    struct write_ctx wrctx = { .opath = NULL, .ofile = NULL };
    long response_code = 0;
    vl_sha1 ohash;
    CURLcode ecode;

    vl_debug("Downloading %s to %s", url, target);
    easy = curl_easy_init();
    if (!easy) {
        vl_warn("Failed to create easy handle for %s", url);
        goto cleanup;
    }
    errbuf[0] = '\0';
    if ((ecode = curl_easy_setopt(easy, CURLOPT_ERRORBUFFER, errbuf)) != CURLE_OK) {
        vl_warn("curl_easy_setopt(CURLOPT_ERRORBUFFER) failed: %s", curl_easy_strerror(ecode));
        goto cleanup;
    }

#define C(_ex) CHECK(_ex, #_ex, ecode, errbuf, cleanup, ret)
    C(curl_easy_setopt(easy, CURLOPT_URL, url));
    C(curl_easy_setopt(easy, CURLOPT_USERAGENT, VL_USER_AGENT));
    C(curl_easy_setopt(easy, CURLOPT_HTTPGET, 1L));
    C(curl_easy_setopt(easy, CURLOPT_FOLLOWLOCATION, CURLFOLLOW_ALL));
    C(curl_easy_setopt(easy, CURLOPT_TIMEOUT, 60L));
    C(curl_easy_setopt(easy, CURLOPT_FAILONERROR, 1L));

    vl_sha1_init(&wrctx.sha1_state);
    wrctx.ofile = fopen(target, "wb");
    if (!wrctx.ofile) {
        vl_warn("Failed to open output file %s: fopen: %s", target, strerror(errno));
        ret = NET_EIO;
        goto cleanup;
    }
    C(curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, &handle_write));
    C(curl_easy_setopt(easy, CURLOPT_WRITEDATA, &wrctx));
    C(curl_easy_perform(easy));

    fclose(wrctx.ofile);
    wrctx.ofile = NULL;

    C(curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &response_code));
#undef C

    if (response_code / 100 == 2) {
        vl_trace("download %s success: %ld", url, response_code);
    } else {
        /* shouldn't ever happen (CURLOPT_FAILONERROR) */
        vl_warn("Bad HTTP status code %ld downloading %s", response_code, url);
        ret = NET_ESTATUS;
        goto cleanup;
    }

    vl_sha1_finalize(&wrctx.sha1_state, ohash);
    ret = cmp_integrity(ohash, wrctx.total_read, flags, ap);
    if (ret == NET_EINTEGRITY) {
        if (remove(target) < 0) {
            vl_warn("Failed to remove %s with bad integrity: remove: %s", target, strerror(errno));
        }
    }
cleanup:
    if (easy)
        curl_easy_cleanup(easy);
    if (wrctx.ofile) {
        fclose(wrctx.ofile);
        if (ret != NET_OK && remove(target) < 0) {
            vl_debug("...failed to clean up after failed download: remove(%s): %s", target,
                     strerror(errno));
        }
    }
    return ret;
}

int vl_net_ensure_verified(const char *url, const char *target_path, unsigned flags, ...)
{
    int ret;
    va_list ap;

    vl_trace("ensure verified %s to %s", url, target_path);
    va_start(ap, flags);
    ret = verifyv(target_path, flags, ap);
    va_end(ap);
    if (ret == NET_OK) {
        vl_debug("No need to download %s - integrity matches.", url);
        return NET_OK;
    }
    va_start(ap, flags);
    ret = download_verified(url, target_path, flags, ap);
    va_end(ap);
    return ret;
}
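
/* Example use of vl_net_ensure_verified (a sketch; the URL, path and size are hypothetical):
 *
 *   if (vl_net_ensure_verified("https://example.com/data.bin", "/tmp/data.bin",
 *                              VERIFY_SIZE, (size_t)4096) != NET_OK) {
 *       vl_warn("could not obtain data.bin");
 *   }
 *
 * With flags == 0 the existing file is only checked for readability, so the download is
 * skipped whenever any previous copy is present. */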