#include "log.h"
#include "arena.h"
#include "macros.h"
#include "sha1.h"
#include "net.h"

/* NOTE(review): the system header names below were reconstructed from the
 * identifiers this file uses -- confirm against the original include list. */
#include <libgen.h> /* for dirname/basename */
#include <curl/curl.h>
#include <jansson.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

/* TODO: also support ETag/If-None-Match rather than just Last-Modified/If-Modified-Since */

/*
 * Builds the path of the metadata file that sits next to `path`:
 * "<dirname>/.<basename>.meta". The result is allocated from `arena`.
 *
 * basename() and dirname() are each handed their own copy of `path`, so the
 * caller's string is never passed to them directly. Memory usage is therefore
 * about 3 * strlen(path): two scratch copies plus the formatted result.
 */
static char *derive_meta_path(vl_arena *arena, const char *path)
{
    char *base = basename(vl_arena_strdup(arena, path));
    char *dname = dirname(vl_arena_strdup(arena, path));

    return vl_arena_sprintf(arena, "%s/.%s.meta", dname, base);
}

#define NET_BUFSIZE (4096)

/*
 * Computes the SHA-1 digest of the file `fname` into `ohash`, reading it in
 * NET_BUFSIZE-byte chunks.
 *
 * Returns 0 on success, -1 if the file cannot be opened or a read error
 * occurs (in which case `ohash` is not written).
 */
static int hash_file(const char *fname, vl_sha1 ohash)
{
    FILE *file;
    unsigned char data[NET_BUFSIZE];
    size_t nread;
    int ret = -1;
    vl_sha1_st hashst;

    file = fopen(fname, "rb");
    if (!file) {
        vl_debug("failed to hash file %s: %s", fname, strerror(errno));
        return -1;
    }

    vl_sha1_init(&hashst);
    while ((nread = fread(data, 1, NET_BUFSIZE, file)) > 0) {
        vl_sha1_update(&hashst, data, nread);
    }

    /* fread() returns 0 for both EOF and error; tell them apart here. */
    if (ferror(file)) {
        vl_debug("failed to read file %s :(", fname);
        goto cleanup;
    }

    vl_sha1_finalize(&hashst, ohash);
    ret = 0;

cleanup:
    fclose(file);
    return ret;
}

/*
 * Verifies that the cached file `path` still matches the SHA-1 recorded in
 * the JSON metadata file `meta_path` (an object with exactly the string keys
 * "lm" and "sha1"). If it does, an "If-Modified-Since: <lm>" header is
 * appended to `*headers` so the download can be made conditional.
 *
 * Returns 0 if the cache is consistent and the header was appended, -1
 * otherwise. On failure `*headers` is left unchanged.
 */
static int check_cache_consistent(vl_arena *arena, const char *path, const char *meta_path, struct curl_slist **headers)
{
    json_error_t jerr;
    int ret = -1;
    json_t *j = json_load_file(meta_path, 0, &jerr);
    char *header = NULL;
    const char *lm;
    const char *sha1_hex;
    size_t sha1_len;
    vl_sha1 hashgot, hashexp;
    struct curl_slist *appended;

    vl_trace("checking meta file %s", meta_path);

    if (!j) {
        vl_debug("failed to load json meta %s(%d:%d:%d): %s", jerr.source, jerr.line, jerr.column, jerr.position, jerr.text);
        goto cleanup;
    }

    /* "s%" also yields the string length, so the digest can be validated
     * before decoding it. */
    if (json_unpack_ex(j, &jerr, JSON_STRICT, "{s:s, s:s%}", "lm", &lm, "sha1", &sha1_hex, &sha1_len) < 0) {
        vl_debug("failed to unpack json meta %s: %s", meta_path, jerr.text);
        goto cleanup;
    }

    if (sha1_len != VL_SHA1_DIGEST_HEX_STRLEN) {
        vl_debug("failed to read json meta %s: invalid sha1 digest (is %zu chars long, expected %u)", meta_path, sha1_len, VL_SHA1_DIGEST_HEX_STRLEN);
        goto cleanup;
    }

    if (vl_sha1_decode(hashexp, sha1_hex) < 0) {
        vl_debug("failed to read json meta %s: invalid sha1 digest (bad format)", meta_path);
        goto cleanup;
    }

    if (hash_file(path, hashgot) < 0) {
        goto cleanup;
    }

    if (memcmp(hashexp, hashgot, sizeof(vl_sha1)) != 0) {
#ifdef LOG_DEBUG_ENABLED
        char hash_exp_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1];
        char hash_got_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1];

        hash_exp_hex[VL_SHA1_DIGEST_HEX_STRLEN] = 0;
        hash_got_hex[VL_SHA1_DIGEST_HEX_STRLEN] = 0;
        vl_sha1_encode(hashexp, hash_exp_hex);
        vl_sha1_encode(hashgot, hash_got_hex);
        vl_debug("file %s tampered on disk (sha1 expect %s, got %s)", path, hash_exp_hex, hash_got_hex);
#endif
        goto cleanup;
    }

    header = vl_arena_sprintf(arena, "If-Modified-Since: %s", lm);

    /* curl_slist_append() returns NULL on failure; keep the caller's list
     * intact in that case instead of overwriting it. */
    appended = curl_slist_append(*headers, header);
    if (!appended) {
        vl_debug("failed to append conditional header for %s", path);
        goto cleanup;
    }
    *headers = appended;
    ret = 0;

cleanup:
    if (j) json_decref(j);
    return ret;
}

/*
 * Writes the metadata file `meta_path` for a finished transfer: a compact
 * JSON object holding the response's Last-Modified header ("lm") and the hex
 * SHA-1 of the downloaded body ("sha1").
 *
 * Best effort: if the header is absent or the file cannot be written, the
 * problem is logged at debug level and no error is reported to the caller.
 */
static void write_transfer_meta(CURL *easy, vl_sha1 ohash, const char *meta_path)
{
    json_t *metaj = NULL;
    struct curl_header *hdr;
    CURLHcode hcode;
    char hash_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1];

    vl_trace("writing transfer meta to %s", meta_path);

    hash_hex[VL_SHA1_DIGEST_HEX_STRLEN] = '\0';

    /* Request index -1 selects the headers of the final response. */
    hcode = curl_easy_header(easy, "Last-Modified", 0, CURLH_HEADER, -1, &hdr);
    if (hcode != CURLHE_OK) {
        vl_debug("Not writing meta %s: curl_easy_header(Last-Modified): %u", meta_path, (unsigned)hcode);
        goto cleanup;
    }

    vl_sha1_encode(ohash, hash_hex);

    metaj = json_pack("{s:s, s:s}", "lm", hdr->value, "sha1", hash_hex);
    if (!metaj) {
        vl_debug("Not writing meta %s: json_pack returned NULL (weird)", meta_path);
        goto cleanup;
    }

    if (json_dump_file(metaj, meta_path, JSON_COMPACT) < 0) {
        vl_debug("Failed writing meta to %s: json_dump_file", meta_path);
        goto cleanup;
    }

cleanup:
    if (metaj) json_decref(metaj);
}

/* State shared between vl_net_ensure_cached() and its curl write callback. */
struct write_ctx {
    const char *opath;     /* path of the output file */
    FILE *ofile;           /* NULL until the first non-empty body chunk arrives */
    vl_sha1_st sha1_state; /* running SHA-1 of the received body */
};

/*
 * curl write callback: appends a body chunk to the output file and feeds it
 * into the running SHA-1.
 *
 * Returns the number of bytes consumed; a short count makes curl abort the
 * transfer. Returns CURL_WRITEFUNC_ERROR if the output file cannot be opened.
 */
static size_t handle_write(char *ptr, size_t sz, size_t nmemb, void *user)
{
    struct write_ctx *ctx = user;
    size_t nbytes = sz * nmemb;

    if (nbytes == 0) return 0;

    /* Note that the output file is opened lazily, because we might receive a 304 (Not Modified)
     * response with an empty body (in such a case, this function could be called with nmemb == 0).
     * fopen for writing truncates the file, so we can't open the file in the main function. */
    if (!ctx->ofile) {
        ctx->ofile = fopen(ctx->opath, "wb");
        if (!ctx->ofile) {
            vl_warn("Failed to open output file: fopen(%s, wb): %s", ctx->opath, strerror(errno));
            return CURL_WRITEFUNC_ERROR;
        }
    }

    vl_sha1_update(&ctx->sha1_state, ptr, nbytes);

    /* Write with an element size of 1 so the return value is a byte count,
     * which is what curl compares against sz * nmemb. */
    return fwrite(ptr, 1, nbytes, ctx->ofile);
}

/*
 * Ensures that `target_path` holds an up-to-date copy of `url`.
 *
 * If a metadata file exists next to the target and the file on disk still
 * matches the recorded SHA-1, the request is made conditional
 * (If-Modified-Since) and a 304 response leaves the file untouched. On a 2xx
 * response the body is written to `target_path` and fresh metadata is stored.
 *
 * Returns 0 on a 304 or 2xx response, -1 on any curl failure, any other HTTP
 * status, or a failure to finish writing the file. If the output file is
 * still open when the function fails, it is closed and removed.
 */
int vl_net_ensure_cached(vl_arena *arena, const char *url, const char *target_path)
{
    char *meta_path = derive_meta_path(arena, target_path);
    int ret = -1;
    char errbuf[CURL_ERROR_SIZE];
    CURLcode ecode;
    CURL *easy = NULL;
    struct curl_slist *headers = NULL;
    struct write_ctx wrctx = { .opath = target_path, .ofile = NULL };
    vl_sha1 ohash;
    long response_code = 0;

    vl_trace("Downloading cached file from %s to %s (%s)", url, target_path, meta_path);

    /* The result is deliberately ignored: if the cache cannot be verified,
     * no conditional header is added and the file is downloaded again. */
    (void)check_cache_consistent(arena, target_path, meta_path, &headers);

    easy = curl_easy_init();
    if (!easy) {
        vl_warn("Failed to set up CURL handle to download %s!", url);
        goto cleanup;
    }

    errbuf[0] = '\0';
    if ((ecode = curl_easy_setopt(easy, CURLOPT_ERRORBUFFER, errbuf)) != CURLE_OK) {
        vl_warn("curl_easy_setopt(CURLOPT_ERRORBUFFER) failed: %s", curl_easy_strerror(ecode));
        goto cleanup;
    }

#define CHECK(_ex) do { \
        if ((ecode = (_ex)) != CURLE_OK) { \
            vl_warn("%s failed: %s (%s)", #_ex, curl_easy_strerror(ecode), errbuf); \
            goto cleanup; \
        } \
    } while (0)

    CHECK(curl_easy_setopt(easy, CURLOPT_URL, url));
    CHECK(curl_easy_setopt(easy, CURLOPT_USERAGENT, VL_USER_AGENT));
    CHECK(curl_easy_setopt(easy, CURLOPT_HTTPGET, 1L));
    CHECK(curl_easy_setopt(easy, CURLOPT_FOLLOWLOCATION, CURLFOLLOW_ALL));
    /* curl_easy_setopt() is variadic and reads this option as a long, so the
     * argument must be a long constant. */
    CHECK(curl_easy_setopt(easy, CURLOPT_TIMEOUT, 60L));

    if (headers) {
        CHECK(curl_easy_setopt(easy, CURLOPT_HTTPHEADER, headers));
    }

    vl_sha1_init(&wrctx.sha1_state);

    CHECK(curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, &handle_write));
    CHECK(curl_easy_setopt(easy, CURLOPT_WRITEDATA, &wrctx));

    CHECK(curl_easy_perform(easy));

    CHECK(curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &response_code));

#undef CHECK

    if (response_code == 304) {
        vl_info("Downloaded file %s not modified.", target_path);
        ret = 0;
        goto cleanup;
    } else if (response_code / 100 == 2) {
        vl_trace("download %s success: %ld", url, response_code);
    } else {
        vl_warn("Bad HTTP response code %ld downloading %s", response_code, url);
        goto cleanup;
    }

    /* NOTE(review): a 2xx response with an empty body never opens the output
     * file, so target_path is left as it was while the metadata records the
     * hash of empty data -- confirm whether that is intended. */
    if (wrctx.ofile) {
        FILE *done = wrctx.ofile;

        wrctx.ofile = NULL;

        /* fclose() flushes buffered data; if that fails the file on disk is
         * incomplete, so do not report success or record metadata for it. */
        if (fclose(done) != 0) {
            vl_warn("Failed to finish writing output file %s: %s", target_path, strerror(errno));
            if (remove(target_path) < 0) {
                vl_debug("... failed to clean up after failed download: unlink(%s): %s", target_path, strerror(errno));
            }
            goto cleanup;
        }
    }

    vl_sha1_finalize(&wrctx.sha1_state, ohash);

    write_transfer_meta(easy, ohash, meta_path);
    ret = 0;

cleanup:
    if (easy) curl_easy_cleanup(easy);
    if (headers) curl_slist_free_all(headers);
    if (wrctx.ofile) {
        /* Close before removing: some platforms refuse to delete a file that
         * is still open. */
        fclose(wrctx.ofile);
        wrctx.ofile = NULL;

        if (ret < 0 && remove(target_path) < 0) {
            vl_debug("... failed to clean up after failed download: unlink(%s): %s", target_path, strerror(errno));
        }
    }

    return ret;
}