From 4cf8b35097a131abcfc8e0d04d35294be13943ac Mon Sep 17 00:00:00 2001 From: bigfoot547 Date: Tue, 6 Jan 2026 03:27:12 -0600 Subject: initial commit --- lib/net.c | 274 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 lib/net.c (limited to 'lib/net.c') diff --git a/lib/net.c b/lib/net.c new file mode 100644 index 0000000..7509ac0 --- /dev/null +++ b/lib/net.c @@ -0,0 +1,274 @@ +#include "log.h" +#include "arena.h" +#include "macros.h" +#include "sha1.h" +#include "net.h" + +#include /* for dirname/basename */ +#include +#include +#include +#include + +/* TODO: also support etag/if-not-match rather than just last-modified/if-not-modified */ + +/* memory usage is about 3 * strlen(path) -- what da heeeellll */ +static char *derive_meta_path(vl_arena *arena, const char *path) +{ + char *base = basename(vl_arena_strdup(arena, path)); + char *dname = dirname(vl_arena_strdup(arena, path)); + return vl_arena_sprintf(arena, "%s/.%s.meta", dname, base); +} + +#define NET_BUFSIZE (4096) + +static int hash_file(const char *fname, vl_sha1 ohash) +{ + FILE *file; + unsigned char data[NET_BUFSIZE]; + size_t nread; + int ret = -1; + vl_sha1_st hashst; + + file = fopen(fname, "rb"); + + if (!file) { + vl_debug("failed to hash file %s: %s", fname, strerror(errno)); + return -1; + } + + vl_sha1_init(&hashst); + while ((nread = fread(data, 1, NET_BUFSIZE, file)) > 0) { + vl_sha1_update(&hashst, data, nread); + } + + if (ferror(file)) { + vl_debug("failed to read file %s :(", fname); + goto cleanup; + } + + vl_sha1_finalize(&hashst, ohash); + + ret = 0; + +cleanup: + fclose(file); + return ret; +} + +static int check_cache_consistent(vl_arena *arena, const char *path, const char *meta_path, struct curl_slist **headers) +{ + json_error_t jerr; + int ret = -1; + json_t *j = json_load_file(meta_path, 0, &jerr); + char *header = NULL; + + vl_trace("checking meta file %s", meta_path); + + if (!j) { + vl_debug("failed to load json meta %s(%d:%d:%d): %s", jerr.source, jerr.line, jerr.column, jerr.position, jerr.text); + goto cleanup; + } + + const char *lm; + const char *sha1_hex; + size_t sha1_len; + + if (json_unpack_ex(j, &jerr, JSON_STRICT, "{s:s, s:s%}", "lm", &lm, "sha1", &sha1_hex, &sha1_len) < 0) { + vl_debug("failed to unpack json meta %s: %s", meta_path, jerr.text); + goto cleanup; + } + + if (sha1_len != VL_SHA1_DIGEST_HEX_STRLEN) { + vl_debug("failed to read json meta %s: invalid sha1 digest (is %zu chars long, expected %u)", meta_path, sha1_len, VL_SHA1_DIGEST_HEX_STRLEN); + goto cleanup; + } + + vl_sha1 hashgot, hashexp; + if (vl_sha1_decode(hashexp, sha1_hex) < 0) { + vl_debug("failed to read json meta %s: invalid sha1 digest (bad format)", meta_path); + goto cleanup; + } + + if (hash_file(path, hashgot) < 0) { + goto cleanup; + } + + if (memcmp(hashexp, hashgot, sizeof(vl_sha1)) != 0) { +#ifdef LOG_DEBUG_ENABLED + char hash_exp_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1]; + char hash_got_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1]; + + hash_exp_hex[VL_SHA1_DIGEST_HEX_STRLEN] = 0; + hash_got_hex[VL_SHA1_DIGEST_HEX_STRLEN] = 0; + + vl_sha1_encode(hashexp, hash_exp_hex); + vl_sha1_encode(hashgot, hash_got_hex); + + vl_debug("file %s tampered on disk (sha1 expect %s, got %s)", path, hash_exp_hex, hash_got_hex); +#endif + + goto cleanup; + } + + header = vl_arena_sprintf(arena, "If-Modified-Since: %s", lm); + *headers = curl_slist_append(*headers, header); + ret = 0; + +cleanup: + if (j) json_decref(j); + return ret; +} + +static void write_transfer_meta(CURL *easy, vl_sha1 ohash, const char *meta_path) +{ + json_t *metaj = NULL; + struct curl_header *hdr; + CURLHcode hcode; + char hash_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1]; + + vl_trace("writing transfer meta to %s", meta_path); + + hash_hex[VL_SHA1_DIGEST_HEX_STRLEN] = '\0'; + + hcode = curl_easy_header(easy, "Last-Modified", 0, CURLH_HEADER, -1, &hdr); + if (hcode != CURLHE_OK) { + vl_debug("Not writing meta %s: curl_easy_header(Last-Modified): %u", meta_path, hcode); + goto cleanup; + } + + vl_sha1_encode(ohash, hash_hex); + metaj = json_pack("{s:s, s:s}", "lm", hdr->value, "sha1", hash_hex); + if (!metaj) { + vl_debug("Not writing meta %s: json_pack returned NULL (weird)", meta_path); + goto cleanup; + } + + if (json_dump_file(metaj, meta_path, JSON_COMPACT) < 0) { + vl_debug("Failed writing meta to %s: json_dump_file", meta_path); + goto cleanup; + } + +cleanup: + if (metaj) json_decref(metaj); +} + +struct write_ctx +{ + const char *opath; + FILE *ofile; + vl_sha1_st sha1_state; +}; + +static size_t handle_write(char *ptr, size_t sz, size_t nmemb, void *user) +{ + struct write_ctx *ctx = user; + if (sz * nmemb == 0) return 0; + + /* Note that the output file is opened lazily, because we might receive a 304 (Not Modified) + * response with an empty body (in such a case, this function could be called with nmemb == 0). + * fopen for writing truncates the file, so we can't open the file in the main function. + */ + if (!ctx->ofile) { + ctx->ofile = fopen(ctx->opath, "wb"); + if (!ctx->ofile) { + vl_warn("Failed to open output file: fopen(%s, wb): %s", ctx->opath, strerror(errno)); + return CURL_WRITEFUNC_ERROR; + } + } + + vl_sha1_update(&ctx->sha1_state, ptr, sz * nmemb); + return fwrite(ptr, sz, nmemb, ctx->ofile); +} + +int vl_net_ensure_cached(vl_arena *arena, const char *url, const char *target_path) +{ + char *meta_path = derive_meta_path(arena, target_path); + int ret = -1; + char errbuf[CURL_ERROR_SIZE]; + CURLcode ecode; + CURL *easy = NULL; + struct curl_slist *headers = NULL; + struct write_ctx wrctx = { .opath = target_path, .ofile = NULL }; + vl_sha1 ohash; + long response_code; + + vl_trace("Downloading cached file from %s to %s (%s)", url, target_path, meta_path); + + check_cache_consistent(arena, target_path, meta_path, &headers); + + easy = curl_easy_init(); + if (!easy) { + vl_warn("Failed to set up CURL handle to download %s!", url); + goto cleanup; + } + + errbuf[0] = '\0'; + + if ((ecode = curl_easy_setopt(easy, CURLOPT_ERRORBUFFER, errbuf)) != CURLE_OK) { + vl_warn("curl_easy_setopt(CURLOPT_ERRORBUFFER) failed: %s", curl_easy_strerror(ecode)); + goto cleanup; + } + +#define CHECK(_ex) do { \ + if ((ecode = (_ex)) != CURLE_OK) { \ + vl_warn("%s failed: %s (%s)", #_ex, curl_easy_strerror(ecode), errbuf); \ + goto cleanup; \ + } \ +} while (0) + + CHECK(curl_easy_setopt(easy, CURLOPT_URL, url)); + CHECK(curl_easy_setopt(easy, CURLOPT_USERAGENT, VL_USER_AGENT)); + CHECK(curl_easy_setopt(easy, CURLOPT_HTTPGET, 1L)); + CHECK(curl_easy_setopt(easy, CURLOPT_FOLLOWLOCATION, CURLFOLLOW_ALL)); + CHECK(curl_easy_setopt(easy, CURLOPT_TIMEOUT, 60)); + + if (headers) { + CHECK(curl_easy_setopt(easy, CURLOPT_HTTPHEADER, headers)); + } + + vl_sha1_init(&wrctx.sha1_state); + + CHECK(curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, &handle_write)); + CHECK(curl_easy_setopt(easy, CURLOPT_WRITEDATA, &wrctx)); + + CHECK(curl_easy_perform(easy)); + + CHECK(curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &response_code)); + +#undef CHECK + + if (response_code == 304) { + vl_info("Downloaded file %s not modified.", target_path); + ret = 0; + goto cleanup; + } else if (response_code / 100 == 2) { + vl_trace("download %s success: %ld", url, response_code); + } else { + vl_warn("Bad HTTP response code %ld downloading %s", response_code, url); + goto cleanup; + } + + if (wrctx.ofile) { + fclose(wrctx.ofile); + wrctx.ofile = NULL; + } + + vl_sha1_finalize(&wrctx.sha1_state, ohash); + write_transfer_meta(easy, ohash, meta_path); + + ret = 0; + +cleanup: + if (easy) curl_easy_cleanup(easy); + if (headers) curl_slist_free_all(headers); + if (wrctx.ofile) { + if (ret < 0 && remove(target_path) < 0) { + vl_debug("... failed to clean up after failed download: unlink(%s): %s", target_path, strerror(errno)); + } + + fclose(wrctx.ofile); + } + + return ret; +} -- cgit v1.2.3-70-g09d2