author    bigfoot547 <bigfoot@figboot.dev>  2026-01-06 03:27:12 -0600
committer bigfoot547 <bigfoot@figboot.dev>  2026-01-06 03:27:12 -0600
commit    4cf8b35097a131abcfc8e0d04d35294be13943ac (patch)
tree      bb384a58c53d7b4ab9faf45e84677ae5cdb42ac1 /lib/net.c
initial commit
Diffstat (limited to 'lib/net.c')
-rw-r--r--  lib/net.c  274
1 file changed, 274 insertions, 0 deletions
diff --git a/lib/net.c b/lib/net.c
new file mode 100644
index 0000000..7509ac0
--- /dev/null
+++ b/lib/net.c
@@ -0,0 +1,274 @@
+#include "log.h"
+#include "arena.h"
+#include "macros.h"
+#include "sha1.h"
+#include "net.h"
+
+#include <libgen.h> /* for dirname/basename */
+#include <curl/curl.h>
+#include <jansson.h>
+#include <errno.h>
+#include <string.h>
+
+/* TODO: also support etag/if-not-match rather than just last-modified/if-not-modified */
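+
+/* typical usage (hypothetical caller):
+ *
+ *   if (vl_net_ensure_cached(arena, "https://example.com/pkg.tar", "/cache/pkg.tar") == 0) {
+ *       ... use the up-to-date file at /cache/pkg.tar ...
+ *   }
+ */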
+
+/* memory usage is about 3 * strlen(path) -- what da heeeellll */
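+/* e.g. "/cache/pkg.tar" -> "/cache/.pkg.tar.meta" */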
+static char *derive_meta_path(vl_arena *arena, const char *path)
+{
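+ /* POSIX dirname()/basename() may modify their argument, hence a fresh copy for each */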
+ char *base = basename(vl_arena_strdup(arena, path));
+ char *dname = dirname(vl_arena_strdup(arena, path));
+ return vl_arena_sprintf(arena, "%s/.%s.meta", dname, base);
+}
+
+#define NET_BUFSIZE (4096)
+
+static int hash_file(const char *fname, vl_sha1 ohash)
+{
+ FILE *file;
+ unsigned char data[NET_BUFSIZE];
+ size_t nread;
+ int ret = -1;
+ vl_sha1_st hashst;
+
+ file = fopen(fname, "rb");
+
+ if (!file) {
+ vl_debug("failed to hash file %s: %s", fname, strerror(errno));
+ return -1;
+ }
+
+ vl_sha1_init(&hashst);
+ while ((nread = fread(data, 1, NET_BUFSIZE, file)) > 0) {
+ vl_sha1_update(&hashst, data, nread);
+ }
+
+ if (ferror(file)) {
+ vl_debug("failed to read file %s :(", fname);
+ goto cleanup;
+ }
+
+ vl_sha1_finalize(&hashst, ohash);
+
+ ret = 0;
+
+cleanup:
+ fclose(file);
+ return ret;
+}
+
+static int check_cache_consistent(vl_arena *arena, const char *path, const char *meta_path, struct curl_slist **headers)
+{
+ json_error_t jerr;
+ int ret = -1;
+ json_t *j = json_load_file(meta_path, 0, &jerr);
+ char *header = NULL;
+
+ vl_trace("checking meta file %s", meta_path);
+
+ if (!j) {
+ vl_debug("failed to load json meta %s(%d:%d:%d): %s", jerr.source, jerr.line, jerr.column, jerr.position, jerr.text);
+ goto cleanup;
+ }
+
+ const char *lm;
+ const char *sha1_hex;
+ size_t sha1_len;
+
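+ /* expected meta shape: {"lm": "<Last-Modified value>", "sha1": "<40 hex chars>"};
+ * jansson's "s%" conversion also stores the string's length into sha1_len */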
+ if (json_unpack_ex(j, &jerr, JSON_STRICT, "{s:s, s:s%}", "lm", &lm, "sha1", &sha1_hex, &sha1_len) < 0) {
+ vl_debug("failed to unpack json meta %s: %s", meta_path, jerr.text);
+ goto cleanup;
+ }
+
+ if (sha1_len != VL_SHA1_DIGEST_HEX_STRLEN) {
+ vl_debug("failed to read json meta %s: invalid sha1 digest (is %zu chars long, expected %u)", meta_path, sha1_len, VL_SHA1_DIGEST_HEX_STRLEN);
+ goto cleanup;
+ }
+
+ vl_sha1 hashgot, hashexp;
+ if (vl_sha1_decode(hashexp, sha1_hex) < 0) {
+ vl_debug("failed to read json meta %s: invalid sha1 digest (bad format)", meta_path);
+ goto cleanup;
+ }
+
+ if (hash_file(path, hashgot) < 0) {
+ goto cleanup;
+ }
+
+ if (memcmp(hashexp, hashgot, sizeof(vl_sha1)) != 0) {
+#ifdef LOG_DEBUG_ENABLED
+ char hash_exp_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1];
+ char hash_got_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1];
+
+ hash_exp_hex[VL_SHA1_DIGEST_HEX_STRLEN] = 0;
+ hash_got_hex[VL_SHA1_DIGEST_HEX_STRLEN] = 0;
+
+ vl_sha1_encode(hashexp, hash_exp_hex);
+ vl_sha1_encode(hashgot, hash_got_hex);
+
+ vl_debug("file %s tampered on disk (sha1 expect %s, got %s)", path, hash_exp_hex, hash_got_hex);
+#endif
+
+ goto cleanup;
+ }
+
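+ /* e.g. "If-Modified-Since: Wed, 21 Oct 2015 07:28:00 GMT" -- the stored Last-Modified value is echoed back verbatim */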
+ header = vl_arena_sprintf(arena, "If-Modified-Since: %s", lm);
+ *headers = curl_slist_append(*headers, header);
+ ret = 0;
+
+cleanup:
+ if (j) json_decref(j);
+ return ret;
+}
+
+static void write_transfer_meta(CURL *easy, vl_sha1 ohash, const char *meta_path)
+{
+ json_t *metaj = NULL;
+ struct curl_header *hdr;
+ CURLHcode hcode;
+ char hash_hex[VL_SHA1_DIGEST_HEX_STRLEN + 1];
+
+ vl_trace("writing transfer meta to %s", meta_path);
+
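+ /* vl_sha1_encode() writes exactly VL_SHA1_DIGEST_HEX_STRLEN chars and no NUL, so terminate up front */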
+ hash_hex[VL_SHA1_DIGEST_HEX_STRLEN] = '\0';
+
+ hcode = curl_easy_header(easy, "Last-Modified", 0, CURLH_HEADER, -1, &hdr);
+ if (hcode != CURLHE_OK) {
+ vl_debug("Not writing meta %s: curl_easy_header(Last-Modified): %u", meta_path, hcode);
+ goto cleanup;
+ }
+
+ vl_sha1_encode(ohash, hash_hex);
+ metaj = json_pack("{s:s, s:s}", "lm", hdr->value, "sha1", hash_hex);
+ if (!metaj) {
+ vl_debug("Not writing meta %s: json_pack returned NULL (weird)", meta_path);
+ goto cleanup;
+ }
+
+ if (json_dump_file(metaj, meta_path, JSON_COMPACT) < 0) {
+ vl_debug("Failed writing meta to %s: json_dump_file", meta_path);
+ goto cleanup;
+ }
+
+cleanup:
+ if (metaj) json_decref(metaj);
+}
+
+struct write_ctx
+{
+ const char *opath;
+ FILE *ofile;
+ vl_sha1_st sha1_state;
+};
+
+static size_t handle_write(char *ptr, size_t sz, size_t nmemb, void *user)
+{
+ struct write_ctx *ctx = user;
+ if (sz * nmemb == 0) return 0;
+
+ /* The output file is opened lazily: a 304 (Not Modified) response carries an empty
+ * body, so this callback never runs with data (the sz * nmemb == 0 guard above covers
+ * a spurious zero-length call). Since fopen(..., "wb") truncates, opening eagerly in
+ * vl_net_ensure_cached would destroy the cached file before we know it is stale.
+ */
+ if (!ctx->ofile) {
+ ctx->ofile = fopen(ctx->opath, "wb");
+ if (!ctx->ofile) {
+ vl_warn("Failed to open output file: fopen(%s, wb): %s", ctx->opath, strerror(errno));
+ return CURL_WRITEFUNC_ERROR;
+ }
+ }
+
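+ /* curl treats any return other than the number of bytes handed in as an error and aborts the transfer */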
+ vl_sha1_update(&ctx->sha1_state, ptr, sz * nmemb);
+ return fwrite(ptr, 1, sz * nmemb, ctx->ofile);
+}
+
+int vl_net_ensure_cached(vl_arena *arena, const char *url, const char *target_path)
+{
+ char *meta_path = derive_meta_path(arena, target_path);
+ int ret = -1;
+ char errbuf[CURL_ERROR_SIZE];
+ CURLcode ecode;
+ CURL *easy = NULL;
+ struct curl_slist *headers = NULL;
+ struct write_ctx wrctx = { .opath = target_path, .ofile = NULL };
+ vl_sha1 ohash;
+ long response_code;
+
+ vl_trace("Downloading cached file from %s to %s (%s)", url, target_path, meta_path);
+
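+ /* best effort: if the cached file or its meta is missing or inconsistent, no
+ * If-Modified-Since header is added and the file is simply downloaded afresh */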
+ check_cache_consistent(arena, target_path, meta_path, &headers);
+
+ easy = curl_easy_init();
+ if (!easy) {
+ vl_warn("Failed to set up CURL handle to download %s!", url);
+ goto cleanup;
+ }
+
+ errbuf[0] = '\0';
+
+ if ((ecode = curl_easy_setopt(easy, CURLOPT_ERRORBUFFER, errbuf)) != CURLE_OK) {
+ vl_warn("curl_easy_setopt(CURLOPT_ERRORBUFFER) failed: %s", curl_easy_strerror(ecode));
+ goto cleanup;
+ }
+
+#define CHECK(_ex) do { \
+ if ((ecode = (_ex)) != CURLE_OK) { \
+ vl_warn("%s failed: %s (%s)", #_ex, curl_easy_strerror(ecode), errbuf); \
+ goto cleanup; \
+ } \
+} while (0)
+
+ CHECK(curl_easy_setopt(easy, CURLOPT_URL, url));
+ CHECK(curl_easy_setopt(easy, CURLOPT_USERAGENT, VL_USER_AGENT));
+ CHECK(curl_easy_setopt(easy, CURLOPT_HTTPGET, 1L));
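+ /* CURLFOLLOW_ALL requires libcurl >= 8.13; on older versions pass 1L instead */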
+ CHECK(curl_easy_setopt(easy, CURLOPT_FOLLOWLOCATION, CURLFOLLOW_ALL));
+ CHECK(curl_easy_setopt(easy, CURLOPT_TIMEOUT, 60L));
+
+ if (headers) {
+ CHECK(curl_easy_setopt(easy, CURLOPT_HTTPHEADER, headers));
+ }
+
+ vl_sha1_init(&wrctx.sha1_state);
+
+ CHECK(curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, &handle_write));
+ CHECK(curl_easy_setopt(easy, CURLOPT_WRITEDATA, &wrctx));
+
+ CHECK(curl_easy_perform(easy));
+
+ CHECK(curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &response_code));
+
+#undef CHECK
+
+ if (response_code == 304) {
+ vl_info("Downloaded file %s not modified.", target_path);
+ ret = 0;
+ goto cleanup;
+ } else if (response_code / 100 == 2) {
+ vl_trace("download %s success: %ld", url, response_code);
+ } else {
+ vl_warn("Bad HTTP response code %ld downloading %s", response_code, url);
+ goto cleanup;
+ }
+
+ if (wrctx.ofile) {
+ fclose(wrctx.ofile);
+ wrctx.ofile = NULL;
+ }
+
+ vl_sha1_finalize(&wrctx.sha1_state, ohash);
+ write_transfer_meta(easy, ohash, meta_path);
+
+ ret = 0;
+
+cleanup:
+ if (easy) curl_easy_cleanup(easy);
+ if (headers) curl_slist_free_all(headers);
+ if (wrctx.ofile) {
+ fclose(wrctx.ofile);
+
+ /* drop the partial download so a corrupt file is never left in the cache */
+ if (ret < 0 && remove(target_path) < 0) {
+ vl_debug("failed to clean up after failed download: remove(%s): %s", target_path, strerror(errno));
+ }
+ }
+
+ return ret;
+}