vessel/old/form.c
2025-06-13 17:58:13 +03:00

517 lines
13 KiB
C

#include "include/conf.h"
#include "include/mem.h"
#include "include/form.h"
#include "include/http.h"
#include "include/stream.h"
#include "include/switch.h"
#include "include/blake2s.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
/*
 * Allocates a new form field.
 *
 * The returned field has an initialised (empty) header map, the plain type
 * and no value. Returns NULL when either the allocation or the header map
 * initialisation fails; nothing is leaked in that case.
 */
static FormField *FormField_new(void) {
    FormField *field = Malloc(sizeof(*field));

    if (field == NULL)
        return NULL;

    if (HMap_init(&field->headers)) {
        field->type = FormFieldType_plain;
        field->value = NULL;
        return field;
    }

    /* The header map could not be set up, so give the allocation back. */
    Free(field);
    return NULL;
}
/*
 * Releases a form field together with everything it owns: its header map,
 * its value and the field structure itself. A NULL field is ignored.
 */
static void FormField_destroy(FormField *f) {
    if (f != NULL) {
        HMap_destroy_free(&f->headers);

        /* A file value needs its own teardown before its storage is freed. */
        if (f->type == FormFieldType_file)
            FormFile_destroy(f->value);

        if (f->value != NULL)
            Free(f->value);

        Free(f);
    }
}
/*
 * Reads a URL-encoded form body from the request stream and stores one plain
 * FormField per parsed key in `hmap`.
 *
 * At most FORM_VALUE_MAX bytes of the body are read. An empty body is
 * accepted and yields no fields.
 *
 * Returns True on success and False on a stream, parse or allocation
 * failure. Fields inserted into `hmap` before a failure stay there and remain
 * owned by `hmap`.
 */
static Bool read_url_encoded_form(HTTPRequest *req, HMap *hmap) {
    char value[FORM_VALUE_MAX + 1];
    const uint64_t rd = Stream_read(req->fp, value, FORM_VALUE_MAX);
    if (rd == 0) /* Technically an empty form is still a form, just, uh, empty */
        return True;
    else if (rd == STREAM_ERROR)
        return False;
    value[rd] = '\0';

    HMap out = { 0 };
    if (!HMap_init(&out))
        return False;
    if (!HTTP_parse_query(value, &out)) {
        HMap_destroy_free(&out);
        return False;
    }

    for (uint64_t idx = 0; idx < out.size; ++idx) {
        HMapBucket *b = &out.buckets[out.occupied_els[idx]];

        FormField *f = FormField_new(); /* starts out plain, with no value */
        if (!f) {
            HMap_destroy_free(&out);
            return False;
        }
        if (!HMap_insert(hmap, b->key, f)) {
            /* The field does not own a value yet, so this cannot touch `out`. */
            FormField_destroy(f);
            HMap_destroy_free(&out);
            return False;
        }

        /*
         * Hand the parsed value over to the field and detach it from the
         * temporary map, so that tearing `out` down below cannot release
         * memory the field still points at.
         *
         * NOTE(review): relies on HMap_destroy_free() tolerating NULL values
         * -- confirm against the HMap implementation.
         */
        f->value = b->value;
        b->value = NULL;
    }
    return HMap_destroy_free(&out);
}
/*
 * Discards the body of the current multipart part.
 *
 * Reads from the request stream using the "--<boundary>" delimiter
 * (presumably consuming everything up to and including it -- confirm against
 * Stream_readbf), then reads the two bytes that follow it.
 *
 * Returns True when those two bytes start with '-', i.e. this was the closing
 * boundary; the trailing two bytes after it are read as well. Returns False
 * when another part follows. `http_buf` is used as scratch space.
 */
static inline Bool multipart_skip_data(HTTPRequest *req, uint8_t *http_buf, const char *boundary) {
    Stream_readbf(req->fp, http_buf, HTTP_BUFFER_MAX_SIZE, NULL, False, "--%s", boundary);

    /* Either "--" (closing boundary) or "\r\n" (another part follows). */
    Stream_read(req->fp, http_buf, 2);
    if (http_buf[0] != '-')
        return False;

    /* Closing boundary: also consume the "\r\n" that ends it. */
    Stream_read(req->fp, http_buf, 2);
    return True;
}
/*
 * Abandons the multipart part currently being parsed.
 *
 * Destroys the field `f`, clears the `disposition` map, then forwards the
 * remaining arguments to multipart_skip_data(). When that reports the closing
 * boundary the enclosing loop is left with `break`; otherwise the loop moves
 * on to the next part with `continue`.
 *
 * The expansion is several statements ending in an unterminated `continue`,
 * so it must be used as a complete statement inside a braced block within a
 * loop -- never as the unbraced body of an `if`.
 */
#define multipart_skip_data_loop(f, disposition, ...) \
FormField_destroy((f)); \
HMap_clear_free(&(disposition)); \
if (multipart_skip_data(__VA_ARGS__)) \
break; \
continue
/*
 * Parses a multipart/form-data request body into `hmap`.
 *
 * `content_type` is the request's content-type header bucket; its value must
 * carry a `boundary` parameter of at least two characters.
 *
 * For every part with a usable `content-disposition: form-data; name=...`
 * header a FormField is inserted into `hmap` under that name:
 *   - parts with a `filename*` / `filename` parameter are written to a
 *     temporary file and stored as FormFieldType_file (a part with an empty
 *     filename is consumed but not inserted);
 *   - all other parts are stored as FormFieldType_plain strings.
 * Parts without a usable disposition header are skipped.
 *
 * Returns True once the closing boundary has been consumed, False on any
 * stream, parse, file or allocation failure. Fields inserted before a
 * failure stay in `hmap`.
 */
static Bool read_mutipart_form(HTTPRequest *req, HMap *hmap, HMapBucket *content_type) {
    FormField *f;
    uint64_t size;
    HMap content_type_multipart = { 0 };
    uint8_t end[2] = { 0 };
    uint8_t http_buf[HTTP_BUFFER_MAX_SIZE];
    uint8_t value[FORM_VALUE_MAX];

    if (!HMap_init(&content_type_multipart))
        return False;
    if (!HTTP_parse_multipart_header(content_type->value, &content_type_multipart)) {
        HMap_destroy_free(&content_type_multipart);
        return False;
    }
    const HMapBucket *boundary_bucket = HMap_find(&content_type_multipart, "boundary");
    if (!boundary_bucket || !boundary_bucket->value) {
        /* A missing or valueless boundary parameter cannot delimit anything. */
        HMap_destroy_free(&content_type_multipart);
        return False;
    }
    /* Keep a private copy; the parsed header map is not needed afterwards. */
    char *boundary = dupstr(boundary_bucket->value);
    HMap_destroy_free(&content_type_multipart);
    if (!boundary)
        return False;

    const uint64_t boundary_size = strlen(boundary);
    if (boundary_size < 2)
        goto error_wbound;

    Bool found;

    /* Init the disposition HMap */
    HMap disposition = { 0 };
    if (!HMap_init(&disposition))
        goto error_wbound;

    /* Read the delimiter */
    if (Stream_readbf(
            req->fp, http_buf, HTTP_BUFFER_MAX_SIZE, &found, True, "--%s" HTTP_CRLF, boundary) !=
            0 ||
        !found)
        goto error_wdis;

    /* `end` holds the two bytes after the last boundary: "--" closes the form. */
    while (*end != '-') {
        HMap_clear_free(&disposition);

        /*
         * 40 is the bare minimum header length required for this form type.
         *
         * >>> len("content-disposition:form-data;name=a\r\n\r\n")
         * 40
         */
        size = Stream_readb(
            req->fp, http_buf, HTTP_BUFFER_MAX_SIZE, HTTP_2CRLF, HTTP_2CRLF_LENGTH, &found, True);
        if (Stream_ignore_err(size) < 40 || !found)
            goto error_wdis;

        /* Headers */
        f = FormField_new();
        if (!f)
            goto error_wdis;
        if (!HTTP_parse_headers(http_buf, (uint64_t)size, &f->headers, NULL))
            goto error_wfield;

        /* Disposition */
        HMapBucket *disposition_b = HMap_find(&f->headers, "content-disposition");
        if (!disposition_b) {
            /* We can just skip the ones with no disposition header. */
            multipart_skip_data_loop(f, disposition, req, http_buf, boundary);
        }
        if (!HTTP_parse_multipart_header(disposition_b->value, &disposition)) {
            /* Bad header */
            multipart_skip_data_loop(f, disposition, req, http_buf, boundary);
        }
        HMapBucket *form_data_disposition_b = HMap_find_val(&disposition, NULL, NULL);
        if (!form_data_disposition_b || strcmp(form_data_disposition_b->key, "form-data") != 0) {
            /* No `form-data` */
            multipart_skip_data_loop(f, disposition, req, http_buf, boundary);
        }
        HMapBucket *name = HMap_find(&disposition, "name");
        if (!name || !name->value) {
            /* No key */
            multipart_skip_data_loop(f, disposition, req, http_buf, boundary);
        }
        /* `filename*` is prioritised over `filename` (standard practice) */
        HMapBucket *filename = HMap_find(&disposition, "filename*");
        if (!filename)
            filename = HMap_find(&disposition, "filename");

        /* Parsing */
        Bool do_insert = True;
        if (filename) {
            /* A missing or empty filename means "no file": consume the data, store nothing. */
            do_insert = filename->value && *(const uint8_t *)filename->value != '\0';
            FormFile *ff = NULL;
            if (do_insert) {
                ff = Malloc(sizeof(*ff));
                if (!ff)
                    goto error_wfield;
                /*
                 * len("filename") == 8, therefore we need to convert to
                 * wchar_t (as len("filename*") == 9, which is already
                 * unicode).
                 */
                if (!FormFile_init(ff, filename->value, filename->key_length == 8)) {
                    Free(ff);
                    goto error_wfield;
                }
                /*
                 * From here on the field owns `ff`: every failure below goes
                 * through error_wfield, where FormField_destroy() destroys and
                 * frees it exactly once. Releasing it here as well would be a
                 * double free.
                 */
                f->type = FormFieldType_file;
                f->value = ff;
                if (!File_open(&ff->f, ff->path, FILEF_WR | FILEF_CREAT, FILEM_ONX))
                    goto error_wfield;
            }
            for (uint32_t chunk = 0; chunk < FORM_FILE_VALUE_CHUNKS; ++chunk) {
                size =
                    Stream_readbf(req->fp, value, FORM_VALUE_MAX, &found, False, "--%s", boundary);
                if (Stream_ignore_err(size) <= 0)
                    break;
                if (found) {
                    if (Stream_read(req->fp, end, 2) != 2)
                        goto error_wfield;
                    /* The CRLF in front of the boundary is not file content. */
                    if (size >= HTTP_CRLF_LENGTH)
                        size -= HTTP_CRLF_LENGTH;
                }
                if (do_insert && File_write(&ff->f, value, size) <= 0)
                    break;
                if (found || *end == '-')
                    break;
            }
            if (do_insert)
                FormFile_close(ff);
            if (!found)
                goto error_wfield; /* boundary never seen: truncated or oversized part */
        } else {
            size = Stream_readbf(req->fp, value, FORM_VALUE_MAX, &found, True, "--%s", boundary);
            if (size == STREAM_ERROR || !found)
                goto error_wfield;
            if (Stream_read(req->fp, end, 2) != 2)
                goto error_wfield;
            if (size == 0)
                value[0] = '\0';
            f->type = FormFieldType_plain;
            /*
             * Skips CRLF after content. Clamp instead of subtracting blindly:
             * with fewer than two bytes in front of the boundary the unsigned
             * subtraction would wrap around to a huge length.
             */
            const uint64_t text_len = size >= HTTP_CRLF_LENGTH ? size - HTTP_CRLF_LENGTH : 0;
            f->value = dupnstr((const char *)value, text_len);
            if (!f->value)
                goto error_wfield;
        }

        if (do_insert) {
            if (!HMap_insert(hmap, name->value, f))
                goto error_wfield;
        } else
            FormField_destroy(f);
    }

    Free(boundary);
    HMap_destroy_free(&disposition);
    return True;

error_wfield:
    FormField_destroy(f);
error_wdis:
    HMap_destroy_free(&disposition);
error_wbound:
    Free(boundary);
    return False;
}
/*
 * Reads a plain-text form body (at most FORM_VALUE_MAX bytes) and stores it
 * in `hmap` as a single plain FormField under FORM_PLAIN_KEY.
 *
 * The field also receives a `content-length` header holding the number of
 * bytes read. That header is best effort: failing to add it does not fail
 * the form.
 *
 * Returns True on success, False on a stream or allocation failure or when
 * the field cannot be inserted. Nothing is leaked on failure.
 */
static Bool read_plain_form(HTTPRequest *req, HMap *hmap) {
    char value[FORM_VALUE_MAX + 1];
    const uint64_t rd = Stream_read(req->fp, value, FORM_VALUE_MAX);
    if (rd == STREAM_ERROR)
        return False;
    value[rd] = '\0';

    /* Allocate only once the read has succeeded, so an I/O error owns nothing. */
    FormField *f = FormField_new();
    if (!f)
        return False;
    f->type = FormFieldType_plain;
    f->value = dupnstr(value, rd);
    if (!f->value) {
        FormField_destroy(f); /* also releases the header map */
        return False;
    }

    /* Add a content-length header. */
    char rd_a[32];
    snprintf(rd_a, sizeof(rd_a), "%ju", (uintmax_t)rd);
    char *length_header = dupstr(rd_a);
    if (length_header && !HMap_insert(&f->headers, "content-length", length_header))
        Free(length_header); /* the map did not take it, so it is still ours */

    if (!HMap_insert(hmap, FORM_PLAIN_KEY, f)) {
        FormField_destroy(f);
        return False;
    }
    return True;
}
/*
 * Reads the request body as a form, choosing the parser from the request's
 * `content-type` header, and stores the resulting fields in `hmap`.
 *
 * A value starting with "multipart/form-data" selects the multipart parser.
 * Any other value is hashed and matched against the URL-encoded and
 * plain-text form types.
 *
 * Returns False when an argument is NULL, when the header is missing or has
 * no value, when the content type is not a supported form type, or when the
 * selected parser fails.
 */
Bool Form_read(HTTPRequest *req, HMap *hmap) {
    if (req == NULL || hmap == NULL)
        return False;

    HMapBucket *content_type = HMap_find(&req->headers, "content-type");
    if (content_type == NULL || content_type->value == NULL)
        return False;

    /* Multipart carries parameters (the boundary), so only its prefix is matched. */
    if (startstr(content_type->value, "multipart/form-data", False))
        return read_mutipart_form(req, hmap, content_type);

    Bool parsed = False;
    switch (switch_hash(content_type->value)) {
        case FORM_URL_ENCODED_HASH:
            parsed = read_url_encoded_form(req, hmap);
            break;
        case FORM_PLAIN_HASH:
            parsed = read_plain_form(req, hmap);
            break;
        default:
            break;
    }
    return parsed;
}
/*
 * Destroys every FormField stored in `hmap` and then the map itself.
 *
 * Returns False when `hmap` is NULL, otherwise the result of HMap_destroy().
 */
Bool Form_destroy(HMap *hmap) {
    /* Same NULL contract as the other public functions in this file. */
    if (!hmap)
        return False;
    for (HMAP_TYPE idx = 0; idx < hmap->size; ++idx)
        FormField_destroy(hmap->buckets[hmap->occupied_els[idx]].value);
    return HMap_destroy(hmap);
}
/* TODO: Cache files/avoid FS operations? */
/*
 * Initialises `ff` for an uploaded file called `filename`.
 *
 * When `convert` is true, `filename` is a multibyte string and is converted
 * to a wide string with mbstowcs(); otherwise it is duplicated as is with
 * wdupstr(). The result is stored in `ff->filename`.
 *
 * `ff->path` is set to a freshly generated temporary path of the form
 *   FILE_TMPDIR ".vessel-" <BLAKE2s hex digest> "-" <hex suffix> ".formfile"
 * The file itself is not created here.
 *
 * Returns True on success. On failure False is returned and neither
 * `ff->filename` nor `ff->path` holds memory the caller has to release.
 *
 * NOTE(review): `ff->f` has already been through File_init() when a later
 * step fails; whether it then needs File_destroy() depends on File_init() --
 * confirm.
 */
Bool FormFile_init(FormFile *ff, const void *filename, const Bool convert) {
    if (!ff || !filename)
        return False;
    if (!File_init(&ff->f, NULL))
        return False;

    uint8_t seed[32];
    char path[FILE_MAX_LEN] = FILE_TMPDIR ".vessel-";
    static const uint64_t path_size =
        sizeof(FILE_TMPDIR ".vessel-") - 1; /* NOTE: Always the same value */
    Blake2sCtx b = { 0 };

    if (convert) {
        /*
         * Check for the error value BEFORE adding room for the terminator:
         * (size_t)-1 + 1 wraps to 0 and would otherwise go unnoticed.
         */
        const size_t wide_len = mbstowcs(NULL, (const char *)filename, 0);
        if (wide_len == (size_t)-1)
            return False;
        ff->filename = Malloc((wide_len + 1) * sizeof(*ff->filename));
        if (!ff->filename)
            return False;
        if (mbstowcs(ff->filename, (const char *)filename, wide_len + 1) == (size_t)-1) {
            Free(ff->filename);
            ff->filename = NULL;
            return False;
        }
    } else {
        ff->filename = wdupstr(filename);
        if (!ff->filename)
            return False;
    }

    const uint64_t filename_len = wlenstr(ff->filename);
    if (get_rand_bytes(seed, 32) != 32 || !Blake2sCtx_init(&b, 32, seed, 32) ||
        !Blake2sCtx_update(&b, ff->filename, filename_len * sizeof(*ff->filename)) ||
        !Blake2sCtx_final(&b) || !Blake2sCtx_to_hex(&b, path + path_size)) {
        Free(ff->filename);
        ff->filename = NULL;
        return False;
    }

    /* Copy the bytes out rather than casting the uint8_t buffer to uint64_t *. */
    uint64_t seed_word;
    memcpy(&seed_word, seed, sizeof(seed_word));

    /* This should be sufficient to not collide with other processes and files.
     * At least not easily.
     *
     * - BLAKE2s hash for uniqueness and to not use the original filename.
     * - time(NULL) to make it change every second even with the same
     *   parameters.
     * - getpid() to avoid cross-process collision.
     * - 64 bits of more randomness taken from the seed.
     * - clock() changes fairly often as well, adding another layer of
     *   uniqueness.
     * - filename_len is the last resort if everything else clashes, which is
     *   very unlikely.
     *
     * The suffix is written 64 characters after the prefix, i.e. behind the
     * hex digest (presumably 64 hex characters for the 32-byte hash --
     * confirm against Blake2sCtx_to_hex).
     */
    snprintf(path + path_size + 64,
             sizeof(path) - path_size - 64,
             "-%jx%jx%jx%jx%jx.formfile",
             (uintmax_t)time(NULL),
             (uintmax_t)getpid(),
             (uintmax_t)seed_word,
             (uintmax_t)clock(),
             (uintmax_t)filename_len);

    if (!(ff->path = dupstr(path))) {
        Free(ff->filename);
        ff->filename = NULL;
        return False;
    }
    return True;
}
/*
 * Makes sure the file behind `ff` is open.
 *
 * A file that is already open is left untouched. Otherwise `ff->path` is
 * opened with FILEF_RD | FILEF_CREAT and mode FILEM_ONX.
 *
 * Returns False when `ff` is NULL or the file cannot be opened.
 */
Bool FormFile_open(FormFile *ff) {
    if (ff == NULL)
        return False;

    if (File_isopen(&ff->f))
        return True;

    if (File_open(&ff->f, ff->path, FILEF_RD | FILEF_CREAT, FILEM_ONX))
        return True;

    return False;
}
/*
 * Closes the file behind `ff`.
 *
 * Returns False only when `ff` is NULL; the outcome of the close itself is
 * not reported.
 */
Bool FormFile_close(FormFile *ff) {
    if (ff != NULL) {
        File_close(&ff->f);
        return True;
    }
    return False;
}
/*
 * Reads up to `count` bytes from the file behind `ff` into `buf`, opening
 * the file first if it is not open yet.
 *
 * Returns whatever File_read() returns, or STREAM_ERROR when an argument is
 * NULL or the file cannot be opened.
 */
uint64_t FormFile_read(FormFile *ff, void *buf, uint64_t count) {
    /* Arguments are validated before any attempt to open the file. */
    if (ff == NULL || buf == NULL || !FormFile_open(ff))
        return STREAM_ERROR;

    return File_read(&ff->f, buf, count);
}
/*
 * Tears down a form file: destroys the underlying file object, removes the
 * file at `ff->path` from the file system and frees the path and filename
 * strings.
 *
 * The FormFile structure itself is NOT freed; that is left to the caller.
 * Returns False only when `ff` is NULL.
 */
Bool FormFile_destroy(FormFile *ff) {
    if (ff != NULL) {
        File_destroy(&ff->f);

        /* Delete the file on disk while the path string is still alive. */
        remove(ff->path);

        Free(ff->path);
        Free(ff->filename);
        return True;
    }
    return False;
}