491 lines
12 KiB
C
Raw Normal View History

2025-01-22 17:22:38 +01:00
/*
* libwebsockets - small server side websockets and web server implementation
*
* Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#if !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif
#include <pthread.h>
#include <libwebsockets.h>
#include "private-lib-core.h"
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <time.h>
#include <errno.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#if defined(__APPLE__)
#include <sys/dirent.h>
/* Travis OSX does not have DT_REG... */
#if !defined(DT_REG)
#define DT_REG 8
#endif
#endif
/*
 * One trim candidate: a regular file seen while scanning a cache subdirectory.
 * Entries live in the lws_diskcache_scan batch array and are threaded onto a
 * list ordered by modification time.
 */
struct file_entry {
/* link for the mtime-ordered list; must stay the first member because
 * lws_diskcache_trim() assigns the list head pointer directly to a
 * struct file_entry pointer */
lws_list_ptr sorted;
/* back link, only filled in once a whole scan has completed so the list can
 * be walked from the oldest entry */
lws_list_ptr prev;
/* copy of the directory entry name, truncated to fit and NUL-terminated */
char name[64];
/* st_mtime of the file when it was scanned */
time_t modified;
/* st_size of the file when it was scanned */
size_t size;
};
/*
 * State for one disk cache: configuration, hit statistics, and the progress
 * of the incremental trim scan that lws_diskcache_trim() advances by one
 * subdirectory per call.
 */
struct lws_diskcache_scan {
/* array of BATCH_COUNT candidates; allocated when a scan starts and freed
 * when the scan completes */
struct file_entry *batch;
/* cache base directory as passed to lws_diskcache_create(); the pointer is
 * stored, not copied.  NULL means caching is disabled */
const char *cache_dir_base;
/* head of the mtime-ordered candidate list (most recent candidate first) */
lws_list_ptr head;
/* time(NULL) taken when the last full scan finished */
time_t last_scan_completed;
/* sum of the sizes of files seen in the current scan, reduced as files are
 * unlinked */
uint64_t agg_size;
/* size limit for the cache in bytes; 0 selects a 256MiB default at trim */
uint64_t cache_size_limit;
/* agg_size / agg_file_count from the last completed scan */
uint64_t avg_size;
/* number of non-bot queries made */
uint64_t cache_tries;
/* number of non-bot queries that found an existing cache file */
uint64_t cache_hits;
/* next subdirectory to scan, 0x00 - 0xff; 0 means a new scan starts */
int cache_subdir;
/* how many entries of batch are occupied */
int batch_in_use;
/* count of regular files seen in the current scan */
int agg_file_count;
/* seconds to wait after last_scan_completed before scanning again */
int secs_waiting;
};
/* divisors used to report byte counts in KiB / MiB in log messages */
#define KIB (1024)
#define MIB (KIB * KIB)
/* get from list pointer p, which points at member _n, to the struct
 * file_entry containing it (via lws_list_ptr_container()) */
#define lp_to_fe(p, _n) lws_list_ptr_container(p, struct file_entry, _n)
/* hex digits used to form the single-character cache subdirectory names */
static const char *hex = "0123456789abcdef";
/* number of struct file_entry slots in the trim candidate batch */
#define BATCH_COUNT 128
/*
 * Sort callback handed to lws_list_ptr_insert(): orders struct file_entry
 * objects by modification time, most recent first.
 *
 * a, b: list pointers to the .sorted member of two struct file_entry
 *
 * Returns a negative value if a was modified more recently than b, 0 if both
 * have the same modification time, and a positive value if a is older.
 *
 * The timestamps are compared rather than subtracted, since narrowing a
 * time_t difference to int can flip the sign when the two times are far apart.
 */
static int
fe_modified_sort(lws_list_ptr a, lws_list_ptr b)
{
	const struct file_entry *p1 = lp_to_fe(a, sorted),
				*p2 = lp_to_fe(b, sorted);

	if (p2->modified < p1->modified)
		return -1;

	return p2->modified > p1->modified;
}
/*
 * Allocate a zeroed disk cache context and record its configuration.
 *
 * cache_dir_base:   base directory of the cache; only the pointer is kept, the
 *                   string is not copied
 * cache_size_limit: size limit for the cache in bytes
 *
 * Returns the new context, or NULL if the allocation failed.
 */
struct lws_diskcache_scan *
lws_diskcache_create(const char *cache_dir_base, uint64_t cache_size_limit)
{
	struct lws_diskcache_scan *scan;

	scan = lws_malloc(sizeof(*scan), "cachescan");
	if (scan) {
		memset(scan, 0, sizeof(*scan));
		scan->cache_dir_base = cache_dir_base;
		scan->cache_size_limit = cache_size_limit;
	}

	return scan;
}
/*
 * Free a disk cache context, including any scan batch still allocated, and
 * set the caller's pointer to NULL.
 *
 * lds: address of the context pointer; a NULL address or an already-NULL
 *      context pointer is ignored, so calling this twice is harmless
 */
void
lws_diskcache_destroy(struct lws_diskcache_scan **lds)
{
	if (!lds || !*lds)
		return;

	if ((*lds)->batch)
		lws_free((*lds)->batch);

	lws_free(*lds);
	*lds = NULL;
}
/*
 * Best-effort creation of one cache directory owned by uid.
 *
 * who:  function name to report in the log message
 * path: directory to create
 * mode: permission bits handed to mkdir()
 * uid:  owner to set; the group is left alone by passing (gid_t)-1
 *
 * The mkdir() result is deliberately ignored (the directory may already
 * exist); a failing chown() is only logged.
 */
static void
diskcache_mkdir_chown(const char *who, const char *path, mode_t mode,
		      uid_t uid)
{
	(void)mkdir(path, mode);
	if (chown(path, uid, (gid_t)-1))
		lwsl_err("%s: %s: unable to chown %d\n", who, path, (int)uid);
}

/*
 * Create the cache directory layout: the base directory, sixteen
 * subdirectories named 0-f below it, and sixteen more named 0-f below each of
 * those, all chowned to uid.
 *
 * cache_base_dir: base directory of the cache
 * mode:           permission bits used for every directory created
 * uid:            user the directories are chowned to
 *
 * Always returns 0; failures to chown are logged but do not abort the walk.
 */
int
lws_diskcache_prepare(const char *cache_base_dir, int mode, uid_t uid)
{
	char dir[256];
	int n, m;

	diskcache_mkdir_chown(__func__, cache_base_dir, (mode_t)mode, uid);

	for (n = 0; n < 16; n++) {
		lws_snprintf(dir, sizeof(dir), "%s/%c", cache_base_dir, hex[n]);
		diskcache_mkdir_chown(__func__, dir, (mode_t)mode, uid);

		for (m = 0; m < 16; m++) {
			lws_snprintf(dir, sizeof(dir), "%s/%c/%c",
				     cache_base_dir, hex[n], hex[m]);
			diskcache_mkdir_chown(__func__, dir, (mode_t)mode,
					      uid);
		}
	}

	return 0;
}
/*
 * Give a finished temporary cache file its final name.
 *
 * cache holds the temporary path, which is the final path followed by a
 * '~'-introduced suffix (as produced by lws_diskcache_query()).  The path is
 * truncated in place at that suffix and the file at the untruncated path is
 * renamed to the truncated one.
 *
 * cache: writable, NUL-terminated temporary path; on return it holds the
 *        truncated name whenever a '~' was found, even if the rename failed
 *
 * Returns 0 if the file was renamed, or 1 if the path is too long for the
 * internal copy, has no '~' suffix, or rename() failed.
 */
int
lws_diskcache_finalize_name(char *cache)
{
	size_t len = strlen(cache);
	char ren[256], *p;

	/* a clipped copy would make us rename some other path */
	if (len >= sizeof(ren)) {
		lwsl_err("%s: path too long: %s\n", __func__, cache);
		return 1;
	}
	memcpy(ren, cache, len + 1);

	/*
	 * the suffix is appended at the very end, so look for the last '~':
	 * the cache base directory itself may legitimately contain one
	 */
	p = strrchr(cache, '~');
	if (!p)
		return 1;

	*p = '\0';
	if (rename(ren, cache)) {
		lwsl_err("%s: problem renaming %s to %s\n", __func__,
			 ren, cache);
		return 1;
	}

	return 0;
}
/*
 * Look up hash_hex in the cache, and either open the existing cache file or
 * create a temporary file that the caller can fill.
 *
 * lds:              cache context
 * is_bot:           nonzero for bot requests: they are not counted in the
 *                   tries / hits statistics and never create cache files
 * hash_hex:         name of the cache entry; its first two characters select
 *                   the two subdirectory levels
 * _fd:              receives the opened file descriptor
 * cache:            buffer that receives the path that was opened or created
 * cache_len:        size of the cache buffer in bytes
 * extant_cache_len: receives the size of an existing cache file
 *
 * Returns
 *  - LWS_DISKCACHE_QUERY_NO_CACHE if caching is disabled, the request is a
 *    miss from a bot, or the cache file could not be statted or created;
 *  - LWS_DISKCACHE_QUERY_EXISTS if the entry exists: *_fd is open read-only
 *    and *extant_cache_len is set;
 *  - LWS_DISKCACHE_QUERY_CREATING if a new temporary file was created: *_fd
 *    is open read-write and cache holds the temporary name, which
 *    lws_diskcache_finalize_name() turns into the final one.
 */
int
lws_diskcache_query(struct lws_diskcache_scan *lds, int is_bot,
		    const char *hash_hex, int *_fd, char *cache, int cache_len,
		    size_t *extant_cache_len)
{
	struct stat s;
	int n;

	/* caching is disabled? */
	if (!lds->cache_dir_base)
		return LWS_DISKCACHE_QUERY_NO_CACHE;

	if (!is_bot)
		lds->cache_tries++;

	n = lws_snprintf(cache, (size_t)cache_len, "%s/%c/%c/%s",
			 lds->cache_dir_base, hash_hex[0], hash_hex[1],
			 hash_hex);

	lwsl_info("%s: job cache %s\n", __func__, cache);

	*_fd = open(cache, O_RDONLY);
	if (*_fd >= 0) {
		if (!is_bot)
			lds->cache_hits++;

		if (fstat(*_fd, &s)) {
			close(*_fd);
			/* don't hand a closed descriptor back to the caller */
			*_fd = -1;

			return LWS_DISKCACHE_QUERY_NO_CACHE;
		}

		*extant_cache_len = (size_t)s.st_size;

		/*
		 * "touch" the hit cache file so it's last for LRU now.  Just
		 * opening the file does not change its mtime, which is what
		 * the trim scan sorts on, so set the timestamps explicitly.
		 * Best effort: a failure only affects the eviction order.
		 */
		if (utimes(cache, NULL))
			lwsl_info("%s: unable to touch %s: errno %d\n",
				  __func__, cache, errno);

		return LWS_DISKCACHE_QUERY_EXISTS;
	}

	/* bots are too random to pollute the cache with their antics */
	if (is_bot)
		return LWS_DISKCACHE_QUERY_NO_CACHE;

	/*
	 * let's create it first with a unique temp name: the suffix is written
	 * at the offset lws_snprintf() reported for the final path
	 */
	lws_snprintf(cache + n, (size_t)cache_len - (unsigned int)n, "~%d-%p",
		     (int)getpid(), extant_cache_len);

	*_fd = open(cache, O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (*_fd < 0) {
		/* well... ok... we will proceed without cache then... */
		lwsl_notice("%s: Problem creating cache %s: errno %d\n",
			    __func__, cache, errno);

		return LWS_DISKCACHE_QUERY_NO_CACHE;
	}

	return LWS_DISKCACHE_QUERY_CREATING;
}
/*
 * Report the wait, in seconds, that the last completed trim scan computed
 * before the next scan should start.
 *
 * lds: cache context
 *
 * Returns the stored secs_waiting value.
 */
int
lws_diskcache_secs_to_idle(struct lws_diskcache_scan *lds)
{
	int idle_secs = lds->secs_waiting;

	return idle_secs;
}
/*
 * Estimate how many seconds may pass before the cache needs scanning again.
 *
 * agg_size:         total bytes in the cache after any trimming
 * agg_file_count:   number of files counted in the scan
 * cache_size_limit: effective size limit in bytes
 *
 * Returns 0 if the cache is still at or over the limit, 1 if 10% growth would
 * reach the limit, otherwise a wait capped at 3600s.
 *
 * All arithmetic is done in uint64_t and clamped before narrowing to int, so
 * a huge limit cannot wrap the result negative.
 */
static int
diskcache_rescan_delay_secs(uint64_t agg_size, int agg_file_count,
			    uint64_t cache_size_limit)
{
	uint64_t avg = 4096, capacity, projected, secs;

	if (agg_size >= cache_size_limit)
		return 0;

	/* let's use 80% of the real average for margin */
	if (agg_size && agg_file_count)
		avg = ((agg_size * 8) / (uint64_t)agg_file_count) / 10;

	/*
	 * tiny or empty files can round the average down to 0, which would
	 * make the capacity below 0 and the division by it trap
	 */
	if (!avg)
		avg = 1;

	/* bytes one batch of BATCH_COUNT average-sized files accounts for */
	capacity = avg * BATCH_COUNT;

	/* if the cache grew by 10%, would we hit the limit even then? */
	projected = (agg_size * 11) / 10;
	if (projected >= cache_size_limit)
		/* yes... come back almost immediately */
		return 1;

	/*
	 * NOTE(review): the 256 presumably stands for the 256 subdirectories
	 * a full scan walks, one per call - confirm against the caller's
	 * polling rate
	 */
	secs = (256 / 2) * ((cache_size_limit - projected) / capacity);

	/*
	 * large waits imply we may not have enough info yet, so
	 * check once an hour at least.
	 */
	if (secs > 3600)
		secs = 3600;

	return (int)secs;
}

/*
 * The goal is to collect the oldest BATCH_COUNT filepaths and filesizes from
 * the dirs under the cache dir.  Since we don't need or want a full list of
 * files in there in memory at once, we restrict the linked-list size to
 * BATCH_COUNT entries, and once it is full, simply ignore any further files
 * that are newer than the newest one on that list.  Files older than the
 * newest guy already on the list evict the newest guy already on the list
 * and are sorted into the correct order.  In this way no matter the number
 * of files to be processed the memory requirement is fixed at BATCH_COUNT
 * struct file_entry-s.
 *
 * The oldest subset of BATCH_COUNT files are sorted into the lds->batch
 * allocation in most recent -> least recent order.
 *
 * We want to track the total size of all files we saw as well, so we know if
 * we need to actually do anything yet to restrict how much space it's taking
 * up.
 *
 * And we want to do those things statefully and incrementally instead of one
 * big atomic operation, since the user may want a huge cache, so each call
 * looks in one cache subdirectory and the progress is tracked in lds.
 *
 * When we have seen everything, we add the doubly-linked prev pointers and then
 * if we are over the limit, start deleting up to BATCH_COUNT files working back
 * from the end.
 *
 * lds: cache context
 *
 * Returns 0 if it is not yet time to scan or the subdirectory was processed,
 * 1 if the batch could not be allocated, and -1 if the subdirectory could not
 * be opened (the same subdirectory is then retried on the next call).
 */
int
lws_diskcache_trim(struct lws_diskcache_scan *lds)
{
	/* kept as uint64_t so limits above 4GiB survive on 32-bit builds */
	uint64_t cache_size_limit = lds->cache_size_limit, trimmed = 0;
	char dirpath[132], filepath[132 + 32];
	lws_list_ptr lp, op = NULL;
	int files_trimmed = 0;
	struct file_entry *p;
	struct dirent *de;
	struct stat s;
	int fd, n;
	DIR *dir;

	if (!lds->cache_subdir) {
		/* a new scan would start here... is it time for one yet? */
		if (lds->last_scan_completed + lds->secs_waiting > time(NULL))
			return 0;

		/*
		 * a batch can be left over from an attempt that failed to
		 * open the first subdirectory; reuse it rather than leak it
		 */
		if (!lds->batch)
			lds->batch = lws_malloc(sizeof(struct file_entry) *
						BATCH_COUNT, "cache_trim");
		if (!lds->batch) {
			lwsl_err("%s: OOM\n", __func__);

			return 1;
		}

		lds->agg_size = 0;
		lds->head = NULL;
		lds->batch_in_use = 0;
		lds->agg_file_count = 0;
	}

	/* the two nybbles of cache_subdir name the two directory levels */
	lws_snprintf(dirpath, sizeof(dirpath), "%s/%c/%c",
		     lds->cache_dir_base, hex[(lds->cache_subdir >> 4) & 15],
		     hex[lds->cache_subdir & 15]);

	dir = opendir(dirpath);
	if (!dir) {
		lwsl_err("Unable to walk repo dir '%s'\n",
			 lds->cache_dir_base);

		return -1;
	}

	while ((de = readdir(dir))) {
		/*
		 * NOTE(review): filesystems that report DT_UNKNOWN for every
		 * entry will have all their files skipped here
		 */
		if (de->d_type != DT_REG)
			continue;

		lds->agg_file_count++;

		lws_snprintf(filepath, sizeof(filepath), "%s/%s", dirpath,
			     de->d_name);

		fd = open(filepath, O_RDONLY);
		if (fd < 0) {
			lwsl_err("%s: cannot open %s\n", __func__, filepath);

			continue;
		}

		n = fstat(fd, &s);
		close(fd);
		if (n) {
			lwsl_notice("%s: cannot stat %s\n", __func__, filepath);

			continue;
		}

		lds->agg_size += (uint64_t)s.st_size;

		if (lds->batch_in_use == BATCH_COUNT) {
			/*
			 * once we filled up the batch with candidates, we don't
			 * need to consider any files newer than the newest guy
			 * on the list...
			 */
			p = lp_to_fe(lds->head, sorted);
			if (p->modified < s.st_mtime)
				continue;

			/*
			 * ... and if we find an older file later, we know it
			 * will be replacing the newest guy on the list, so
			 * unlink that entry from the head and reuse it
			 */
			lds->head = p->sorted;
		} else
			/* we are still accepting anything to fill the batch */
			p = &lds->batch[lds->batch_in_use++];

		p->sorted = NULL;
		strncpy(p->name, de->d_name, sizeof(p->name) - 1);
		p->name[sizeof(p->name) - 1] = '\0';
		p->modified = s.st_mtime;
		p->size = (size_t)s.st_size;

		lws_list_ptr_insert(&lds->head, &p->sorted, fe_modified_sort);
	}

	closedir(dir);

	/* more subdirectories to go before the scan is complete? */
	lds->cache_subdir++;
	if (lds->cache_subdir != 0x100)
		return 0;

	/* we completed the whole scan... */

	/* if really no guidance, then 256MiB */
	if (!cache_size_limit)
		cache_size_limit = 256 * 1024 * 1024;

	if (lds->agg_size > cache_size_limit) {

		/* apply prev pointers to make the list doubly-linked */

		lp = lds->head;
		while (lp) {
			p = lp_to_fe(lp, sorted);

			p->prev = op;
			op = &p->prev;
			lp = p->sorted;
		}

		/*
		 * reverse the list (start from tail, now traverse using
		 * .prev)... it's oldest-first now...
		 */

		lp = op;
		while (lp && lds->agg_size > cache_size_limit) {
			p = lp_to_fe(lp, prev);

			/* the first two name characters give the subdirs */
			lws_snprintf(filepath, sizeof(filepath), "%s/%c/%c/%s",
				     lds->cache_dir_base, p->name[0],
				     p->name[1], p->name);

			if (!unlink(filepath)) {
				lds->agg_size -= p->size;
				trimmed += p->size;
				files_trimmed++;
			} else
				lwsl_notice("%s: Failed to unlink %s\n",
					    __func__, filepath);

			lp = p->prev;
		}

		if (files_trimmed)
			lwsl_notice("%s: %s: trimmed %d files totalling "
				    "%lldKib, leaving %lldMiB\n", __func__,
				    lds->cache_dir_base, files_trimmed,
				    ((unsigned long long)trimmed) / KIB,
				    ((unsigned long long)lds->agg_size) / MIB);
	}

	if (lds->agg_size && lds->agg_file_count)
		lds->avg_size = lds->agg_size / (uint64_t)lds->agg_file_count;

	/* estimate how long we can go before scanning again */

	lds->last_scan_completed = time(NULL);
	lds->secs_waiting = diskcache_rescan_delay_secs(lds->agg_size,
							lds->agg_file_count,
							cache_size_limit);

	lwsl_info("%s: cache %s: %lldKiB / %lldKiB, next scan %ds\n", __func__,
		  lds->cache_dir_base,
		  (unsigned long long)lds->agg_size / KIB,
		  (unsigned long long)cache_size_limit / KIB,
		  lds->secs_waiting);

	/* the batch only lives for the duration of one full scan */
	lws_free(lds->batch);
	lds->batch = NULL;

	lds->cache_subdir = 0;

	return 0;
}