/*
 * libwebsockets - small server side websockets and web server implementation
 *
 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#if !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif
#include <pthread.h>

#include <libwebsockets.h>
#include "private-lib-core.h"

#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <time.h>
#include <errno.h>
#include <stdarg.h>

#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>

#if defined(__APPLE__)
#include <sys/dirent.h>
/* Travis OSX does not have DT_REG... */
#if !defined(DT_REG)
#define DT_REG 8
#endif
#endif

struct file_entry {
	lws_list_ptr sorted;
	lws_list_ptr prev;
	char name[64];
	time_t modified;
	size_t size;
};

struct lws_diskcache_scan {
	struct file_entry *batch;	/* BATCH_COUNT entries, only during scan */
	const char *cache_dir_base;	/* not copied: must outlive this struct */
	lws_list_ptr head;		/* trim candidates, most recent first */
	time_t last_scan_completed;
	uint64_t agg_size;		/* total bytes seen in current scan */
	uint64_t cache_size_limit;	/* bytes; 0 selects 256MiB default */
	uint64_t avg_size;
	uint64_t cache_tries;		/* non-bot queries */
	uint64_t cache_hits;		/* non-bot queries served from cache */
	int cache_subdir;		/* next of the 256 subdirs to scan */
	int batch_in_use;
	int agg_file_count;		/* total files seen in current scan */
	int secs_waiting;		/* estimated secs until next scan due */
};

#define KIB (1024)
#define MIB (KIB * KIB)

#define lp_to_fe(p, _n) lws_list_ptr_container(p, struct file_entry, _n)

static const char *hex = "0123456789abcdef";

#define BATCH_COUNT 128

static int
fe_modified_sort(lws_list_ptr a, lws_list_ptr b)
{
	struct file_entry *p1 = lp_to_fe(a, sorted), *p2 = lp_to_fe(b, sorted);

	return (int)((long)p2->modified - (long)p1->modified);
}

struct lws_diskcache_scan *
lws_diskcache_create(const char *cache_dir_base, uint64_t cache_size_limit)
{
	struct lws_diskcache_scan *lds = lws_malloc(sizeof(*lds), "cachescan");

	if (!lds)
		return NULL;

	memset(lds, 0, sizeof(*lds));

	lds->cache_dir_base = cache_dir_base;
	lds->cache_size_limit = cache_size_limit;

	return lds;
}
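
/*
 * Note for callers: cache_dir_base is stored by pointer, not copied, so the
 * string handed to lws_diskcache_create() must stay valid until
 * lws_diskcache_destroy().  A minimal lifecycle sketch (the path and size
 * below are illustrative only):
 *
 *	struct lws_diskcache_scan *lds;
 *
 *	lds = lws_diskcache_create("/var/cache/myapp", 64 * 1024 * 1024);
 *	if (!lds)
 *		return -1;
 *	...
 *	lws_diskcache_destroy(&lds);
 */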

void
lws_diskcache_destroy(struct lws_diskcache_scan **lds)
{
	if ((*lds)->batch)
		lws_free((*lds)->batch);
	lws_free(*lds);
	*lds = NULL;
}

int
lws_diskcache_prepare(const char *cache_base_dir, int mode, uid_t uid)
{
	char dir[256];
	int n, m;

	(void)mkdir(cache_base_dir, (mode_t)mode);
	if (chown(cache_base_dir, uid, (gid_t)-1))
		lwsl_err("%s: %s: unable to chown %d\n", __func__,
			 cache_base_dir, uid);

	for (n = 0; n < 16; n++) {
		lws_snprintf(dir, sizeof(dir), "%s/%c", cache_base_dir, hex[n]);
		(void)mkdir(dir, (mode_t)mode);
		if (chown(dir, uid, (gid_t)-1))
			lwsl_err("%s: %s: unable to chown %d\n", __func__,
				 dir, uid);
		for (m = 0; m < 16; m++) {
			lws_snprintf(dir, sizeof(dir), "%s/%c/%c",
				     cache_base_dir, hex[n], hex[m]);
			(void)mkdir(dir, (mode_t)mode);
			if (chown(dir, uid, (gid_t)-1))
				lwsl_err("%s: %s: unable to chown %d\n",
					 __func__, dir, uid);
		}
	}

	return 0;
}
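
/*
 * Illustration (base path hypothetical): after
 * lws_diskcache_prepare("/var/cache/app", 0700, uid), a two-level hex fanout
 * owned by uid exists:
 *
 *   /var/cache/app/0/0 ... /var/cache/app/0/f
 *   ...
 *   /var/cache/app/f/0 ... /var/cache/app/f/f
 *
 * Cache entries are stored in the subdir selected by the first two hex digits
 * of their name, keeping any individual directory small.
 */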

/* copies and then truncates the incoming name, and renames the file at the
 * untruncated path to have the new truncated name */

int
lws_diskcache_finalize_name(char *cache)
{
	char ren[256], *p;

	strncpy(ren, cache, sizeof(ren) - 1);
	ren[sizeof(ren) - 1] = '\0';
	p = strchr(cache, '~');
	if (p) {
		*p = '\0';
		if (rename(ren, cache)) {
			lwsl_err("%s: problem renaming %s to %s\n", __func__,
				 ren, cache);
			return 1;
		}

		return 0;
	}

	return 1;
}
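
/*
 * Example (values illustrative): lws_diskcache_query() creates new cache
 * entries under a temporary name like
 *
 *   /var/cache/app/1/a/1a2b...~1234-0x55aa0012
 *
 * and once the contents are completely written, passing that path to
 * lws_diskcache_finalize_name() renames it in place to
 *
 *   /var/cache/app/1/a/1a2b...
 *
 * so partially-written entries are never visible under their final name.
 */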

int
lws_diskcache_query(struct lws_diskcache_scan *lds, int is_bot,
		    const char *hash_hex, int *_fd, char *cache, int cache_len,
		    size_t *extant_cache_len)
{
	struct stat s;
	int n;

	/* caching is disabled? */
	if (!lds->cache_dir_base)
		return LWS_DISKCACHE_QUERY_NO_CACHE;

	if (!is_bot)
		lds->cache_tries++;

	n = lws_snprintf(cache, (size_t)cache_len, "%s/%c/%c/%s",
			 lds->cache_dir_base, hash_hex[0], hash_hex[1],
			 hash_hex);

	lwsl_info("%s: job cache %s\n", __func__, cache);

	*_fd = open(cache, O_RDONLY);
	if (*_fd >= 0) {
		int fd;

		if (!is_bot)
			lds->cache_hits++;

		if (fstat(*_fd, &s)) {
			close(*_fd);

			return LWS_DISKCACHE_QUERY_NO_CACHE;
		}

		*extant_cache_len = (size_t)s.st_size;

		/* "touch" the hit cache file so it's last for LRU now */
		fd = open(cache, O_RDWR);
		if (fd >= 0)
			close(fd);

		return LWS_DISKCACHE_QUERY_EXISTS;
	}

	/* bots are too random to pollute the cache with their antics */
	if (is_bot)
		return LWS_DISKCACHE_QUERY_NO_CACHE;

	/* let's create it first with a unique temp name */

	lws_snprintf(cache + n, (size_t)cache_len - (unsigned int)n, "~%d-%p",
		     (int)getpid(), extant_cache_len);

	*_fd = open(cache, O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (*_fd < 0) {
		/* well... ok... we will proceed without cache then... */
		lwsl_notice("%s: Problem creating cache %s: errno %d\n",
			    __func__, cache, errno);

		return LWS_DISKCACHE_QUERY_NO_CACHE;
	}

	return LWS_DISKCACHE_QUERY_CREATING;
}
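
/*
 * Usage sketch (not part of the library): a caller that wants the object for
 * lowercase-hex name hash_hex (at least two chars) might do something like
 * the following; error handling is abbreviated and fill_cache_file() is a
 * hypothetical helper.
 *
 *	char cache[256];
 *	size_t extant;
 *	int fd;
 *
 *	switch (lws_diskcache_query(lds, is_bot, hash_hex, &fd, cache,
 *				    (int)sizeof(cache), &extant)) {
 *	case LWS_DISKCACHE_QUERY_EXISTS:
 *		// read the extant bytes from fd, then close(fd)
 *		break;
 *	case LWS_DISKCACHE_QUERY_CREATING:
 *		// write the generated object into fd (temp name is in cache)
 *		fill_cache_file(fd);
 *		close(fd);
 *		lws_diskcache_finalize_name(cache);
 *		break;
 *	default: // LWS_DISKCACHE_QUERY_NO_CACHE
 *		// generate the object without caching it
 *		break;
 *	}
 */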

int
lws_diskcache_secs_to_idle(struct lws_diskcache_scan *lds)
{
	return lds->secs_waiting;
}

/*
 * The goal is to collect the oldest BATCH_COUNT filepaths and filesizes from
 * the dirs under the cache dir.  Since we don't need or want a full list of
 * the files in there in memory at once, we restrict the linked-list size to
 * BATCH_COUNT entries, and once it is full, simply ignore any further files
 * that are newer than the newest one on that list.  Files older than the
 * newest entry on the list evict that newest entry and are sorted into the
 * correct place.  In this way, no matter the number of files to be processed,
 * the memory requirement is fixed at BATCH_COUNT struct file_entry-s.
 *
 * The oldest subset of BATCH_COUNT files is sorted into the lds->batch
 * allocation in most recent -> least recent order.
 *
 * We want to track the total size of all files we saw as well, so we know if
 * we need to actually do anything yet to restrict how much space it's taking
 * up.
 *
 * And we want to do those things statefully and incrementally instead of as
 * one big atomic operation, since the user may want a huge cache: we look in
 * one cache subdir at a time and track state in the lws_diskcache_scan
 * struct.
 *
 * When we have seen everything, we add the doubly-linked prev pointers and
 * then, if we are over the limit, start deleting up to BATCH_COUNT files
 * working back from the end.
 */

int
lws_diskcache_trim(struct lws_diskcache_scan *lds)
{
	size_t cache_size_limit = (size_t)lds->cache_size_limit;
	char dirpath[132], filepath[132 + 32];
	lws_list_ptr lp, op = NULL;
	int files_trimmed = 0;
	struct file_entry *p;
	int fd, n, ret = -1;
	size_t trimmed = 0;
	struct dirent *de;
	struct stat s;
	DIR *dir;

	if (!lds->cache_subdir) {

		if (lds->last_scan_completed + lds->secs_waiting > time(NULL))
			return 0;

		lds->batch = lws_malloc(sizeof(struct file_entry) *
					BATCH_COUNT, "cache_trim");
		if (!lds->batch) {
			lwsl_err("%s: OOM\n", __func__);

			return 1;
		}
		lds->agg_size = 0;
		lds->head = NULL;
		lds->batch_in_use = 0;
		lds->agg_file_count = 0;
	}

	lws_snprintf(dirpath, sizeof(dirpath), "%s/%c/%c",
		     lds->cache_dir_base, hex[(lds->cache_subdir >> 4) & 15],
		     hex[lds->cache_subdir & 15]);

	dir = opendir(dirpath);
	if (!dir) {
		lwsl_err("Unable to walk cache dir '%s'\n", dirpath);
		return -1;
	}

	do {
		de = readdir(dir);
		if (!de)
			break;

		if (de->d_type != DT_REG)
			continue;

		lds->agg_file_count++;

		lws_snprintf(filepath, sizeof(filepath), "%s/%s", dirpath,
			     de->d_name);

		fd = open(filepath, O_RDONLY);
		if (fd < 0) {
			lwsl_err("%s: cannot open %s\n", __func__, filepath);

			continue;
		}

		n = fstat(fd, &s);
		close(fd);
		if (n) {
			lwsl_notice("%s: cannot stat %s\n", __func__, filepath);
			continue;
		}

		lds->agg_size += (uint64_t)s.st_size;

		if (lds->batch_in_use == BATCH_COUNT) {
			/*
			 * once we filled up the batch with candidates, we don't
			 * need to consider any files newer than the newest guy
			 * on the list...
			 */
			if (lp_to_fe(lds->head, sorted)->modified < s.st_mtime)
				continue;

			/*
			 * ... and if we find an older file later, we know it
			 * will be replacing the newest guy on the list, so use
			 * that directly...
			 */
			p = lp_to_fe(lds->head, sorted);
			lds->head = p->sorted;
		} else
			/* we are still accepting anything to fill the batch */
			p = &lds->batch[lds->batch_in_use++];

		p->sorted = NULL;
		strncpy(p->name, de->d_name, sizeof(p->name) - 1);
		p->name[sizeof(p->name) - 1] = '\0';
		p->modified = s.st_mtime;
		p->size = (size_t)s.st_size;

		lws_list_ptr_insert(&lds->head, &p->sorted, fe_modified_sort);
	} while (de);

	ret = 0;

	lds->cache_subdir++;
	if (lds->cache_subdir != 0x100)
		goto done;

	/* we completed the whole scan... */

	/* if really no guidance, then 256MiB */
	if (!cache_size_limit)
		cache_size_limit = 256 * 1024 * 1024;

	if (lds->agg_size > cache_size_limit) {

		/* apply prev pointers to make the list doubly-linked */

		lp = lds->head;
		while (lp) {
			p = lp_to_fe(lp, sorted);

			p->prev = op;
			op = &p->prev;
			lp = p->sorted;
		}

		/*
		 * reverse the list (start from tail, now traverse using
		 * .prev)... it's oldest-first now...
		 */

		lp = op;

		while (lp && lds->agg_size > cache_size_limit) {
			p = lp_to_fe(lp, prev);

			lws_snprintf(filepath, sizeof(filepath), "%s/%c/%c/%s",
				     lds->cache_dir_base, p->name[0],
				     p->name[1], p->name);

			if (!unlink(filepath)) {
				lds->agg_size -= p->size;
				trimmed += p->size;
				files_trimmed++;
			} else
				lwsl_notice("%s: Failed to unlink %s\n",
					    __func__, filepath);

			lp = p->prev;
		}

		if (files_trimmed)
			lwsl_notice("%s: %s: trimmed %d files totalling "
				    "%lldKiB, leaving %lldMiB\n", __func__,
				    lds->cache_dir_base, files_trimmed,
				    ((unsigned long long)trimmed) / KIB,
				    ((unsigned long long)lds->agg_size) / MIB);
	}

	if (lds->agg_size && lds->agg_file_count)
		lds->avg_size = lds->agg_size / (uint64_t)lds->agg_file_count;

	/*
	 * estimate how long we can go before scanning again... by default we
	 * need to start again immediately
	 */

	lds->last_scan_completed = time(NULL);
	lds->secs_waiting = 1;

	if (lds->agg_size < cache_size_limit) {
		uint64_t avg = 4096, capacity, projected;

		/* let's use 80% of the real average for margin */
		if (lds->agg_size && lds->agg_file_count)
			avg = ((lds->agg_size * 8) /
			       (uint64_t)lds->agg_file_count) / 10;

		/*
		 * if we collected BATCH_COUNT files of the average size,
		 * how much can we clean up in 256s?
		 */

		capacity = avg * BATCH_COUNT;

		/*
		 * if the cache grew by 10%, would we hit the limit even then?
		 */
		projected = (lds->agg_size * 11) / 10;
		if (projected < cache_size_limit)
			/* no... */
			lds->secs_waiting = (int)((256 / 2) *
						  ((cache_size_limit -
						    projected) / capacity));

		/*
		 * large waits imply we may not have enough info yet, so
		 * check once an hour at least.
		 */

		if (lds->secs_waiting > 3600)
			lds->secs_waiting = 3600;
	} else
		lds->secs_waiting = 0;

	lwsl_info("%s: cache %s: %lldKiB / %lldKiB, next scan %ds\n", __func__,
		  lds->cache_dir_base,
		  (unsigned long long)lds->agg_size / KIB,
		  (unsigned long long)cache_size_limit / KIB,
		  lds->secs_waiting);

	lws_free(lds->batch);
	lds->batch = NULL;

	lds->cache_subdir = 0;

done:
	closedir(dir);

	return ret;
}
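
/*
 * Caller-side maintenance sketch (names illustrative): lws_diskcache_trim()
 * is incremental, scanning at most one of the 256 cache subdirs per call and
 * skipping work entirely until its own last_scan_completed + secs_waiting
 * estimate says a new pass is due.  So it is enough to call it from a
 * periodic (eg, 1s) housekeeping timer; lws_diskcache_secs_to_idle() exposes
 * the current estimate if the caller prefers to schedule less frequently.
 *
 *	static void
 *	cache_housekeeping(struct lws_diskcache_scan *lds)
 *	{
 *		if (lws_diskcache_trim(lds) < 0)
 *			lwsl_warn("cache trim failed\n");
 *	}
 */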