/*
 * libwebsockets - small server side websockets and web server implementation
 *
 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#if !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif

#include <libwebsockets.h>
#include <string.h>
#include "private-lib-core.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <dirent.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <stdint.h>

#if defined(__APPLE__)
#include <sys/dirent.h>
/* Travis OSX does not have DT_REG... */
#if !defined(DT_REG)
#define DT_REG 8
#endif
#endif

struct file_entry {
	lws_list_ptr sorted;
	lws_list_ptr prev;
	char name[64];
	time_t modified;
	size_t size;
};

struct lws_diskcache_scan {
	struct file_entry *batch;
	const char *cache_dir_base;
	lws_list_ptr head;
	time_t last_scan_completed;
	uint64_t agg_size;
	uint64_t cache_size_limit;
	uint64_t avg_size;
	uint64_t cache_tries;
	uint64_t cache_hits;
	int cache_subdir;
	int batch_in_use;
	int agg_file_count;
	int secs_waiting;
};

#define KIB (1024)
#define MIB (KIB * KIB)

#define lp_to_fe(p, _n) lws_list_ptr_container(p, struct file_entry, _n)

static const char *hex = "0123456789abcdef";

#define BATCH_COUNT 128

static int
fe_modified_sort(lws_list_ptr a, lws_list_ptr b)
{
	struct file_entry *p1 = lp_to_fe(a, sorted), *p2 = lp_to_fe(b, sorted);

	return (int)((long)p2->modified - (long)p1->modified);
}

struct lws_diskcache_scan *
lws_diskcache_create(const char *cache_dir_base, uint64_t cache_size_limit)
{
	struct lws_diskcache_scan *lds = lws_malloc(sizeof(*lds), "cachescan");

	if (!lds)
		return NULL;

	memset(lds, 0, sizeof(*lds));

	lds->cache_dir_base = cache_dir_base;
	lds->cache_size_limit = cache_size_limit;

	return lds;
}

void
lws_diskcache_destroy(struct lws_diskcache_scan **lds)
{
	if ((*lds)->batch)
		lws_free((*lds)->batch);
	lws_free(*lds);
	*lds = NULL;
}

int
lws_diskcache_prepare(const char *cache_base_dir, int mode, uid_t uid)
{
	char dir[256];
	int n, m;

	(void)mkdir(cache_base_dir, (unsigned short)mode);
	if (chown(cache_base_dir, uid, (gid_t)-1))
		lwsl_err("%s: %s: unable to chown %d\n", __func__,
			 cache_base_dir, uid);

	for (n = 0; n < 16; n++) {
		lws_snprintf(dir, sizeof(dir), "%s/%c", cache_base_dir,
			     hex[n]);
		(void)mkdir(dir, (mode_t)mode);
		if (chown(dir, uid, (uid_t)-1))
			lwsl_err("%s: %s: unable to chown %d\n", __func__,
				 dir, uid);
		for (m = 0; m < 16; m++) {
			lws_snprintf(dir, sizeof(dir), "%s/%c/%c",
				     cache_base_dir, hex[n], hex[m]);
			(void)mkdir(dir, (mode_t)mode);
			if (chown(dir, uid, (uid_t)-1))
				lwsl_err("%s: %s: unable to chown %d\n",
					 __func__, dir, uid);
		}
	}

	return 0;
}
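
#if defined(LWS_DISKCACHE_USAGE_EXAMPLE)
/*
 * Illustrative sketch only, guarded out of the build by a hypothetical
 * macro: one way a caller might set the cache up at startup.  The path,
 * mode, uid and 256MiB limit are assumed example values, not anything the
 * library mandates.
 */
static struct lws_diskcache_scan *
example_cache_setup(uid_t run_as_uid)
{
	static const char *base = "/var/cache/example-app";

	/* create <base>/0/0 .. <base>/f/f and hand them over to run_as_uid */
	if (lws_diskcache_prepare(base, 0700, run_as_uid))
		return NULL;

	/* scanning context: trim towards a 256MiB aggregate size limit */
	return lws_diskcache_create(base, 256 * 1024 * 1024);
}
#endif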

/*
 * copies and then truncates the incoming name, and renames the file at the
 * untruncated path to have the new truncated name
 */
int
lws_diskcache_finalize_name(char *cache)
{
	char ren[256], *p;

	strncpy(ren, cache, sizeof(ren) - 1);
	ren[sizeof(ren) - 1] = '\0';
	p = strchr(cache, '~');
	if (p) {
		*p = '\0';
		if (rename(ren, cache)) {
			lwsl_err("%s: problem renaming %s to %s\n", __func__,
				 ren, cache);

			return 1;
		}

		return 0;
	}

	return 1;
}

int
lws_diskcache_query(struct lws_diskcache_scan *lds, int is_bot,
		    const char *hash_hex, int *_fd, char *cache,
		    int cache_len, size_t *extant_cache_len)
{
	struct stat s;
	int n;

	/* caching is disabled? */

	if (!lds->cache_dir_base)
		return LWS_DISKCACHE_QUERY_NO_CACHE;

	if (!is_bot)
		lds->cache_tries++;

	n = lws_snprintf(cache, (size_t)cache_len, "%s/%c/%c/%s",
			 lds->cache_dir_base, hash_hex[0], hash_hex[1],
			 hash_hex);

	lwsl_info("%s: job cache %s\n", __func__, cache);

	*_fd = open(cache, O_RDONLY);
	if (*_fd >= 0) {
		int fd;

		if (!is_bot)
			lds->cache_hits++;

		if (fstat(*_fd, &s)) {
			close(*_fd);

			return LWS_DISKCACHE_QUERY_NO_CACHE;
		}

		*extant_cache_len = (size_t)s.st_size;

		/* "touch" the hit cache file so it's last for LRU now */
		fd = open(cache, O_RDWR);
		if (fd >= 0)
			close(fd);

		return LWS_DISKCACHE_QUERY_EXISTS;
	}

	/* bots are too random to pollute the cache with their antics */
	if (is_bot)
		return LWS_DISKCACHE_QUERY_NO_CACHE;

	/* let's create it first with a unique temp name */

	lws_snprintf(cache + n, (size_t)cache_len - (unsigned int)n, "~%d-%p",
		     (int)getpid(), extant_cache_len);

	*_fd = open(cache, O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (*_fd < 0) {
		/* well... ok... we will proceed without cache then... */
		lwsl_notice("%s: Problem creating cache %s: errno %d\n",
			    __func__, cache, errno);

		return LWS_DISKCACHE_QUERY_NO_CACHE;
	}

	return LWS_DISKCACHE_QUERY_CREATING;
}

int
lws_diskcache_secs_to_idle(struct lws_diskcache_scan *lds)
{
	return lds->secs_waiting;
}
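
#if defined(LWS_DISKCACHE_USAGE_EXAMPLE)
/*
 * Illustrative sketch only, guarded out of the build by a hypothetical
 * macro: how a caller might drive the query / create / finalize cycle.
 * example_fetch_or_build() and its obj arguments are assumptions for the
 * sketch; the library only requires hash_hex to be a lowercase hex name of
 * at least two characters.
 */
static int
example_fetch_or_build(struct lws_diskcache_scan *lds, const char *hash_hex,
		       const void *obj, size_t obj_len)
{
	char cache_path[256];
	size_t existing;
	int fd;

	switch (lws_diskcache_query(lds, 0 /* not a bot */, hash_hex, &fd,
				    cache_path, sizeof(cache_path),
				    &existing)) {
	case LWS_DISKCACHE_QUERY_EXISTS:
		/* fd is open O_RDONLY on the cached copy, existing has its
		 * size... read it and close it when done */
		close(fd);
		return 0;

	case LWS_DISKCACHE_QUERY_CREATING:
		/* fd is open O_RDWR on a temp "<hash>~pid-..." file */
		if (write(fd, obj, obj_len) != (ssize_t)obj_len) {
			close(fd);
			return 1;
		}
		close(fd);
		/* rename the temp file to its final, hash-only name */
		return lws_diskcache_finalize_name(cache_path);

	default: /* LWS_DISKCACHE_QUERY_NO_CACHE */
		return 0; /* proceed without caching */
	}
}
#endif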

/*
 * The goal is to collect the oldest BATCH_COUNT filepaths and filesizes from
 * the dirs under the cache dir.  Since we don't need or want a full list of
 * files in there in memory at once, we restrict the linked-list size to
 * BATCH_COUNT entries, and once it is full, simply ignore any further files
 * that are newer than the newest one on that list.  Files older than the
 * newest guy already on the list evict the newest guy already on the list
 * and are sorted into the correct order.  In this way no matter the number
 * of files to be processed the memory requirement is fixed at BATCH_COUNT
 * struct file_entry-s.
 *
 * The oldest subset of BATCH_COUNT files are sorted into the lds->batch
 * allocation in more recent -> least recent order.
 *
 * We want to track the total size of all files we saw as well, so we know if
 * we need to actually do anything yet to restrict how much space it's taking
 * up.
 *
 * And we want to do those things statefully and incrementally instead of one
 * big atomic operation, since the user may want a huge cache, so we look in
 * one cache dir at a time and track state in the lws_diskcache_scan struct.
 *
 * When we have seen everything, we add the doubly-linked prev pointers and
 * then if we are over the limit, start deleting up to BATCH_COUNT files
 * working back from the end.
 */

int
lws_diskcache_trim(struct lws_diskcache_scan *lds)
{
	size_t cache_size_limit = (size_t)lds->cache_size_limit;
	char dirpath[132], filepath[132 + 32];
	lws_list_ptr lp, op = NULL;
	int files_trimmed = 0;
	struct file_entry *p;
	int fd, n, ret = -1;
	size_t trimmed = 0;
	struct dirent *de;
	struct stat s;
	DIR *dir;

	if (!lds->cache_subdir) {

		if (lds->last_scan_completed + lds->secs_waiting > time(NULL))
			return 0;

		lds->batch = lws_malloc(sizeof(struct file_entry) *
					BATCH_COUNT, "cache_trim");
		if (!lds->batch) {
			lwsl_err("%s: OOM\n", __func__);

			return 1;
		}
		lds->agg_size = 0;
		lds->head = NULL;
		lds->batch_in_use = 0;
		lds->agg_file_count = 0;
	}

	lws_snprintf(dirpath, sizeof(dirpath), "%s/%c/%c",
		     lds->cache_dir_base, hex[(lds->cache_subdir >> 4) & 15],
		     hex[lds->cache_subdir & 15]);

	dir = opendir(dirpath);
	if (!dir) {
		lwsl_err("Unable to walk repo dir '%s'\n",
			 lds->cache_dir_base);

		return -1;
	}

	do {
		de = readdir(dir);
		if (!de)
			break;

		if (de->d_type != DT_REG)
			continue;

		lds->agg_file_count++;

		lws_snprintf(filepath, sizeof(filepath), "%s/%s", dirpath,
			     de->d_name);

		fd = open(filepath, O_RDONLY);
		if (fd < 0) {
			lwsl_err("%s: cannot open %s\n", __func__, filepath);

			continue;
		}

		n = fstat(fd, &s);
		close(fd);
		if (n) {
			lwsl_notice("%s: cannot stat %s\n", __func__,
				    filepath);
			continue;
		}

		lds->agg_size += (uint64_t)s.st_size;

		if (lds->batch_in_use == BATCH_COUNT) {
			/*
			 * once we filled up the batch with candidates, we
			 * don't need to consider any files newer than the
			 * newest guy on the list...
			 */
			if (lp_to_fe(lds->head, sorted)->modified < s.st_mtime)
				continue;

			/*
			 * ... and if we find an older file later, we know it
			 * will be replacing the newest guy on the list, so
			 * use that directly...
			 */
			p = lds->head;
			lds->head = p->sorted;
		} else
			/* we are still accepting anything to fill the batch */
			p = &lds->batch[lds->batch_in_use++];

		p->sorted = NULL;
		strncpy(p->name, de->d_name, sizeof(p->name) - 1);
		p->name[sizeof(p->name) - 1] = '\0';
		p->modified = s.st_mtime;
		p->size = (size_t)s.st_size;

		lws_list_ptr_insert(&lds->head, &p->sorted, fe_modified_sort);
	} while (de);

	ret = 0;

	lds->cache_subdir++;
	if (lds->cache_subdir != 0x100)
		goto done;

	/* we completed the whole scan... */

	/* if really no guidance, then 256MiB */
	if (!cache_size_limit)
		cache_size_limit = 256 * 1024 * 1024;

	if (lds->agg_size > cache_size_limit) {

		/* apply prev pointers to make the list doubly-linked */

		lp = lds->head;
		while (lp) {
			p = lp_to_fe(lp, sorted);

			p->prev = op;
			op = &p->prev;
			lp = p->sorted;
		}

		/*
		 * reverse the list (start from tail, now traverse using
		 * .prev)... it's oldest-first now...
		 */

		lp = op;

		while (lp && lds->agg_size > cache_size_limit) {
			p = lp_to_fe(lp, prev);

			lws_snprintf(filepath, sizeof(filepath), "%s/%c/%c/%s",
				     lds->cache_dir_base, p->name[0],
				     p->name[1], p->name);

			if (!unlink(filepath)) {
				lds->agg_size -= p->size;
				trimmed += p->size;
				files_trimmed++;
			} else
				lwsl_notice("%s: Failed to unlink %s\n",
					    __func__, filepath);

			lp = p->prev;
		}

		if (files_trimmed)
			lwsl_notice("%s: %s: trimmed %d files totalling "
				    "%lldKiB, leaving %lldMiB\n", __func__,
				    lds->cache_dir_base, files_trimmed,
				    ((unsigned long long)trimmed) / KIB,
				    ((unsigned long long)lds->agg_size) / MIB);
	}

	if (lds->agg_size && lds->agg_file_count)
		lds->avg_size = lds->agg_size / (uint64_t)lds->agg_file_count;

	/*
	 * estimate how long we can go before scanning again... default we
	 * need to start again immediately
	 */
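	/*
	 * As an illustrative example of the interval computed below (the
	 * numbers are assumed, not taken from the code): with a 256MiB limit
	 * and 200MiB currently held in 3200 files, avg works out as 80% of
	 * 64KiB, ie, 52428 bytes, so one BATCH_COUNT pass can reclaim
	 * capacity = 52428 * 128, about 6.4MiB.  Growing the cache by 10%
	 * projects 220MiB in use, leaving 36MiB of headroom; the integer
	 * division 36MiB / ~6.4MiB gives 5, and (256 / 2) * 5 sets
	 * secs_waiting to 640s until the next scan is worth starting.
	 */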
	lds->last_scan_completed = time(NULL);
	lds->secs_waiting = 1;

	if (lds->agg_size < cache_size_limit) {
		uint64_t avg = 4096, capacity, projected;

		/* let's use 80% of the real average for margin */
		if (lds->agg_size && lds->agg_file_count)
			avg = ((lds->agg_size * 8) /
			       (uint64_t)lds->agg_file_count) / 10;

		/*
		 * if we collected BATCH_COUNT files of the average size,
		 * how much can we clean up in 256s?
		 */
		capacity = avg * BATCH_COUNT;

		/*
		 * if the cache grew by 10%, would we hit the limit even then?
		 */
		projected = (lds->agg_size * 11) / 10;
		if (projected < cache_size_limit) /* no... */
			lds->secs_waiting = (int)((256 / 2) *
				((cache_size_limit - projected) / capacity));

		/*
		 * large waits imply we may not have enough info yet, so
		 * check once an hour at least.
		 */

		if (lds->secs_waiting > 3600)
			lds->secs_waiting = 3600;
	} else
		lds->secs_waiting = 0;

	lwsl_info("%s: cache %s: %lldKiB / %lldKiB, next scan %ds\n", __func__,
		  lds->cache_dir_base,
		  (unsigned long long)lds->agg_size / KIB,
		  (unsigned long long)cache_size_limit / KIB,
		  lds->secs_waiting);

	lws_free(lds->batch);
	lds->batch = NULL;
	lds->cache_subdir = 0;

done:
	closedir(dir);

	return ret;
}
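
#if defined(LWS_DISKCACHE_USAGE_EXAMPLE)
/*
 * Illustrative sketch only, guarded out of the build by a hypothetical
 * macro: how a caller's periodic housekeeping might drive the incremental
 * trim.  Each call covers at most one of the 256 subdirs, so calling it from
 * an existing timer keeps the per-call latency small, and once a full scan
 * completes, lws_diskcache_trim() waits out the interval it decided on;
 * lws_diskcache_secs_to_idle() reports that interval.
 */
static void
example_housekeeping_tick(struct lws_diskcache_scan *lds)
{
	if (lws_diskcache_trim(lds) < 0)
		lwsl_err("%s: trim failed\n", __func__);
	else
		lwsl_debug("%s: next full scan in %ds\n", __func__,
			   lws_diskcache_secs_to_idle(lds));
}
#endif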