/*
 * libwebsockets - small server side websockets and web server implementation
 *
 * Copyright (C) 2010 - 2022 Andy Green
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Secure Streams / OTA
 *
 * In the interests of minimizing heap usage, OTA SS is only existing during
 * update checks, update bulk data download, and OTA storage. Checks are
 * initiated by cx->sul_ota_periodic which is triggered at OPERATIONAL and then
 * periodically as set in system_ops->ota_ops->ota_periodic_check_secs.
 */

#include "private-lib-core.h"

static const char * const ota_pub_jwk = LWS_OTA_PUBLIC_JWK;

/*
 * This is a string that is unique to the build type / application... we use
 * it to make sure that we are updating to the same kind of build...
 */
const char *lws_ota_variant = LWS_OTA_VARIANT;

/*
 * Scheduled callback on g->sul_drain, doing one of two jobs depending on
 * g->state:
 *
 *  - LWSOS_AWAITING_MODAL: try to move the system state to MODAL_UPDATING,
 *    rescheduling itself after LWS_US_PER_SEC if that was refused; on success
 *    create the inflator and the SHA512 hash context and call the platform
 *    ota_start() op.
 *
 *  - otherwise: if no async platform op is outstanding, stage as much
 *    inflated data as possible into g->buf, add it to the running hash, and
 *    queue LWS_OTA_ASYNC_WRITE for it.  When the last byte has been staged,
 *    the hash is finalized and compared with the one from the manifest
 *    before the final write is queued.
 *
 * Failures while staging set g->flow.state to READ_FAILED and queue
 * LWS_OTA_ASYNC_ABORT.
 */
static void
ota_write_sul_cb(lws_sorted_usec_list_t *sul)
{
	lws_ota_t *g = lws_container_of(sul, lws_ota_t, sul_drain);

	/* we use this to retry entering modal */

	if (g->state == LWSOS_AWAITING_MODAL) {
		const lws_ota_ops_t *ota_ops = &g->cx->system_ops->ota_ops;

		/*
		 * Ask the user code to move to AWAITING_MODAL_UPDATING which it
		 * should agree to... and then MODAL_UPDATING where it may
		 * choose to indicate it can't stop what it's doing right now.
		 */

		lws_state_transition(&g->cx->mgr_system,
				     LWS_SYSTATE_AWAITING_MODAL_UPDATING);
		lws_state_transition(&g->cx->mgr_system,
				     LWS_SYSTATE_MODAL_UPDATING);

		if (g->cx->mgr_system.state != LWS_SYSTATE_MODAL_UPDATING) {
			/*
			 * Something decided we can't do the update right now,
			 * eg, he's busy rendering something that would exhaust
			 * the heap if we also tried to get on with the update.
			 *
			 * Let's try again in 1s, up to a timeout.
			 */
			lwsl_ss_warn(g->ss, "Scheduling update mode retry");
			lws_sul_schedule(g->cx, 0, &g->sul_drain,
					 ota_write_sul_cb, LWS_US_PER_SEC);
			return;
		}

		/* we can go ahead now, the system is in the update mode */

		g->state = LWSOS_FETCHING;

		/* prep the gzip stream decompression */

		g->inflate = lws_upng_inflator_create(&g->outring,
						      &g->outringlen,
						      &g->opl, &g->cl);
		if (!g->inflate) {
			lwsl_err("%s: zlib init failed\n", __func__);
			goto update_impossible;
		}
		g->state = LWSOS_FETCHING_INITED_GZ;

		/* prep the hash computation of the decompressed data */

		if (lws_genhash_init(&g->ctx, LWS_GENHASH_TYPE_SHA512)) {
			lwsl_err("%s: hash init failed\n", __func__);
			goto update_impossible;
		}
		g->state = LWSOS_FETCHING_INITED_GZ_HASH;

		/*
		 * we don't want to create a dupe of ourselves while
		 * we're busy doing the OTA
		 */
		lws_sul_cancel(&g->cx->sul_ota_periodic);

		lwsl_warn("%s: platform ota start\n", __func__);

		/* continues asynchronously */
		if (ota_ops->ota_start(g)) {
			lwsl_err("%s: ota_start failed\n", __func__);
			goto update_impossible;
		}

		return;

update_impossible:
		g->state = LWSOS_FAILED;
		lws_ss_start_timeout(g->ss, 1);

		return;
	}

	if (*((volatile lws_ota_async_t *)&g->async_last)) {
		/*
		 * The task is busy, we can't start anything atm.  When it
		 * is finished, the write completion will come back here.
		 */
		return;
	}

	/*
	 * We have a chance to write the next chunk... let's stage g->buf with
	 * as much inflated data as we can with what we have to hand, and set it
	 * writing
	 */

	g->buf_len = 0;
	while (g->buf_len < sizeof(g->buf) - 8 &&
	       g->seen + g->buf_len < g->expected_size) {
		lws_stateful_ret_t sr = 0;
		size_t os, part;

		/* inflator pauses for WANT_OUTPUT after this many bytes out */
		g->inflate->bypl = sizeof(g->buf) - g->buf_len - 1;

		if (*g->opl == *g->cl) {

			/* No output pending.. do we have unused input left? */

			if (g->flow.len) {
				/*
				 * There's some input already available,
				 * let's process that and see if it helped
				 */
				sr = lws_upng_inflate_data(g->inflate, NULL, 0);
				if (sr & LWS_SRET_FATAL) {
					lwsl_ss_err(g->ss, "inflate error 1");
					goto fail;
				}
				g->flow.len = g->inflate->inlen -
						(g->inflate->bp >> 3);
			}

			if (*g->opl == *g->cl) {
				/*
				 * Still no output available... let's
				 * attempt to move to the next
				 */
				lws_flow_req(&g->flow);
				if (!g->flow.len)
					break;

				sr = lws_upng_inflate_data(g->inflate,
							   g->flow.data,
							   g->flow.len);
				g->flow.len = g->inflate->inlen -
						(g->inflate->bp >> 3);
			}
		}

		/* there is already output pending */

		if (sr & LWS_SRET_FATAL) {
			lwsl_ss_err(g->ss, "inflate error %d", sr & 0xff);
			goto fail;
		}

		/* how much is waiting in the output ring, capped to our room */

		os = ((*g->opl - g->old_op) % g->outringlen);
		if (os > sizeof(g->buf) - g->buf_len)
			os = sizeof(g->buf) - g->buf_len;

		if (!os) {
			lwsl_err("%s: Nothing to compose in\n", __func__);
			break;
		}

		/* the pending output may wrap around the end of the ring */

		part = os;
		if (*g->opl % g->outringlen < g->old_op)
			part = g->outringlen - g->old_op;

		memcpy(g->buf + g->buf_len, g->outring + g->old_op, part);
		g->buf_len += part;
		if (part != os) {
			memcpy(g->buf + g->buf_len, g->outring, os - part);
			g->buf_len += os - part;
		}

		g->old_op = *g->opl % g->outringlen;
		*g->cl += os;

	} /* while try to fill the staging buffer */

	if (!g->buf_len)
		/*
		 * no ammo to work with... we will come back next time we
		 * get some rx
		 */
		return;

	g->seen += g->buf_len;
	if (g->seen > g->expected_size) {
		lwsl_ss_err(g->ss, "oversize payload");
		goto fail;
	}

	/* let's track the hash as we get it */

	if (lws_genhash_update(&g->ctx, g->buf, g->buf_len)) {
		lwsl_ss_err(g->ss, "hash update failed");
		goto fail;
	}

	if (g->seen == g->expected_size) {
		char temp[64];

		lws_upng_inflator_destroy(&g->inflate);

		/*
		 * Don't compare against temp unless the finalization actually
		 * produced a digest in it
		 */
		if (lws_genhash_destroy(&g->ctx, temp)) {
			lwsl_ss_err(g->ss, "hash finalize failed");
			goto fail;
		}

		if (memcmp(temp, g->sha512, sizeof(temp))) {
			lwsl_err("%s: payload hash differs\n", __func__);
			goto fail;
		}
	}

	g->cx->system_ops->ota_ops.ota_queue(g, LWS_OTA_ASYNC_WRITE);

	return;

fail:
	g->flow.state = LWSDLOFLOW_STATE_READ_FAILED;
	lws_ss_cx_from_user(g)->system_ops->ota_ops.ota_queue(g,
							LWS_OTA_ASYNC_ABORT);
}

/*
 * Handles completion of the async LWS_OTA_ASYNC_START op.  If the op failed,
 * the flow is marked failed and an abort is queued; otherwise the SS is
 * asked to connect again and the drain callback is scheduled.
 */
static void
ota_completion_start(lws_ota_t *g)
{
	if (g->async_r != LWSOTARET_OK) {
		lwsl_ss_err(g->ss, "OTA START FAILED r %d", g->async_r);
		g->flow.state = LWSDLOFLOW_STATE_READ_FAILED;
		lws_ss_cx_from_user(g)->system_ops->ota_ops.ota_queue(g,
							LWS_OTA_ASYNC_ABORT);
		return;
	}

	/* we can start writing now */

	g->ota_start_done = 1;
	g->state = LWSOS_STARTED;

	if (lws_ss_client_connect(lws_ss_from_user(g)))
		lwsl_ss_warn(g->ss, "reconn failed");

	lws_sul_schedule(g->cx, 0, &g->sul_drain, ota_write_sul_cb, 1);
}

/*
 * Handles completion of an async LWS_OTA_ASYNC_WRITE op.  Accounts the
 * written bytes, reports percentage progress when it changes, and either
 * schedules staging of the next chunk or, once g->written reaches
 * g->expected_size, queues LWS_OTA_ASYNC_FINALIZE.
 */
static void
ota_completion_write(lws_ota_t *g)
{
	const lws_ota_ops_t *ota_ops = &g->cx->system_ops->ota_ops;
	uint8_t pc;

	if (g->async_r != LWSOTARET_OK) {
		lwsl_ss_err(g->ss, "r %d", g->async_r);
		g->flow.state = LWSDLOFLOW_STATE_READ_FAILED;
		lws_ss_cx_from_user(g)->system_ops->ota_ops.ota_queue(g,
							LWS_OTA_ASYNC_ABORT);
		return;
	}

	g->written += g->buf_len;

	pc = (uint8_t)((g->written * 100) / g->expected_size);
	if (pc != g->last_pc) {
		g->last_pc = pc;
		lwsl_notice("%s: %u%%\n", __func__, pc);
		if (ota_ops->ota_progress)
			ota_ops->ota_progress(LWSOTARET_PROGRESS, pc);
	}

	if (g->written != g->expected_size) {
		/* more to do: go and stage the next chunk */
		lws_sul_schedule(g->cx, 0, &g->sul_drain, ota_write_sul_cb, 1);

		return;
	}

	/* We have completed writing the last part */

	lwsl_warn("%s: finalizing good ota\n", __func__);

	g->cx->system_ops->ota_ops.ota_queue(g, LWS_OTA_ASYNC_FINALIZE);
}

/*
 * Handles completion of the async LWS_OTA_ASYNC_FINALIZE op: if it reported
 * no error, calls the system_ops reboot op.
 */
static void
ota_completion_finalize(lws_ota_t *g)
{
	lwsl_notice("%s: %d\n", __func__, g->async_r);

	if (g->async_r)
		return;

	g->cx->system_ops->reboot();
}

/*
 * Handles completion of the async LWS_OTA_ASYNC_ABORT op: requests the
 * system state go back to OPERATIONAL, sets a short timeout on the SS, and
 * re-arms the periodic check using ota_periodic_check_secs, or 24h if that
 * is not set.
 */
static void
ota_completion_abort(lws_ota_t *g)
{
	int secs = 0;

	if (g->cx->system_ops &&
	    g->cx->system_ops->ota_ops.ota_periodic_check_secs)
		secs = g->cx->system_ops->ota_ops.ota_periodic_check_secs;

	/* return from modal update state */
	lws_state_transition(&g->cx->mgr_system, LWS_SYSTATE_OPERATIONAL);

	/* we've had it */
	lws_ss_start_timeout(g->ss, 1);

	lws_sul_schedule(g->cx, 0, &g->cx->sul_ota_periodic,
			 lws_ota_periodic_cb,
			 secs ? secs * LWS_US_PER_SEC :
				24 * 3600 * LWS_US_PER_SEC);
}

/*
 * SS rx handler.
 *
 * Once g->state has reached LWSOS_FETCHING, incoming payload is compressed
 * firmware data: it is appended to g->flow.bl and the drain callback is
 * scheduled to consume it.
 *
 * Before that, incoming payload is the JWS manifest, accumulated in g->buf
 * until EOM.  The manifest is then checked against ota_pub_jwk, must use
 * ES512 and name exactly our build variant, and its path, size, unixtime and
 * sha512 members are captured into g.  If the described firmware is newer
 * than what the platform reports, g->state moves to LWSOS_AWAITING_MODAL and
 * the drain callback is scheduled to try to enter update mode.
 *
 * Returns LWSSSSRET_OK to carry on, or LWSSSSRET_DISCONNECT_ME if the data
 * could not be buffered or the manifest was rejected.
 */
static lws_ss_state_return_t
ota_rx(void *userobj, const uint8_t *in, size_t len, int flags)
{
	lws_ss_state_return_t r = LWSSSSRET_DISCONNECT_ME;
	lws_ota_t *g = (lws_ota_t *)userobj;
	const lws_ota_ops_t *ota_ops =
			&lws_ss_cx_from_user(g)->system_ops->ota_ops;
	struct lws_jws_map map;
	struct lws_jwk jwk;
	uint64_t fw_last;
	char temp[1024];
	int temp_len = sizeof(temp);
	const char *p;
	size_t alen;
	int n;

	if (g->state >= LWSOS_FETCHING) {

		lwsl_info("%s: fetching %u, fl 0x%02X\n", __func__,
			  (unsigned int)len, flags);

		/*
		 * We are decompressing, checking and flashing the image.
		 *
		 * g->flow and its buflist is managing COMPRESSED data from the
		 * network according to g->flow.window limit.  Rx events are
		 * triggered by tx credit manipulation from, and coming to
		 * service g->flow / buflist state ONLY and do not know or care
		 * about direct inflator state (it makes itself felt by using
		 * g->flow data in the write completion).
		 *
		 * The inflator may not need any g->flow data to produce output,
		 * or it may need all of it and more before it can produce
		 * output, or somewhere in the middle.  At the output side, we
		 * have a fixed-size staging buffer so we may need to come back
		 * to issue more inflated data without any network event
		 * triggering it.
		 */

		if (flags & LWSSS_FLAG_SOM) {
			g->state = LWSOS_WRITING;
			g->flow.state = LWSDLOFLOW_STATE_READ;
			g->flow.h = g->ss;
			g->flow.window = 4096;
			if (ota_ops->ota_progress)
				ota_ops->ota_progress(LWSOTARET_PROGRESS, 0);
		}

		if (len &&
		    lws_buflist_append_segment(&g->flow.bl, in, len) < 0) {
			lwsl_ss_err(g->ss, "OOM");
			goto fetch_fail;
		}

		lws_sul_schedule(g->cx, 0, &g->sul_drain, ota_write_sul_cb, 1);

		if (flags & LWSSS_FLAG_EOM)
			/*
			 * This was the last part, so there is no more new data
			 * in flight
			 */
			g->flow.state = (uint8_t)LWSDLOFLOW_STATE_READ_COMPLETED;

		return LWSSSSRET_OK;

fetch_fail:
		g->flow.state = LWSDLOFLOW_STATE_READ_FAILED;

		return LWSSSSRET_DISCONNECT_ME;
	}

	/* we are collecting the manifest... */

	if (g->pos + len > sizeof(g->buf))
		return LWSSSSRET_DISCONNECT_ME;

	memcpy(g->buf + g->pos, in, len);
	g->pos += len;

	if ((flags & LWSSS_FLAG_EOM) != LWSSS_FLAG_EOM)
		return LWSSSSRET_OK;

	/* we want to validate the JWS manifest against our public JWK */

	if (lws_jwk_import(&jwk, NULL, NULL, ota_pub_jwk,
			   strlen(ota_pub_jwk))) {
		lwsl_err("%s: unable to import jwk\n", __func__);
		return LWSSSSRET_DISCONNECT_ME;
	}

	/* Step 1... is the JWS signed by the required key? */

	if (lws_jws_sig_confirm_compact_b64(g->buf, g->pos, &map, &jwk,
					    lws_ss_cx_from_user(g),
					    temp, &temp_len)) {
		lwsl_err("%s: manifest failed sig check\n", __func__);
		goto bail;
	}

	/* finished with the jwk */
	lws_jwk_destroy(&jwk);

	/* Step 2... the JOSE and payload sections are there, right? */

	if (!map.buf[LJWS_JOSE] || !map.buf[LJWS_PYLD]) {
		lwsl_err("%s: no JOSE block\n", __func__);
		goto bail1;
	}

	/* Step 3... do we agree the signing alg is secure enough? */

	p = lws_json_simple_find(map.buf[LJWS_JOSE], map.len[LJWS_JOSE],
				 "\"alg\":", &alen);
	if (!p) {
		lwsl_err("%s: no alg\n", __func__);
		goto bail1;
	}

	/*
	 * Require the whole value to be "ES512": comparing only alen chars
	 * would also accept an empty or truncated value like "ES"
	 */
	if (alen != 5 || strncmp("ES512", p, alen)) {
		lwsl_err("%s: bad alg %.*s %d\n", __func__,
			 (int)alen, p, (int)alen);
		goto bail1;
	}

	/*
	 * We trust that the manifest was robustly signed by the key we like,
	 * let's parse out the pieces we care about and validate the firmware is
	 * the same variant build as we're currently running, and, eg, we're not
	 * being given a validly-signed real firmware from the wrong variant,
	 * that will brick us.
	 */

	lwsl_hexdump_notice(map.buf[LJWS_PYLD], map.len[LJWS_PYLD]);

	lwsl_notice("%s: JWS validated okay\n", __func__);

	p = lws_json_simple_find(map.buf[LJWS_PYLD], map.len[LJWS_PYLD],
				 "\"variant\":", &alen);
	if (!p) {
		/* alen is not valid here, so don't try to print the value */
		lwsl_err("%s: no variant\n", __func__);
		goto bail1;
	}

	/* exact match only: a prefix of our variant is a different variant */
	if (alen != strlen(lws_ota_variant) ||
	    strncmp(lws_ota_variant, p, alen)) {
		lwsl_err("%s: wrong variant %.*s\n", __func__, (int)alen, p);
		goto bail1;
	}

	/*
	 * We liked the manifest, prepare to go again targeting the payload
	 * that the manifest described to us.
	 */

	p = lws_json_simple_find(map.buf[LJWS_PYLD], map.len[LJWS_PYLD],
				 "\"path\":", &alen);
	if (!p) {
		lwsl_err("%s: no path\n", __func__);
		goto bail1;
	}

	/*
	 * A path that doesn't fit in g->file would be truncated by the copy,
	 * while the metadata length below would still be the full alen
	 */
	if (alen >= sizeof(g->file)) {
		lwsl_err("%s: path too long\n", __func__);
		goto bail1;
	}

	lws_strnncpy(g->file, p, alen, sizeof(g->file));
	if (lws_ss_set_metadata(lws_ss_from_user(g), "file", g->file, alen)) {
		lwsl_err("%s: failed to set firmware file %s\n", __func__,
			 g->file);
		goto bail1;
	}

	p = lws_json_simple_find(map.buf[LJWS_PYLD], map.len[LJWS_PYLD],
				 "\"size\":", &alen);
	if (!p) {
		lwsl_err("%s: no size\n", __func__);
		goto bail1;
	}
	g->expected_size = (size_t)atoll(p);

	/*
	 * With nothing expected, the write stage can never stage any data, so
	 * the update could never complete once we had entered update mode
	 */
	if (!g->expected_size) {
		lwsl_err("%s: bad size\n", __func__);
		goto bail1;
	}

	p = lws_json_simple_find(map.buf[LJWS_PYLD], map.len[LJWS_PYLD],
				 "\"unixtime\":", &alen);
	if (!p) {
		lwsl_err("%s: no unxitime\n", __func__);
		goto bail1;
	}
	g->unixtime = (uint64_t)atoll(p);

	p = lws_json_simple_find(map.buf[LJWS_PYLD], map.len[LJWS_PYLD],
				 "\"sha512\":", &alen);
	if (!p) {
		lwsl_err("%s: no hash\n", __func__);
		goto bail1;
	}
	n = lws_hex_len_to_byte_array(p, alen, g->sha512, sizeof(g->sha512));
	if (n != sizeof(g->sha512)) {
		lwsl_err("%s: bad hash %d %u %s\n", __func__, n,
			 (unsigned int)alen, p);
		goto bail1;
	}

	/*
	 * So... is it newer?
	 */

	if (!ota_ops->ota_get_last_fw_unixtime(&fw_last) &&
	    g->unixtime <= fw_last) {
		/*
		 * We don't actually want this...
		 */
		lwsl_ss_warn(g->ss, "Latest update is not newer");

		return LWSSSSRET_DISCONNECT_ME;
	}

	/*
	 * ... this is something that we like the look of... schedule trying
	 * to enter LWS_SYSTATE_MODAL_UPDATING state after this, and retry if
	 * we don't get there immediately
	 */

	g->state = LWSOS_AWAITING_MODAL;
	lws_sul_schedule(g->cx, 0, &g->sul_drain, ota_write_sul_cb, 1);

	/* on the other hand, don't let it keep trying forever */
	lws_ss_start_timeout(g->ss, 30000);

	/*
	 * We will DISCONNECT shortly, we won't proceed to the update image
	 * download unless we can agree with the user code to enter MODAL_
	 * UPDATING within a timeout.  Otherwise we will give up and retry
	 * after 24h or whatever.
	 */

	return LWSSSSRET_OK;

bail:
	lws_jwk_destroy(&jwk);

bail1:
	return r;
}

/*
 * SS state handler.
 *
 *  - CREATING: records the context and SS in each other, sets the
 *    "ota_variant" and "file" metadata and starts the manifest fetch.
 *  - DISCONNECTED: the stream is kept while a fetch, write or wait for
 *    update mode is in progress, otherwise it asks to be destroyed.
 *  - DESTROYING: releases the buflist, the drain sul, and the hash / inflator
 *    if the state says they are still held.
 *  - TIMEOUT: asks to be destroyed.
 *  - EVENT_WAIT_CANCELLED: if an async platform op has completed, clears
 *    g->async_last and dispatches to the matching completion handler.
 */
static lws_ss_state_return_t
ota_state(void *userobj, void *h_src, lws_ss_constate_t state,
	  lws_ss_tx_ordinal_t ack)
{
	lws_ota_t *g = (lws_ota_t *)userobj;
	int n;

	switch ((int)state) {
	case LWSSSCS_CREATING:
		/* start the transaction as soon as we exist */
		g->cx = lws_ss_cx_from_user(g);
		g->cx->ota_ss = g->ss;
		g->state = LWSOS_CHECKING;

		if (lws_ss_set_metadata(lws_ss_from_user(g), "ota_variant",
					LWS_OTA_VARIANT,
					strlen(LWS_OTA_VARIANT))) {
			lwsl_err("%s: failed to set ota_variant %s\n",
				 __func__, LWS_OTA_VARIANT);
			return LWSSSSRET_DISCONNECT_ME;
		}

		if (lws_ss_set_metadata(lws_ss_from_user(g), "file",
					"manifest.jws", 12)) {
			lwsl_err("%s: failed to set file %s\n",
				 __func__, "manifest.jws");
			return LWSSSSRET_DISCONNECT_ME;
		}

		return lws_ss_client_connect(lws_ss_from_user(g));

	case LWSSSCS_DISCONNECTED:
		/*
		 * We have two kinds of connection that may disconnect, the
		 * manifest fetch, and the firmware fetch.
		 */
		switch (g->state) {
		case LWSOS_FETCHING_INITED_GZ_HASH:
		case LWSOS_FETCHING:
			return LWSSSSRET_OK;

		case LWSOS_WRITING:
			/*
			 * The network part of fetching the update image is
			 * over.  If it didn't fail, we need to stick around and
			 * let it either finish writing and finalizing, or
			 * timeout.
			 */
			lwsl_notice("%s: draining\n", __func__);
			lws_ss_start_timeout(g->ss, 45000);
			return LWSSSSRET_OK;

		case LWSOS_AWAITING_MODAL:
			/*
			 * We might have to wait a bit to find a good moment to
			 * enter the update mode.  If we disconnect
			 * inbetweentimes, it's OK.
			 */
			return LWSSSSRET_OK;

		default:
			lwsl_notice("%s: state %d, DESTROYING\n", __func__,
				    g->state);
			return LWSSSSRET_DESTROY_ME;
		}

	case LWSSSCS_DESTROYING:
		/* we only live for one ota check / fetch */
		lws_ss_cx_from_user(g)->ota_ss = NULL;
		lws_buflist_destroy_all_segments(&g->flow.bl);
		lws_sul_cancel(&g->sul_drain);

		if (g->state == LWSOS_FETCHING_INITED_GZ_HASH)
			lws_genhash_destroy(&g->ctx, NULL);

		if (g->state >= LWSOS_FETCHING_INITED_GZ &&
		    g->state < LWSOS_FINALIZING)
			lws_upng_inflator_destroy(&g->inflate);

		return LWSSSSRET_OK;

	case LWSSSCS_TIMEOUT:
		lwsl_err("%s: timeout\n", __func__);
		return LWSSSSRET_DESTROY_ME;

	case LWSSSCS_EVENT_WAIT_CANCELLED:
		/* We may have a completion */
		if (g->async_completed) {
			g->async_completed = 0;

			/*
			 * Take a copy of which op finished and mark the slot
			 * idle before handling it, the handlers may queue a
			 * new op
			 */
			n = g->async_last;
			*((volatile lws_ota_async_t *)&g->async_last) = 0;

			switch (n) {
			case LWS_OTA_ASYNC_START:
				ota_completion_start(g);
				break;

			case LWS_OTA_ASYNC_WRITE:
				ota_completion_write(g);
				break;

			/* EVENT_WAIT_CANCELLED doesn't deal with returns */

			case LWS_OTA_ASYNC_ABORT:
				/* let's forget about it then */
				lws_ss_start_timeout(g->ss, 1);
				ota_completion_abort(g);
				break;

			case LWS_OTA_ASYNC_FINALIZE:
				lws_ss_start_timeout(g->ss, 5000);
				ota_completion_finalize(g);
				break;
			}
		}
		break;
	}

	return LWSSSSRET_OK;
}

/*
 * SS info for the "ota" streamtype; the initial tx credit is the size of the
 * staging buffer in lws_ota_t.
 */
static LWS_SS_INFO("ota", lws_ota_t)
	.rx				= ota_rx,
	.state				= ota_state,
	.manual_initial_tx_credit	= sizeof(((lws_ota_t *)NULL)->buf),
};

/*
 * Creates the SS and kicks off the manifest check, then re-arms itself using
 * ota_periodic_check_secs, or 24h if that is not set.
 */
void
lws_ota_periodic_cb(lws_sorted_usec_list_t *sul)
{
	struct lws_context *cx = lws_container_of(sul, struct lws_context,
						  sul_ota_periodic);
	int secs = 0;

	if (cx->system_ops && cx->system_ops->ota_ops.ota_periodic_check_secs)
		secs = cx->system_ops->ota_ops.ota_periodic_check_secs;

	lwsl_notice("%s\n", __func__);

	if (lws_ss_create(cx, 0, &ssi_lws_ota_t, NULL, NULL, NULL, NULL))
		lwsl_cx_warn(cx, "failed to create ota SS");

	/* set up coming back again at (usually long) periods */

	lws_sul_schedule(cx, 0, sul, lws_ota_periodic_cb,
			 secs ? secs * LWS_US_PER_SEC :
				24 * 3600 * LWS_US_PER_SEC);
}

/* Returns the build variant string that manifests are checked against. */
const char *
lws_ota_variant_name(void)
{
	return lws_ota_variant;
}