diff --git a/cdoc/CDoc1Reader.cpp b/cdoc/CDoc1Reader.cpp index 626e70c9..cb724f0d 100644 --- a/cdoc/CDoc1Reader.cpp +++ b/cdoc/CDoc1Reader.cpp @@ -25,6 +25,9 @@ #include "Lock.h" #include "Utils.h" #include "ZStream.h" +#include "utils/memory.h" + +#include #include #include @@ -110,14 +113,57 @@ CDoc1Reader::getFMK(std::vector& fmk, unsigned int lock_idx) if (lock_idx >= d->locks.size()) return libcdoc::WRONG_ARGUMENTS; const Lock &lock = d->locks.at(lock_idx); setLastError({}); + + // Determine the FMK length from the container's body cipher. The CDoc1 + // body uses AES-128/192/256 in CBC or GCM mode, so the FMK is 16, 24 + // or 32 bytes long. We pin this length up-front and pass it to the RSA + // decrypt path so that an attacker observing this function cannot + // distinguish between + // (a) RSA padding failed + // (b) RSA padding succeeded but the resulting length was wrong + // (c) a wholly different recipient was used to derive a wrong key. + // + // All three cases must look the same: the function returns OK with a + // candidate FMK of the right length, and the eventual AES decrypt at + // the container body level either authenticates that FMK (success) or + // rejects it. CDoc1 has no header HMAC, so the AES-GCM tag is the + // only bit of authentication we can rely on. AES-CBC containers + // therefore retain a residual oracle (PKCS#7 stripping); using GCM + // when re-encrypting with libcdoc is strongly preferred. + size_t expected_fmk_len = 0; + if (const EVP_CIPHER *c = libcdoc::Crypto::cipher(d->method); c) { + expected_fmk_len = size_t(EVP_CIPHER_key_length(c)); + } + if (expected_fmk_len != 16 && expected_fmk_len != 24 && expected_fmk_len != 32) { + // Method-level error - independent of key bits, so does NOT feed + // an oracle. + setLastError("Failed to derive FMK"); + LOG_ERROR("Unsupported CDoc1 encryption method: {}", d->method); + return libcdoc::CRYPTO_ERROR; + } + + // From this point on, every error path returns the SAME error code and + // SAME last-error string, so that the only bit of information leaking + // back to the caller is "this lock did/did not produce a usable FMK". + constexpr auto FAIL_MSG = "Failed to derive FMK"; + if (lock.isRSA()) { + // If OAEP = false and and fmk.size() != 0, the decryptRSA always + // returns OK with synthetic bytes on padding failure; only a + // fundamental error (e.g. ct size mismatch with modulus) yields a non-OK result. + fmk.resize(expected_fmk_len); int result = crypto->decryptRSA(fmk, lock.encrypted_fmk, false, lock_idx); - if (result < 0) { - setLastError(crypto->getLastErrorStr(result)); + if (result != libcdoc::OK) { + libcdoc::cleanse(fmk); + fmk.clear(); + setLastError(FAIL_MSG); LOG_ERROR("{}", last_error); - return libcdoc::CRYPTO_ERROR; - } - } else { + return libcdoc::CRYPTO_ERROR; + } + // Even on "OK" the contents may be synthetic - that is the point. + // The downstream AES decrypt at the body level is what tells + // success from failure. + } else { std::vector key; int result = crypto->deriveConcatKDF(key, lock.getBytes(Lock::Params::KEY_MATERIAL), @@ -126,18 +172,31 @@ CDoc1Reader::getFMK(std::vector& fmk, unsigned int lock_idx) lock.getBytes(Lock::Params::PARTY_UINFO), lock.getBytes(Lock::Params::PARTY_VINFO), lock_idx); - if (result < 0) { - setLastError(crypto->getLastErrorStr(result)); + if (result < 0) { + libcdoc::cleanse(key); + setLastError(FAIL_MSG); LOG_ERROR("{}", last_error); - return libcdoc::CRYPTO_ERROR; - } + return libcdoc::CRYPTO_ERROR; + } fmk = libcdoc::Crypto::AESWrap(key, lock.encrypted_fmk, false); - } - if (fmk.empty()) { - setLastError("Failed to decrypt/derive fmk"); + libcdoc::cleanse(key); + // AESWrap returns {} on failure. Pad the candidate to expected + // length so the failure shape matches the RSA path; the bytes + // are arbitrary because the body decrypt is going to reject + // them anyway. + if (fmk.size() != expected_fmk_len) { + libcdoc::cleanse(fmk); + fmk.assign(expected_fmk_len, 0); + } + } + + if (fmk.size() != expected_fmk_len) { + libcdoc::cleanse(fmk); + fmk.clear(); + setLastError(FAIL_MSG); LOG_ERROR("{}", last_error); - return libcdoc::CRYPTO_ERROR; - } + return libcdoc::CRYPTO_ERROR; + } return libcdoc::OK; } @@ -382,18 +441,47 @@ result_t CDoc1Reader::decryptData(const std::vector& fmk, setLastError("Failed to decode base64 data"); return libcdoc::IO_ERROR; } + + // Treat any post-FMK decrypt error - including AES-CBC PKCS#7 stripping + // failures and AES-GCM tag mismatches - as the same "container body + // decrypt failed" event. This is the single bit of information an + // attacker can extract per submission of a tampered CDoc1, and we + // rate-limit it. A per-process exponential backoff turns a remote + // Bleichenbacher campaign of 2^20+ queries into hours/days of + // wall-clock cost without penalising legitimate single-shot use. + constexpr auto THROTTLE_SCOPE = "cdoc1-rsa-decrypt"; + auto report_failure = [&]{ + libcdoc::Crypto::rsaOracleThrottleOnFailure(THROTTLE_SCOPE); + }; + VectorSource src(b64); libcdoc::DecryptionSource dec(src, d->method, fmk); if(dec.isError()) { - setLastError("Failed to decrypt data, verify if FMK is correct"); + setLastError("Failed to decrypt data"); + report_failure(); return CRYPTO_ERROR; } + libcdoc::result_t inner_rv = libcdoc::OK; if (d->mime == MIME_ZLIB) { libcdoc::ZSource zsrc(&dec); - if(auto rv = f(zsrc, d->properties["OriginalMimeType"]); rv < OK) - return rv; + inner_rv = f(zsrc, d->properties["OriginalMimeType"]); + } else { + inner_rv = f(dec, d->mime); + } + if (inner_rv < OK) { + // Body parse/decrypt failure. Could be a real I/O glitch, or a + // tampered container - we cannot tell, and on principle we treat + // both alike to deny the attacker a distinguisher. + setLastError("Failed to decrypt data"); + report_failure(); + return inner_rv; } - else if(auto rv = f(dec, d->mime); rv < OK) - return rv; - return dec.close(); + libcdoc::result_t close_rv = dec.close(); + if (close_rv != libcdoc::OK) { + setLastError("Failed to decrypt data"); + report_failure(); + return close_rv; + } + libcdoc::Crypto::rsaOracleThrottleOnSuccess(THROTTLE_SCOPE); + return libcdoc::OK; } diff --git a/cdoc/CDoc2Reader.cpp b/cdoc/CDoc2Reader.cpp index 7b3992ac..caf6a75b 100644 --- a/cdoc/CDoc2Reader.cpp +++ b/cdoc/CDoc2Reader.cpp @@ -32,6 +32,7 @@ #include "header_generated.h" +// TODO: Port to new OpenSSL #define OPENSSL_SUPPRESS_DEPRECATED #include @@ -141,13 +142,21 @@ CDoc2Reader::getFMK(std::vector& fmk, unsigned int lock_idx) LOG_DBG("CDoc2Reader::num locks: {}", priv->locks.size()); const Lock& lock = priv->locks.at(lock_idx); LOG_DBG("Label: {}", lock.label); + + // RAII-cleanse `kek` on every exit from this function (including + // exceptions). All early returns below previously had to remember to + // call libcdoc::cleanse(kek) - which several of them did not. With the + // guard the wipe is unconditional. std::vector kek; + libcdoc::Cleanser kek_guard(kek); + if (lock.type == Lock::Type::PASSWORD) { // Password LOG_DBG("password"); std::string info_str = libcdoc::CDoc2::getSaltForExpand(lock.label); LOG_DBG("info: {}", toHex(info_str)); std::vector kek_pm; + libcdoc::Cleanser kek_pm_guard(kek_pm); if (auto rv = crypto->extractHKDF(kek_pm, lock.getBytes(Lock::SALT), lock.getBytes(Lock::PW_SALT), lock.getInt(Lock::KDF_ITER), lock_idx); rv != libcdoc::OK) { setLastError(crypto->getLastErrorStr(rv)); LOG_ERROR("{}", last_error); @@ -162,6 +171,7 @@ CDoc2Reader::getFMK(std::vector& fmk, unsigned int lock_idx) std::string info_str = libcdoc::CDoc2::getSaltForExpand(lock.label); LOG_DBG("info: {}", toHex(info_str)); std::vector kek_pm; + libcdoc::Cleanser kek_pm_guard(kek_pm); if (auto rv = crypto->extractHKDF(kek_pm, lock.getBytes(Lock::SALT), {}, 0, lock_idx); rv != libcdoc::OK) { setLastError(crypto->getLastErrorStr(rv)); LOG_ERROR("{}", last_error); @@ -173,6 +183,10 @@ CDoc2Reader::getFMK(std::vector& fmk, unsigned int lock_idx) } else if ((lock.type == Lock::Type::PUBLIC_KEY) || (lock.type == Lock::Type::SERVER)) { // Public/private key std::vector key_material; + // SERVER path fetches key_material over the network; PUBLIC_KEY + // takes it from the lock. Either way it gets fed into ECDH or RSA + // and is sensitive enough to wipe in-scope. + libcdoc::Cleanser key_material_guard(key_material); if(lock.type == Lock::Type::SERVER) { if(!conf) { setLastError("Configuration is missing"); @@ -201,8 +215,8 @@ CDoc2Reader::getFMK(std::vector& fmk, unsigned int lock_idx) key_material = lock.getBytes(Lock::Params::KEY_MATERIAL); } - LOG_DBG("Public key: {}", toHex(lock.getBytes(Lock::Params::RCPT_KEY))); - LOG_DBG("Key material: {}", toHex(key_material)); + LOG_TRACE_KEY("Public key: {}", lock.getBytes(Lock::Params::RCPT_KEY)); + LOG_TRACE_KEY("Key material: {}", key_material); if (lock.isRSA()) { int result = crypto->decryptRSA(kek, key_material, true, lock_idx); @@ -213,6 +227,7 @@ CDoc2Reader::getFMK(std::vector& fmk, unsigned int lock_idx) } } else { std::vector kek_pm; + libcdoc::Cleanser kek_pm_guard(kek_pm); int result = crypto->deriveHMACExtract(kek_pm, key_material, toUint8Vector(libcdoc::CDoc2::KEKPREMASTER), lock_idx); if (result < 0) { setLastError(crypto->getLastErrorStr(result)); @@ -317,6 +332,10 @@ CDoc2Reader::getFMK(std::vector& fmk, unsigned int lock_idx) LOG_ERROR("Cannot fetch share {}", i); return result; } + // Each individual share is itself sensitive: combined with the + // remaining shares it reconstructs the KEK. Wipe it after + // XOR-ing it into kek so it does not linger on the heap. + libcdoc::Cleanser share_guard(share.share); if (auto err = libcdoc::Crypto::xor_data(kek, kek, share.share); err != libcdoc::OK) { setLastError("Failed to derive kek"); LOG_ERROR("Failed to derive kek"); @@ -341,18 +360,27 @@ CDoc2Reader::getFMK(std::vector& fmk, unsigned int lock_idx) if (auto err = libcdoc::Crypto::xor_data(fmk, lock.encrypted_fmk, kek); err != libcdoc::OK) { setLastError(t_("Failed to decrypt/derive fmk")); LOG_ERROR("{}", last_error); + // Wipe any partial XOR result before surfacing the error. + libcdoc::cleanse(fmk); + fmk.clear(); return err; } std::vector hhk = libcdoc::Crypto::expand(fmk, libcdoc::CDoc2::HMAC); + libcdoc::Cleanser hhk_guard(hhk); LOG_TRACE_KEY("xor: {}", lock.encrypted_fmk); LOG_TRACE_KEY("fmk: {}", fmk); LOG_TRACE_KEY("hhk: {}", hhk); LOG_TRACE_KEY("hmac: {}", priv->headerHMAC); - if(libcdoc::Crypto::sign_hmac(hhk, priv->header_data) != priv->headerHMAC) { + if(!libcdoc::constant_time_compare(libcdoc::Crypto::sign_hmac(hhk, priv->header_data), priv->headerHMAC)) { setLastError(t_("Wrong decryption key (user key)")); LOG_ERROR("{}", last_error); + // Authentication failed: the FMK we computed is for the wrong + // recipient. Wipe it before returning so the caller cannot leak + // it (e.g. via a logging hook that sees "fmk" in scope). + libcdoc::cleanse(fmk); + fmk.clear(); return libcdoc::WRONG_KEY; } setLastError({}); @@ -609,7 +637,7 @@ CDoc2Reader::Private::buildLock(Lock& lock, const cdoc20::header::RecipientRecor std::string urls = join(strs, ";"); LOG_DBG("Keyshare urls: {}", urls); std::vector salt = toUint8Vector(capsule->salt()); - LOG_DBG("Keyshare salt: {}", toHex(salt)); + LOG_TRACE_KEY("Keyshare salt: {}", salt); std::string recipient_id = capsule->recipient_id()->str(); LOG_DBG("Keyshare recipient id: {}", recipient_id); lock.type = Lock::SHARE_SERVER; @@ -647,7 +675,7 @@ CDoc2Reader::CDoc2Reader(libcdoc::DataSource *src, bool take_ownership) LOG_ERROR("{}", last_error); return; } - uint32_t header_len = (c[0] << 24) | (c[1] << 16) | c[2] << 8 | c[3]; + uint32_t header_len = (uint32_t(c[0]) << 24) | (uint32_t(c[1]) << 16) | uint32_t(c[2]) << 8 | c[3]; if (constexpr uint32_t MAX_LEN = (1 << 20); header_len > MAX_LEN) { LOG_ERROR("{}", last_error); return; diff --git a/cdoc/CDoc2Writer.cpp b/cdoc/CDoc2Writer.cpp index b2ebb455..e480d522 100644 --- a/cdoc/CDoc2Writer.cpp +++ b/cdoc/CDoc2Writer.cpp @@ -51,23 +51,23 @@ CDoc2Writer::writeHeader(const std::vector &recipients) if(auto rv = crypto->random(rnd, libcdoc::CDoc2::KEY_LEN); rv < 0) return rv; std::vector fmk = libcdoc::Crypto::extract(rnd, {libcdoc::CDoc2::SALT.cbegin(), libcdoc::CDoc2::SALT.cend()}); - std::fill(rnd.begin(), rnd.end(), 0); + libcdoc::cleanse(rnd); LOG_TRACE_KEY("fmk: {}", fmk); std::vector header; if(auto rv = buildHeader(header, recipients, fmk); rv < 0) { - std::fill(fmk.begin(), fmk.end(), 0); + libcdoc::cleanse(fmk); return rv; } auto hhk = libcdoc::Crypto::expand(fmk, libcdoc::CDoc2::HMAC); auto cek = libcdoc::Crypto::expand(fmk, libcdoc::CDoc2::CEK); - std::fill(fmk.begin(), fmk.end(), 0); + libcdoc::cleanse(fmk); LOG_TRACE_KEY("cek: {}", cek); LOG_TRACE_KEY("hhk: {}", hhk); std::vector headerHMAC = libcdoc::Crypto::sign_hmac(hhk, header); - std::fill(hhk.begin(), hhk.end(), 0); + libcdoc::cleanse(hhk); LOG_TRACE_KEY("hmac: {}", headerHMAC); uint32_t hs = uint32_t(header.size()); @@ -208,11 +208,23 @@ CDoc2Writer::buildHeader(std::vector& header, const std::vector> fb_rcpts; + // xor_key is XOR(fmk, kek). It is published in the header as the + // "encrypted FMK" so it is not itself a long-term secret, but while in + // scope it is bitwise-paired with the secret KEK and we wipe it on + // exit anyway as a hygiene measure. std::vector xor_key; + libcdoc::Cleanser xor_key_guard(xor_key); + for (unsigned int rcpt_idx = 0; rcpt_idx < recipients.size(); rcpt_idx++) { const libcdoc::Recipient& rcpt = recipients.at(rcpt_idx); if (rcpt.isPKI()) { std::vector key_material, kek; + // Per-iteration RAII cleanse: even if FAIL(...) (which expands + // to `return fail(...);`) shortcuts the loop, both buffers are + // wiped during stack unwind. The same applies to all other + // iteration-local secrets below. + libcdoc::Cleanser key_material_guard(key_material); + libcdoc::Cleanser kek_guard(kek); std::string send_url; if(rcpt.isKeyServer()) { if(!conf) @@ -259,8 +271,10 @@ CDoc2Writer::buildHeader(std::vector& header, const std::vector sharedSecret = libcdoc::Crypto::deriveSharedSecret(ephKey.get(), publicKey.get()); + libcdoc::Cleanser sharedSecret_guard(sharedSecret); key_material = libcdoc::Crypto::toPublicKeyDer(ephKey.get()); std::vector kekPm = libcdoc::Crypto::extract(sharedSecret, std::vector(libcdoc::CDoc2::KEKPREMASTER.cbegin(), libcdoc::CDoc2::KEKPREMASTER.cend())); + libcdoc::Cleanser kekPm_guard(kekPm); std::string info_str = libcdoc::CDoc2::getSaltForExpand(key_material, rcpt.rcpt_key); kek = libcdoc::Crypto::expand(kekPm, info_str, fmk.size()); @@ -290,6 +304,7 @@ CDoc2Writer::buildHeader(std::vector& header, const std::vector kek_pm(libcdoc::CDoc2::KEY_LEN); + libcdoc::Cleanser kek_pm_guard(kek_pm); std::vector salt; int64_t result = crypto->random(salt, libcdoc::CDoc2::KEY_LEN); if (result < 0) @@ -302,6 +317,7 @@ CDoc2Writer::buildHeader(std::vector& header, const std::vectorgetLastErrorStr(result), result); std::vector kek = libcdoc::Crypto::expand(kek_pm, info_str, libcdoc::CDoc2::KEY_LEN); + libcdoc::Cleanser kek_guard(kek); LOG_DBG("Label: {}", rcpt.label); LOG_DBG("KDF iter: {}", rcpt.kdf_iter); @@ -345,14 +361,18 @@ CDoc2Writer::buildHeader(std::vector& header, const std::vector key_material; crypto->random(key_material, libcdoc::CDoc2::KEY_LEN); + // key_material is split-share-input material; wipe on exit. + libcdoc::Cleanser key_material_guard(key_material); //KEK_i_pm = HKDF_Extract(KeyMaterialSalt_i, KeyMaterial_i) std::vector kek_pm = libcdoc::Crypto::extract(key_material_salt, key_material); + libcdoc::Cleanser kek_pm_guard(kek_pm); // KEK_i = HKDF_Expand(KEK_i_pm, "CDOC2kek" + FMKEncryptionMethod + RecipientInfo_i, L) std::string info_str = std::string("CDOC2kek") + cdoc20::header::EnumNameFMKEncryptionMethod(cdoc20::header::FMKEncryptionMethod::XOR) + RecipientInfo_i; LOG_DBG("Info: {}", info_str); std::vector kek = libcdoc::Crypto::expand(kek_pm, info_str); + libcdoc::Cleanser kek_guard(kek); LOG_TRACE_KEY("kek: {}", kek); if (kek.empty()) return libcdoc::CRYPTO_ERROR; if (auto err = libcdoc::Crypto::xor_data(xor_key, fmk, kek); err != libcdoc::OK) @@ -361,6 +381,18 @@ CDoc2Writer::buildHeader(std::vector& header, const std::vector> kek_shares(N_SHARES); + // Each individual share is itself sensitive: combined with the + // remaining shares it reconstructs KEK_i (and thus the FMK). + // Wipe every kek_shares[i] on exit. The lambda runs from the + // destructor of `shares_guard` regardless of how we leave + // scope (early return, exception). + struct KekSharesCleanser { + std::vector>& v; + ~KekSharesCleanser() noexcept { + for (auto &s : v) libcdoc::cleanse(s); + } + } shares_guard{kek_shares}; + for (int i = 1; i < N_SHARES; i++) { // KEK_i_share_j = CSRNG(256) crypto->random(kek_shares[i], libcdoc::CDoc2::KEY_LEN); @@ -378,7 +410,7 @@ CDoc2Writer::buildHeader(std::vector& header, const std::vector> transaction_ids(N_SHARES); for (int i = 0; i < N_SHARES; i++) { std::string send_url = urls[i]; - LOG_DBG("Sending share: {} {} {}", i, send_url, libcdoc::toHex(kek_shares[i])); + LOG_TRACE("Sending share: {} {} {}", i, send_url, libcdoc::toHex(kek_shares[i])); int result = network->sendShare(transaction_ids[i], send_url, RecipientInfo_i, kek_shares[i]); if (result < 0) FAIL(network->getLastErrorStr(result), result); diff --git a/cdoc/CDocCipher.cpp b/cdoc/CDocCipher.cpp index ed9096d3..89924b2d 100644 --- a/cdoc/CDocCipher.cpp +++ b/cdoc/CDocCipher.cpp @@ -73,7 +73,7 @@ struct ToolPKCS11 : public libcdoc::PKCS11Backend { ToolPKCS11(const std::string& library, const CipherInfo& info) : PKCS11Backend(library), c_info(info) {} libcdoc::result_t connectToKey(int idx, bool priv) override final { - const libcdoc::RcptInfo *rcpt = c_info.getRcpt(idx); + const libcdoc::RcptInfo *rcpt = c_info.getRcpt(idx); if (!rcpt) return libcdoc::INTERNAL_ERROR; if (!priv) { return useSecretKey(long(rcpt->p11.slot), rcpt->secret, rcpt->p11.key_id, rcpt->p11.key_label); @@ -138,24 +138,54 @@ struct ToolCrypto : public libcdoc::CryptoBackend { auto key = make_unique_ptr(d2i_PrivateKey(EVP_PKEY_RSA, nullptr, &p, rcpt->secret.size())); if (!key) return libcdoc::CRYPTO_ERROR; + // Note: EVP_PKEY_* functions return 1 on success, 0 on a (possibly + // recoverable) failure such as RSA padding mismatch, and a negative + // value on fatal errors. Anything other than 1 must be treated as + // failure - returning 0 as success would leak partial/garbage + // plaintext and create a Bleichenbacher-style padding oracle for + // PKCS#1 v1.5 (CDoc1) decryption. + + if (!oaep) { + // If oaep is false, dst must be pre-allocated to the expected length. + // This is required to apply the implicit-rejection countermeasure on padding failure. + if (dst.empty()) { + LOG_ERROR("ToolCrypto::decryptRSA: dst must be pre-allocated for PKCS#1 v1.5 decryption"); + return libcdoc::CRYPTO_ERROR; + } + // Implicit-rejection-aware decrypt. Returns OK on padding success + // AND on padding failure (with synthetic output). Only fatal errors + // (e.g. ct size mismatch with modulus) are surfaced as CRYPTO_ERROR. + return libcdoc::Crypto::decryptRSAv15_implicitReject(dst, key.get(), data, dst.size()); + } + auto ctx = make_unique_ptr(EVP_PKEY_CTX_new(key.get(), nullptr)); if (!ctx) return libcdoc::CRYPTO_ERROR; - int result = EVP_PKEY_decrypt_init(ctx.get()); - if (result < 0) return libcdoc::CRYPTO_ERROR; + if (EVP_PKEY_decrypt_init(ctx.get()) != 1) + return libcdoc::CRYPTO_ERROR; + if (oaep) { - if ((EVP_PKEY_CTX_set_rsa_padding(ctx.get(), RSA_PKCS1_OAEP_PADDING) < 0) || - (EVP_PKEY_CTX_set_rsa_oaep_md(ctx.get(), EVP_sha256()) < 0) || - (EVP_PKEY_CTX_set_rsa_mgf1_md(ctx.get(), EVP_sha256()) < 0)) + if (EVP_PKEY_CTX_set_rsa_padding(ctx.get(), RSA_PKCS1_OAEP_PADDING) != 1 || + EVP_PKEY_CTX_set_rsa_oaep_md(ctx.get(), EVP_sha256()) != 1 || + EVP_PKEY_CTX_set_rsa_mgf1_md(ctx.get(), EVP_sha256()) != 1) { return libcdoc::CRYPTO_ERROR; + } } - size_t outlen; - result = EVP_PKEY_decrypt(ctx.get(), NULL, &outlen, data.data(), data.size()); - if (result < 0) return libcdoc::CRYPTO_ERROR; + // First call queries the maximum output size. + size_t outlen = 0; + if (EVP_PKEY_decrypt(ctx.get(), nullptr, &outlen, data.data(), data.size()) != 1) + return libcdoc::CRYPTO_ERROR; + dst.resize(outlen); - result = EVP_PKEY_decrypt(ctx.get(), dst.data(), &outlen, data.data(), data.size()); - if (result < 0) return libcdoc::CRYPTO_ERROR; + if (EVP_PKEY_decrypt(ctx.get(), dst.data(), &outlen, data.data(), data.size()) != 1) { + // Wipe any partial plaintext that may have been written before + // padding verification failed; it could otherwise be observed by + // callers and used to mount a padding-oracle attack. + libcdoc::cleanse(dst); + dst.clear(); + return libcdoc::CRYPTO_ERROR; + } dst.resize(outlen); return libcdoc::OK; @@ -359,7 +389,7 @@ fill_recipients_from_rcpt_info(ToolConf& conf, ToolCrypto& crypto, std::vector& locks = rdr->getLocks(); if (!recipient.label.empty()) { @@ -448,8 +477,8 @@ int CDocCipher::Decrypt(ToolConf& conf, RcptInfo& recipient) } } } else if (recipient.lock_idx >= 0) { - if (recipient.lock_idx >= locks.size()) { - LOG_ERROR("Lock index is out of range"); + if (recipient.lock_idx >= (int)locks.size()) { + LOG_ERROR("Label index is out of range"); return 1; } lock_idx = recipient.lock_idx; @@ -503,15 +532,40 @@ int CDocCipher::Decrypt(const unique_ptr& rdr, unsigned int lock_idx result = rdr->nextFile(name, size); while (result == libcdoc::OK) { LOG_DBG("Got file: {} {}", name, size); - filesystem::path fpath(name); - if (fpath.is_absolute()) { - LOG_WARN("File has absolute path, stripping"); - fpath = fpath.filename(); - } else if (fpath.has_parent_path()) { - LOG_WARN("File has parent path, stripping"); - fpath = fpath.filename(); - } - fpath = base_path / fpath; + + // Sanitise the attacker-controlled file name before composing the + // extraction path. See libcdoc::sanitiseExtractedFilename for the + // exact set of rejections (path separators, "..", drive letters, + // NUL bytes, reserved Windows device names, etc.). + std::string safeName = libcdoc::sanitiseExtractedFilename(name); + if (safeName.empty()) { + LOG_ERROR("Refusing unsafe entry name '{}'", name); + return 1; + } + filesystem::path fpath = base_path / filesystem::path(libcdoc::encodeName(safeName)); + + // Defence in depth: ensure the lexically-resolved target stays + // under base_path, even if a previously-extracted entry placed a + // symlink there. + std::error_code ec; + filesystem::path canonicalBase = filesystem::weakly_canonical(base_path, ec); + if (ec) { + LOG_ERROR("Cannot canonicalise base path {}: {}", + base_path.string(), ec.message()); + return 1; + } + filesystem::path canonicalTarget = filesystem::weakly_canonical(fpath, ec); + if (ec) { + LOG_ERROR("Cannot canonicalise target path {}: {}", + fpath.string(), ec.message()); + return 1; + } + if (canonicalTarget.parent_path() != canonicalBase) { + LOG_ERROR("Refusing entry '{}': target {} escapes base {}", + name, canonicalTarget.string(), canonicalBase.string()); + return 1; + } + std::ofstream ofs(fpath, std::ios_base::binary); if (ofs.bad()) { LOG_ERROR("Cannot open file {} for writing", fpath.string()); diff --git a/cdoc/CMakeLists.txt b/cdoc/CMakeLists.txt index 7012d242..44c87a68 100644 --- a/cdoc/CMakeLists.txt +++ b/cdoc/CMakeLists.txt @@ -82,6 +82,14 @@ target_include_directories(cdoc PUBLIC if(NOT BUILD_SHARED_LIBS) target_compile_definitions(cdoc PUBLIC cdoc_STATIC) endif() +option(LIBCDOC_ALLOW_INSECURE_TLS "Allow disabling TLS certificate verification (for testing only)" OFF) +if(LIBCDOC_ALLOW_INSECURE_TLS) + target_compile_definitions(cdoc PRIVATE LIBCDOC_ALLOW_INSECURE_TLS) +endif() +option(LIBCDOC_CRYPTO_TRACE "Enable logging of cryptographic key material (for debugging only)" OFF) +if(LIBCDOC_CRYPTO_TRACE) + target_compile_definitions(cdoc PRIVATE LIBCDOC_CRYPTO_TRACE) +endif() target_link_libraries(cdoc PRIVATE OpenSSL::SSL LibXml2::LibXml2 @@ -94,7 +102,7 @@ target_link_libraries(cdoc PRIVATE if(BUILD_TOOLS) add_executable(cdoc-tool cdoc-tool.cpp) target_include_directories(cdoc-tool PRIVATE ${OPENSSL_INCLUDE_DIR}) - target_link_libraries(cdoc-tool cdoc_ver cdoc) + target_link_libraries(cdoc-tool cdoc_ver cdoc OpenSSL::SSL) target_link_options(cdoc-tool PRIVATE $<$: /MANIFEST:NO /MANIFEST:EMBED /MANIFESTINPUT:${CMAKE_CURRENT_SOURCE_DIR}/cdoc-tool.manifest> ) diff --git a/cdoc/Crypto.cpp b/cdoc/Crypto.cpp index 47238ec2..daa72e70 100644 --- a/cdoc/Crypto.cpp +++ b/cdoc/Crypto.cpp @@ -19,6 +19,7 @@ #include "CDoc.h" #include "Crypto.h" #include "Utils.h" +#include "utils/ct.h" #define OPENSSL_SUPPRESS_DEPRECATED @@ -28,13 +29,26 @@ #include #include +#include #include +#include #include +#include #include #include +#if OPENSSL_VERSION_NUMBER >= 0x30200000L +#include +#include +#endif + #include +#include #include +#include +#include +#include +#include using namespace libcdoc; @@ -47,21 +61,29 @@ const std::string Crypto::AGREEMENT_MTH = "http://www.w3.org/2009/xmlenc11#ECDH- std::vector Crypto::AESWrap(const std::vector &key, const std::vector &data, bool encrypt) { - AES_KEY aes; - // fixme: Fix SSL_FAILED, current solution is idiotic - if (encrypt && !SSL_FAILED(AES_set_encrypt_key(key.data(), int(key.size()) * 8, &aes), "AES_set_encrypt_key") || - !encrypt && !SSL_FAILED(AES_set_decrypt_key(key.data(), int(key.size()) * 8, &aes), "AES_set_decrypt_key")) + // Note: AES_set_{encrypt,decrypt}_key return 0 on success and a negative + // value on failure - the opposite convention from OpenSSL's EVP_* APIs that + // SSL_FAILED is designed for. Check the return value directly. + AES_KEY aes; + const int key_bits = int(key.size()) * 8; + const int key_init_rv = encrypt + ? AES_set_encrypt_key(key.data(), key_bits, &aes) + : AES_set_decrypt_key(key.data(), key_bits, &aes); + if (key_init_rv != 0) { + LOG_SSL_ERROR(encrypt ? "AES_set_encrypt_key" : "AES_set_decrypt_key"); return {}; + } - std::vector result(data.size() + 8); - int size = encrypt ? - AES_wrap_key(&aes, nullptr, result.data(), data.data(), data.size()) : - AES_unwrap_key(&aes, nullptr, result.data(), data.data(), data.size()); - if(size > 0) - result.resize(size_t(size)); - else - result.clear(); - return result; + std::vector result(data.size() + 8); + const int size = encrypt + ? AES_wrap_key(&aes, nullptr, result.data(), data.data(), data.size()) + : AES_unwrap_key(&aes, nullptr, result.data(), data.data(), data.size()); + if (size <= 0) { + result.clear(); + return result; + } + result.resize(size_t(size)); + return result; } const EVP_CIPHER *Crypto::cipher(const std::string &algo) @@ -120,10 +142,10 @@ std::vector Crypto::concatKDF(const std::string &hashAlg, uint32_t keyD std::vector Crypto::concatKDF(const std::string &hashAlg, uint32_t keyDataLen, const std::vector &z, const std::vector &AlgorithmID, const std::vector &PartyUInfo, const std::vector &PartyVInfo) { - LOG_DBG("Ksr {}", toHex(z)); - LOG_DBG("AlgorithmID {}", toHex(AlgorithmID)); - LOG_DBG("PartyUInfo {}", toHex(PartyUInfo)); - LOG_DBG("PartyVInfo {}", toHex(PartyVInfo)); + LOG_TRACE_KEY("Ksr {}", z); + LOG_TRACE_KEY("AlgorithmID {}", AlgorithmID); + LOG_TRACE_KEY("PartyUInfo {}", PartyUInfo); + LOG_TRACE_KEY("PartyVInfo {}", PartyVInfo); std::vector otherInfo; otherInfo.insert(otherInfo.cend(), AlgorithmID.cbegin(), AlgorithmID.cend()); @@ -138,14 +160,15 @@ Crypto::encrypt(EVP_PKEY *pub, int padding, const std::vector &data) auto ctx = make_unique_ptr(EVP_PKEY_CTX_new(pub, nullptr)); size_t size = 0; if (SSL_FAILED(EVP_PKEY_encrypt_init(ctx.get()), "EVP_PKEY_encrypt_init") || - SSL_FAILED(EVP_PKEY_CTX_set_rsa_padding(ctx.get(), padding), "EVP_PKEY_CTX_set_rsa_padding") || - SSL_FAILED(EVP_PKEY_encrypt(ctx.get(), nullptr, &size, data.data(), data.size()), "EVP_PKEY_encrypt")) + SSL_FAILED(EVP_PKEY_CTX_set_rsa_padding(ctx.get(), padding), "EVP_PKEY_CTX_set_rsa_padding")) return {}; if(padding == RSA_PKCS1_OAEP_PADDING) { if (SSL_FAILED(EVP_PKEY_CTX_set_rsa_oaep_md(ctx.get(), EVP_sha256()), "EVP_PKEY_CTX_set_rsa_oaep_md") || SSL_FAILED(EVP_PKEY_CTX_set_rsa_mgf1_md(ctx.get(), EVP_sha256()), "EVP_PKEY_CTX_set_rsa_mgf1_md")) return {}; } + if (SSL_FAILED(EVP_PKEY_encrypt(ctx.get(), nullptr, &size, data.data(), data.size()), "EVP_PKEY_encrypt")) + return {}; std::vector result(int(size), 0); if(SSL_FAILED(EVP_PKEY_encrypt(ctx.get(), result.data(), &size, data.data(), data.size()), "EVP_PKEY_encrypt")) @@ -202,27 +225,30 @@ std::vector Crypto::deriveSharedSecret(EVP_PKEY *pkey, EVP_PKEY *peerPK return sharedSecret; sharedSecret.resize(sharedSecretLen); - if(EVP_PKEY_derive(ctx.get(), sharedSecret.data(), &sharedSecretLen) <= 0) + if(EVP_PKEY_derive(ctx.get(), sharedSecret.data(), &sharedSecretLen) <= 0) { sharedSecret.clear(); + return sharedSecret; + } + sharedSecret.resize(sharedSecretLen); return sharedSecret; } Crypto::Key Crypto::generateKey(const std::string &method) { const EVP_CIPHER *c = cipher(method); -#ifdef WIN32 - RAND_screen(); -#else - RAND_load_file("/dev/urandom", 1024); -#endif + if (!c) { + LOG_ERROR("generateKey: unsupported cipher method {}", method); + return {}; + } Key key(EVP_CIPHER_key_length(c), EVP_CIPHER_iv_length(c)); - uint8_t salt[PKCS5_SALT_LEN], indata[128]; - RAND_bytes(salt, sizeof(salt)); - RAND_bytes(indata, sizeof(indata)); - if (SSL_FAILED(EVP_BytesToKey(c, EVP_sha256(), salt, indata, sizeof(indata), 1, key.key.data(), key.iv.data()), "EVP_BytesToKey")) + if (RAND_status() != 1) { + LOG_ERROR("generateKey: OpenSSL PRNG not seeded"); return {}; - else - return key; + } + if (SSL_FAILED(RAND_bytes(key.key.data(), int(key.key.size())), "RAND_bytes") || + SSL_FAILED(RAND_bytes(key.iv.data(), int(key.iv.size())), "RAND_bytes")) + return {}; + return key; } uint32_t Crypto::keySize(const std::string &algo) @@ -440,6 +466,299 @@ void Crypto::LogSslError(const char* funcName, const char* file, int line) } } +namespace { + +// Per-scope consecutive-failure counter. Process-wide. The mutex protects a +// small map keyed by scope string; lock contention is negligible because +// throttle invocations only happen on the failed-decrypt path which is +// already an attacker-budget-limited code path. +std::mutex g_throttle_mutex; +std::unordered_map g_throttle_failures; + +constexpr std::chrono::milliseconds kThrottleBase{50}; +constexpr std::chrono::milliseconds kThrottleCap{5000}; + +} // anonymous namespace + +void Crypto::rsaOracleThrottleOnFailure(const std::string& scope) +{ + unsigned int failures = 0; + { + std::lock_guard lk(g_throttle_mutex); + failures = ++g_throttle_failures[scope]; + } + + // delay = base * 2^(failures-1), capped at kThrottleCap. Computed on a + // wider integer to avoid overflow for very large failure counts. + auto delay = kThrottleBase; + for (unsigned int i = 1; i < failures && delay < kThrottleCap; ++i) { + delay *= 2; + } + if (delay > kThrottleCap) delay = kThrottleCap; + + LOG_WARN("RSA decrypt failure (scope={}, consecutive={}); throttling for {} ms", + scope, failures, delay.count()); + std::this_thread::sleep_for(delay); +} + +void Crypto::rsaOracleThrottleOnSuccess(const std::string& scope) +{ + std::lock_guard lk(g_throttle_mutex); + g_throttle_failures.erase(scope); +} + +namespace { + +// Derive a per-(privkey, ciphertext) deterministic byte string used as the +// "synthetic plaintext" when PKCS#1 v1.5 unpadding fails. We follow the +// recipe in RFC 8017 section 7.2.2 and OpenSSL 3.2's implicit-rejection +// implementation: HMAC-SHA-256(privkey_seed, ciphertext) seeded into HKDF +// expand. The output is deterministic-of-(key, ct) so repeating the same +// query yields the same synthetic output (this is what defeats the +// distinguisher); but unpredictable to an attacker who does not know the +// private key. +std::vector syntheticPlaintext(EVP_PKEY *priv, + const std::vector &ct, + size_t out_len) +{ + if (!priv || out_len == 0) return std::vector(out_len, 0); + + // Use the private key's PKCS#8 DER as the HMAC key. It is private to the + // decryption process and stable across calls. + int der_len = i2d_PrivateKey(priv, nullptr); + if (der_len <= 0) + return std::vector(out_len, 0); + std::vector mac_key(size_t(der_len), 0); + { + unsigned char *p = mac_key.data(); + if (i2d_PrivateKey(priv, &p) != der_len) { + libcdoc::cleanse(mac_key); + return std::vector(out_len, 0); + } + } + + std::vector prk = Crypto::sign_hmac(mac_key, ct); + libcdoc::cleanse(mac_key); + if (prk.empty()) + return std::vector(out_len, 0); + + auto out = Crypto::expand(prk, "cdoc1-rsa-implicit-reject", int(out_len)); + libcdoc::cleanse(prk); + if (out.size() != out_len) { + libcdoc::cleanse(out); + return std::vector(out_len, 0); + } + return out; +} + +// Constant-time PKCS#1 v1.5 unpadding. Walks the entire EM block in a +// data-independent fashion regardless of where (or whether) the 0x00 +// separator is found, the value of the leading bytes, or the eventual +// message length. Produces a single byte mask `good` (0xFF on valid +// padding, 0x00 otherwise) and a copy of either the recovered message +// or the synthetic plaintext into `dst`. dst is always exactly +// expected_len bytes long. +void unpadPKCS1v15CT(const std::vector &em, + const std::vector &synth, + size_t expected_len, + std::vector &dst) +{ + using namespace libcdoc::ct; + + dst.assign(expected_len, 0); + + // Need at least 0x00 || 0x02 || PS(>=8) || 0x00 || M + // -> EM length must be >= 11 + expected_len. + if (em.size() < 11u + expected_len) { + // Caller-side guarantees this in normal use because the modulus is + // always larger than expected_len. Still, fall back to synthetic + // output rather than reading out-of-bounds. + for (size_t i = 0; i < expected_len; ++i) + dst[i] = synth[i]; + return; + } + + // Initial header check. + uint8_t good = 0xFF; + good &= eq8(em[0], 0x00); + good &= eq8(em[1], 0x02); + + // Find the index of the first 0x00 byte at index >= 2. + // We must walk every byte of EM regardless of where the byte happens + // to be, otherwise a timing channel leaks the position of the first + // 0x00 (the classic "Manger / Bardou" oracle). + size_t first_zero_idx = 0; + uint8_t found_zero = 0x00; + for (size_t i = 2; i < em.size(); ++i) { + uint8_t is_zero = eq8(em[i], 0x00); + // latch the first index at which is_zero is set + uint8_t latch = uint8_t(is_zero & ~found_zero); + // "if latch then first_zero_idx = i". We can't branch; do it + // arithmetically. (i fits comfortably in size_t.) + const size_t mask_size = (latch == 0xFF) ? ~size_t(0) : size_t(0); + first_zero_idx = (i & mask_size) | (first_zero_idx & ~mask_size); + found_zero = uint8_t(found_zero | is_zero); + } + good &= found_zero; + + // PS must be at least 8 bytes -> first 0x00 index >= 10. + good &= ge_size(first_zero_idx, 10); + + // Message starts after the separator and runs to the end of EM. + // (We don't need ge here because if found_zero is 0xFF then + // first_zero_idx <= em.size()-1.) + size_t msg_off = first_zero_idx + 1; + size_t msg_len = (msg_off <= em.size()) ? (em.size() - msg_off) : 0; + + // Check that the message length matches what the caller expects. + good &= eq32(uint32_t(msg_len), uint32_t(expected_len)); + + // Constant-time copy: walk every possible message offset, and for + // each output position i select em[msg_off + i] if it is in range, + // otherwise 0. We then conditionally mux it against the synthetic + // plaintext using `good`. + // + // Important: the inner read em[src_idx] must not depend on `good` in + // a way that the compiler could turn into a conditional load. We + // therefore always perform the read and clamp src_idx to a valid + // range (em.size() - 1). When good==0 we discard the value. + for (size_t i = 0; i < expected_len; ++i) { + size_t src_idx = msg_off + i; + // Clamp: if src_idx >= em.size() use em[em.size()-1] (always in + // range since em.size() >= 11+expected_len > 0). The clamped value + // is replaced by synth[i] below when good == 0, so the actual + // bytes read here never reach the caller. + size_t in_range = size_t(ge_size(em.size() - 1, src_idx)); // 0 or 0xFF + size_t mask = in_range & ~size_t(0); + size_t safe_idx = (src_idx & mask) | ((em.size() - 1) & ~mask); + uint8_t real = em[safe_idx]; + uint8_t synthetic = synth[i]; + dst[i] = uint8_t((real & good) | (synthetic & uint8_t(~good))); + } +} + +} // anonymous namespace + +int Crypto::rsaImplicitRejectFromEM(std::vector& dst, + const std::vector& em, + const std::vector& /*ct*/, + const std::vector& synth_seed, + size_t expected_len) +{ + // The caller passes a key-derived synthetic seed already sized to + // `expected_len`. We don't recompute it here so that PKCS#11 / CNG + // callers who only have access to a public key (the private key never + // leaves the token) can still produce a stable synthetic output by + // seeding from any private-key-derived material they have - typically + // the certificate fingerprint plus the ciphertext. + if (synth_seed.size() != expected_len) + return CRYPTO_ERROR; + + unpadPKCS1v15CT(em, synth_seed, expected_len, dst); + return OK; +} + +int Crypto::decryptRSAv15_implicitReject(std::vector& dst, + EVP_PKEY *priv, + const std::vector& ct, + size_t expected_len) +{ + if (!priv || expected_len == 0) + return CRYPTO_ERROR; + + auto ctx = make_unique_ptr(EVP_PKEY_CTX_new(priv, nullptr)); + if (!ctx) { + LOG_SSL_ERROR("EVP_PKEY_CTX_new"); + return CRYPTO_ERROR; + } + +#if OPENSSL_VERSION_NUMBER >= 0x30200000L + // Native fast path: OpenSSL 3.2+ implements the implicit-rejection + // countermeasure internally, with platform-specific constant-time + // primitives, when this control is set AFTER EVP_PKEY_decrypt_init. + // + // Behaviour with implicit rejection enabled: a successful PKCS#1 v1.5 + // unpad returns the original plaintext (length = M length). A failed + // unpad returns a deterministic synthetic message of length + // (modulus_bytes - 11) - the maximum unpad length for the modulus. + // Either way EVP_PKEY_decrypt returns 1. + // + // We allocate a buffer of (modulus_bytes - 11) so both cases fit, then + // accept the result iff outlen == expected_len. A wrong-length + // unpadding (real bug or wrong-key-with-coincidentally-good-padding) + // is treated as a padding failure: fall through to the software path + // which produces a synthetic plaintext of expected_len bytes. + if (EVP_PKEY_decrypt_init(ctx.get()) == 1 && + EVP_PKEY_CTX_set_rsa_padding(ctx.get(), RSA_PKCS1_PADDING) == 1) { + unsigned int impl_reject = 1; + OSSL_PARAM params[] = { + OSSL_PARAM_construct_uint(OSSL_ASYM_CIPHER_PARAM_IMPLICIT_REJECTION, + &impl_reject), + OSSL_PARAM_END + }; + if (EVP_PKEY_CTX_set_params(ctx.get(), params) == 1) { + const size_t mod_size = size_t(EVP_PKEY_get_size(priv)); + if (mod_size > 11 + expected_len && ct.size() == mod_size) { + // Allocate enough room for the worst-case (synthetic) + // output, then ask the API how many bytes it wrote. + std::vector tmp(mod_size, 0); + size_t outlen = tmp.size(); + int rv = EVP_PKEY_decrypt(ctx.get(), tmp.data(), &outlen, + ct.data(), ct.size()); + if (rv == 1 && outlen == expected_len) { + dst.assign(tmp.begin(), tmp.begin() + outlen); + libcdoc::cleanse(tmp); + return OK; + } + libcdoc::cleanse(tmp); + // Length didn't match - fall through to software path so we + // produce a synthetic plaintext of the correct length. + } + } + } + // Reset the context for the fall-through software path. + ctx = make_unique_ptr(EVP_PKEY_CTX_new(priv, nullptr)); + if (!ctx) { + LOG_SSL_ERROR("EVP_PKEY_CTX_new"); + return CRYPTO_ERROR; + } +#endif + + // Software path: raw RSA decrypt + constant-time unpadding. + if (EVP_PKEY_decrypt_init(ctx.get()) != 1 || + EVP_PKEY_CTX_set_rsa_padding(ctx.get(), RSA_NO_PADDING) != 1) { + LOG_SSL_ERROR("EVP_PKEY_decrypt_init/RSA_NO_PADDING"); + return CRYPTO_ERROR; + } + + const size_t mod_size = size_t(EVP_PKEY_get_size(priv)); + if (mod_size == 0 || ct.size() != mod_size) { + // Genuine input error - return CRYPTO_ERROR rather than synthetic + // bytes. The shape of this failure is independent of any padding + // bits, so it does not feed an oracle. + return CRYPTO_ERROR; + } + + std::vector em(mod_size, 0); + size_t em_len = mod_size; + int rv = EVP_PKEY_decrypt(ctx.get(), em.data(), &em_len, + ct.data(), ct.size()); + if (rv != 1 || em_len != mod_size) { + // Raw RSA can fail if ct >= modulus. Produce a synthetic plaintext + // anyway so the timing/return shape matches a "bad padding" path + // and does not leak the cause. + libcdoc::cleanse(em); + dst = syntheticPlaintext(priv, ct, expected_len); + return OK; + } + + std::vector synth = syntheticPlaintext(priv, ct, expected_len); + unpadPKCS1v15CT(em, synth, expected_len, dst); + libcdoc::cleanse(em); + libcdoc::cleanse(synth); + return OK; +} + EncryptionConsumer::EncryptionConsumer(DataConsumer &dst, const std::string &method, const Crypto::Key &key) : EncryptionConsumer(dst, Crypto::cipher(method), key) {} diff --git a/cdoc/Crypto.h b/cdoc/Crypto.h index 0c2bcaa4..03f93789 100644 --- a/cdoc/Crypto.h +++ b/cdoc/Crypto.h @@ -58,8 +58,8 @@ class Crypto Key() {} ~Key() { - std::fill(key.begin(), key.end(), 0); - std::fill(iv.begin(), iv.end(), 0); + libcdoc::cleanse(key); + libcdoc::cleanse(iv); } Key(std::vector _key, std::vector _iv) : key(std::move(_key)), iv(std::move(_iv)) {} Key(size_t keySize, size_t ivSize) : key(keySize), iv(ivSize) {} @@ -98,6 +98,96 @@ class Crypto static std::vector random(uint32_t len = 32); static int xor_data(std::vector& dst, const std::vector &lhs, const std::vector &rhs); + /** + * @brief Decrypt an RSA PKCS#1 v1.5 ciphertext with implicit rejection. + * + * Implements the decryption procedure of RFC 8017 section 7.2.2 with the + * "implicit rejection" countermeasure described by Bleichenbacher / + * RFC 8017 Appendix B / OpenSSL's @c EVP_PKEY_CTX_set_rsa_implicit_rejection. + * On successful unpadding produces the recovered plaintext; on padding + * failure produces a deterministic synthetic plaintext derived from the + * private key and the ciphertext, of the requested @p expected_len bytes, + * indistinguishable from a real decryption to an attacker who does not + * already know the private key. + * + * The function ALWAYS returns @c OK and ALWAYS produces exactly + * @p expected_len output bytes for any well-formed ciphertext (size equal + * to the modulus length). The caller MUST treat the output as a candidate + * key whose validity can only be confirmed by a downstream authenticated + * step (AES-GCM tag, HMAC, ...). It MUST NOT branch on the output's + * structure or surface a different error for "decryption failed" vs + * "key was wrong" - doing so reintroduces the very oracle this function + * exists to remove. + * + * On OpenSSL >= 3.2 this delegates to OpenSSL's native implementation; on + * older OpenSSL versions it implements the same algorithm in software. + * + * @param dst destination buffer (resized to @p expected_len) + * @param priv RSA private key + * @param ct ciphertext (must equal modulus length) + * @param expected_len length of plaintext the caller expects to receive + * @return OK on success, CRYPTO_ERROR only for unrecoverable, non-padding + * failures (e.g. ct size mismatch with modulus) + */ + static int decryptRSAv15_implicitReject(std::vector& dst, + EVP_PKEY *priv, + const std::vector& ct, + size_t expected_len); + + /** + * @brief Apply a delay proportional to the number of consecutive + * decrypt failures recorded for a given (process, key) pair. + * + * Bleichenbacher / cross-protocol attacks against RSA-PKCS#1 v1.5 require + * a large number of adaptive queries against the same victim + * ciphertext-key pair. This helper introduces an exponentially-growing + * sleep on consecutive decrypt failures, which dramatically increases + * the wall-clock cost of a remote oracle attack while remaining + * essentially invisible during normal use (one or two failures only). + * + * The throttle is per-process and is designed to be advisory: long- + * running services that decrypt many containers should additionally + * implement per-recipient rate limits in their host application. + * + * @param scope an arbitrary string that scopes the failure counter; use + * the recipient identifier or "default" if you don't have + * one. Different scopes have independent counters. + */ + static void rsaOracleThrottleOnFailure(const std::string& scope); + + /** + * @brief Reset the consecutive-failure counter for the given scope. + * + * Should be called after any successful authenticated decrypt to + * prevent the throttle from punishing legitimate retries. + */ + static void rsaOracleThrottleOnSuccess(const std::string& scope); + + /** + * @brief Constant-time PKCS#1 v1.5 unpadding from a pre-decrypted EM block. + * + * Same semantics as @ref decryptRSAv15_implicitReject, but skips the raw + * RSA decryption step. Intended for backends (PKCS#11, CNG) that obtain + * the EM block via raw RSA (CKM_RSA_X_509 / BCRYPT_PAD_NONE) and need to + * apply the constant-time unpadding in user space. + * + * @param dst destination buffer (resized to @p expected_len) + * @param em EM block as returned by raw RSA decryption + * @param ct original ciphertext (used as input to the synthetic + * plaintext derivation - MUST be the *same* bytes the + * caller decrypted, so that retries on the same + * ciphertext yield the same synthetic output) + * @param synth_seed private-key-derived seed used to make synthetic + * output unpredictable to attackers + * @param expected_len length of plaintext the caller expects to receive + * @return OK on success + */ + static int rsaImplicitRejectFromEM(std::vector& dst, + const std::vector& em, + const std::vector& ct, + const std::vector& synth_seed, + size_t expected_len); + static bool isError(int retval, const char* funcName, const char* file, int line) { if (retval < 1) { @@ -113,6 +203,7 @@ class Crypto struct EncryptionConsumer final : public DataConsumer { EncryptionConsumer(DataConsumer &dst, const std::string &method, const Crypto::Key &key); EncryptionConsumer(DataConsumer &dst, const EVP_CIPHER *cipher, const Crypto::Key &key); + ~EncryptionConsumer() { libcdoc::cleanse(buf); } CDOC_DISABLE_MOVE_COPY(EncryptionConsumer) result_t write(const uint8_t *src, size_t size) noexcept final; result_t writeAAD(const std::vector &data) noexcept; @@ -129,6 +220,7 @@ struct EncryptionConsumer final : public DataConsumer { struct DecryptionSource final : public DataSource { DecryptionSource(DataSource &src, const std::string &method, const std::vector &key, size_t ivLen = 0); DecryptionSource(DataSource &src, const EVP_CIPHER *cipher, const std::vector &key, size_t ivLen = 0); + ~DecryptionSource() { libcdoc::cleanse(tag); } CDOC_DISABLE_MOVE_COPY(DecryptionSource) result_t read(unsigned char* dst, size_t size) noexcept final; diff --git a/cdoc/CryptoBackend.cpp b/cdoc/CryptoBackend.cpp index 56bc5ccd..17694cfe 100644 --- a/cdoc/CryptoBackend.cpp +++ b/cdoc/CryptoBackend.cpp @@ -47,9 +47,18 @@ CryptoBackend::getLastErrorStr(result_t code) const libcdoc::result_t CryptoBackend::random(std::vector& dst, unsigned int size) { - dst.resize(size); - int result = RAND_bytes(dst.data(), size); - return (result < 0) ? OPENSSL_ERROR : OK; + // RAND_bytes returns 1 on success, 0 if the PRNG could not gather enough + // entropy, and -1 if the requested method is not supported. Any value + // other than 1 means the buffer must NOT be used as random material. + dst.resize(size); + const int rv = RAND_bytes(dst.data(), size); + if (rv != 1) { + LOG_SSL_ERROR("RAND_bytes"); + libcdoc::cleanse(dst); + dst.clear(); + return OPENSSL_ERROR; + } + return OK; } libcdoc::result_t @@ -83,21 +92,21 @@ CryptoBackend::getKeyMaterial(std::vector& key_material, const std::vec int result = getSecret(secret, idx); if (result) return result; - LOG_DBG("Secret: {}", toHex(secret)); + LOG_TRACE_KEY("Secret: {}", secret); key_material = libcdoc::Crypto::pbkdf2_sha256(secret, pw_salt, kdf_iter); - std::fill(secret.begin(), secret.end(), 0); + libcdoc::cleanse(secret); if (key_material.empty()) return OPENSSL_ERROR; } else { int result = getSecret(key_material, idx); if (result) return result; - LOG_DBG("Secret: {}", toHex(key_material)); + LOG_TRACE_KEY("Secret: {}", key_material); if (key_material.size() != 32) { return INVALID_PARAMS; } } - LOG_DBG("Key material: {}", toHex(key_material)); + LOG_TRACE_KEY("Key material: {}", key_material); return OK; } @@ -106,14 +115,18 @@ libcdoc::result_t CryptoBackend::extractHKDF(std::vector& kek_pm, const std::vector& salt, const std::vector& pw_salt, int32_t kdf_iter, unsigned int idx) { - if (salt.empty()) return INVALID_PARAMS; - if ((kdf_iter > 0) && pw_salt.empty()) return INVALID_PARAMS; - std::vector key_material; + if (salt.empty()) return INVALID_PARAMS; + if ((kdf_iter > 0) && pw_salt.empty()) return INVALID_PARAMS; + std::vector key_material; int result = getKeyMaterial(key_material, pw_salt, kdf_iter, idx); - if (result) return result; - kek_pm = libcdoc::Crypto::extract(key_material, salt); - std::fill(key_material.begin(), key_material.end(), 0); - if (kek_pm.empty()) return OPENSSL_ERROR; + if (result) return result; + kek_pm = libcdoc::Crypto::extract(key_material, salt); + libcdoc::cleanse(key_material); + if (kek_pm.empty()) return OPENSSL_ERROR; + if (kek_pm.size() != 32) { + LOG_ERROR("KEK has incorrect size: {} (expected {})", kek_pm.size(), 32); + return INVALID_PARAMS; + } LOG_TRACE_KEY("Extract: {}", kek_pm); diff --git a/cdoc/CryptoBackend.h b/cdoc/CryptoBackend.h index 04253a2c..d3ce3c62 100644 --- a/cdoc/CryptoBackend.h +++ b/cdoc/CryptoBackend.h @@ -80,14 +80,22 @@ struct CDOC_EXPORT CryptoBackend { */ virtual result_t deriveECDH1(std::vector& dst, const std::vector &public_key, unsigned int idx) { return NOT_IMPLEMENTED; } /** - * @brief decryptRSA - * @param dst the destination container for decrypted data - * @param data encrypted data + * @brief decrypt RSA ciphertext + * + * If @c oaep == false the implementations MUST apply the implicit-rejection countermeasure (RFC 8017 section 7.2.2 / OpenSSL 3.2's + * @c EVP_PKEY_CTX_set_rsa_implicit_rejection): on padding failure they MUST return @c OK with @c dst filled with deterministic synthetic + * plaintext derived from the private key, and otherwise the recovered plaintext, both indistinguishable to an attacker who does not know the + * private key. The downstream AES decrypt acts as the authentication step that distinguishes a real key from a + * synthetic one. The @c dst has to be pre-allocated to the expected plaintext length by caller. + * + * @param dst the destination container for decrypted data (has to be pre-allocated if @c oaep == false) + * @param data RSA ciphertext (wrapped FMK) * @param oaep use OAEP padding * @param idx lock index (0-based) in container * @return error code or OK */ virtual result_t decryptRSA(std::vector& dst, const std::vector& data, bool oaep, unsigned int idx) { return NOT_IMPLEMENTED; }; + /** * @brief Derive key by ConcatKDF algorithm * diff --git a/cdoc/Io.cpp b/cdoc/Io.cpp index 1d845b1d..5106212c 100644 --- a/cdoc/Io.cpp +++ b/cdoc/Io.cpp @@ -102,17 +102,48 @@ OStreamConsumer::OStreamConsumer(const std::string& path) } result_t FileListConsumer::open(const std::string &name, int64_t size) { - std::string_view fileName = name; if (ofs.is_open()) { ofs.close(); } - size_t lastSlashPos = fileName.find_last_of("\\/"); - if (lastSlashPos != std::string::npos) { - fileName = fileName.substr(lastSlashPos + 1); + + // The file name comes from inside the (encrypted) container and is + // therefore fully attacker-controlled. Run it through the central + // sanitiser; reject the entry rather than write to a tampered path. + std::string safeName = libcdoc::sanitiseExtractedFilename(name); + if (safeName.empty()) { + LOG_ERROR("FileListConsumer::open: refusing unsafe entry name '{}'", name); + return DATA_FORMAT_ERROR; + } + + fs::path target = base / fs::path(encodeName(safeName)); + + // Defence in depth: even after sanitising the leaf name, an attacker + // who can plant a symlink at `base` (e.g. by extracting an earlier + // entry that the host application created earlier) could redirect + // writes outside `base`. weakly_canonical resolves any symlinks that + // already exist in the path; we then verify the parent directory of + // the target equals the canonical base. + std::error_code ec; + fs::path canonicalBase = fs::weakly_canonical(base, ec); + if (ec) { + LOG_ERROR("FileListConsumer::open: cannot canonicalise base {}: {}", + base.string(), ec.message()); + return OUTPUT_STREAM_ERROR; + } + fs::path canonicalTarget = fs::weakly_canonical(target, ec); + if (ec) { + LOG_ERROR("FileListConsumer::open: cannot canonicalise target {}: {}", + target.string(), ec.message()); + return OUTPUT_STREAM_ERROR; } - fs::path path(base); - path /= encodeName(fileName); - ofs.open(path, std::ios_base::binary); + if (canonicalTarget.parent_path() != canonicalBase) { + LOG_ERROR("FileListConsumer::open: refusing entry '{}' - target {} " + "escapes base {}", + name, canonicalTarget.string(), canonicalBase.string()); + return DATA_FORMAT_ERROR; + } + + ofs.open(target, std::ios_base::binary); return ofs.bad() ? OUTPUT_STREAM_ERROR : OK; } @@ -166,7 +197,7 @@ FileListSource::next(std::string& name, int64_t& size) name = _files[_current]; std::error_code ec; size = fs::file_size(path, ec); - if (!ec) return IO_ERROR; + if (ec) return IO_ERROR; return OK; } diff --git a/cdoc/KeyShares.cpp b/cdoc/KeyShares.cpp index 06d48875..7e77aa93 100644 --- a/cdoc/KeyShares.cpp +++ b/cdoc/KeyShares.cpp @@ -204,7 +204,7 @@ Signer::generateTickets(std::vector& dst, std::vector& s result_t SIDSigner::signDigest(std::vector& dst, const std::vector& digest) { - LOG_DBG("SID signing: {}", toHex(digest)); + LOG_TRACE_KEY("SID signing: {}", digest); result_t result = network->signSID(dst, cert, url, rp_uuid, rp_name, rcpt_id, digest, libcdoc::CryptoBackend::SHA_256); if (result != OK) { @@ -222,7 +222,7 @@ result_t libcdoc::MIDSigner::signDigest(std::vector& dst, const std::vector& digest) { - LOG_DBG("MID signing: {}", toHex(digest)); + LOG_TRACE_KEY("MID signing: {}", digest); result_t result = network->signMID(dst, cert, url, rp_uuid, rp_name, phone, rcpt_id, digest, libcdoc::CryptoBackend::SHA_256); if (result != OK) { diff --git a/cdoc/Lock.cpp b/cdoc/Lock.cpp index 37322529..983e7c3c 100644 --- a/cdoc/Lock.cpp +++ b/cdoc/Lock.cpp @@ -102,8 +102,13 @@ Lock::parseLabel(const std::string& label) std::string key = urlDecode(range_to_sv(*it)); std::ranges::transform(key, key.begin(), [](unsigned char c){ return std::tolower(c); }); ++it; - std::string value = urlDecode(range_to_sv(*it)); - parsed_label[std::move(key)] = std::move(value); + // Ubuntu 22 ranges behave wrongly + if (it == label_data_parts.end()) { + parsed_label[std::move(key)] = {}; + } else { + std::string value = urlDecode(range_to_sv(*it)); + parsed_label[std::move(key)] = std::move(value); + } } return parsed_label; diff --git a/cdoc/NetworkBackend.cpp b/cdoc/NetworkBackend.cpp index d306a198..f3569a25 100644 --- a/cdoc/NetworkBackend.cpp +++ b/cdoc/NetworkBackend.cpp @@ -40,6 +40,8 @@ #include #endif +#define CDOC_SSL_TIMEOUT 30 + using namespace std::literals::chrono_literals; using EC_KEY_sign = int (*)(int type, const unsigned char *dgst, int dlen, unsigned char *sig, unsigned int *siglen, const BIGNUM *kinv, const BIGNUM *r, EC_KEY *eckey); @@ -136,6 +138,39 @@ getMIDSIDDescription(libcdoc::result_t code) } return {}; } + +// Map a CryptoBackend::HashAlgorithm to the algorithm name string the +// SK Smart-ID / Mobile-ID JSON API expects ("SHA224", "SHA256", +// "SHA384", "SHA512"). Returns an empty string_view when the algorithm +// is not in the supported set; callers MUST treat that as a hard error +// rather than indexing an array - foreign-language bindings (SWIG / Java +// / C#) and any future addition to the HashAlgorithm enum can otherwise +// drive the previous `algo_names[(int)algo]` lookup out of bounds. +// +// The function is constexpr so that the static_assert block below can +// verify at compile time that every documented enumerator maps to a +// non-empty string. Any new HashAlgorithm value added to CryptoBackend.h +// will trigger -Wswitch (no default branch covers it) and the +// static_asserts will catch it explicitly. +static constexpr std::string_view +hashAlgorithmToSidMidName(libcdoc::CryptoBackend::HashAlgorithm algo) noexcept +{ + switch (algo) { + case libcdoc::CryptoBackend::HashAlgorithm::SHA_224: return "SHA224"; + case libcdoc::CryptoBackend::HashAlgorithm::SHA_256: return "SHA256"; + case libcdoc::CryptoBackend::HashAlgorithm::SHA_384: return "SHA384"; + case libcdoc::CryptoBackend::HashAlgorithm::SHA_512: return "SHA512"; + } + return {}; +} + +static_assert(hashAlgorithmToSidMidName(libcdoc::CryptoBackend::HashAlgorithm::SHA_224) == "SHA224"); +static_assert(hashAlgorithmToSidMidName(libcdoc::CryptoBackend::HashAlgorithm::SHA_256) == "SHA256"); +static_assert(hashAlgorithmToSidMidName(libcdoc::CryptoBackend::HashAlgorithm::SHA_384) == "SHA384"); +static_assert(hashAlgorithmToSidMidName(libcdoc::CryptoBackend::HashAlgorithm::SHA_512) == "SHA512"); +// Out-of-range value (e.g. coming from a SWIG-generated foreign caller) +// must produce an empty result rather than reading past the array. +static_assert(hashAlgorithmToSidMidName(static_cast(99)).empty()); #endif thread_local std::string error; @@ -184,10 +219,16 @@ setPeerCertificates(httplib::SSLClient& cli, libcdoc::NetworkBackend *network, c } cli.enable_server_certificate_verification(true); cli.enable_server_hostname_verification(true); - } else { - // TODO: Allow only if global parameter is set + } + else { +#ifdef LIBCDOC_ALLOW_INSECURE_TLS + LOG_WARN("TLS certificate verification disabled (LIBCDOC_ALLOW_INSECURE_TLS)"); cli.enable_server_certificate_verification(false); cli.enable_server_hostname_verification(false); +#else + error = "No peer TLS certificates configured"; + return libcdoc::CONFIGURATION_ERROR; +#endif } return libcdoc::OK; } @@ -214,6 +255,18 @@ setProxy(httplib::SSLClient& cli, libcdoc::NetworkBackend *network) } } +// +// Set SSL timeouts +// +static libcdoc::result_t +applySSLTimeout(httplib::SSLClient& cli, libcdoc::NetworkBackend *network) +{ + cli.set_connection_timeout(CDOC_SSL_TIMEOUT); + cli.set_read_timeout(CDOC_SSL_TIMEOUT); + cli.set_write_timeout(CDOC_SSL_TIMEOUT); + return libcdoc::OK; +} + // // Post request and fetch response // @@ -278,6 +331,7 @@ libcdoc::NetworkBackend::sendKey (CapsuleInfo& dst, const std::string& url, cons if (result != libcdoc::OK) return result; httplib::SSLClient cli(host, port); + if (result = applySSLTimeout(cli, this); result != OK) return result; result = setPeerCertificates(cli, this, buildURL(host, port)); if (result != OK) return result; if (result = setProxy(cli, this); result != OK) return result; @@ -298,9 +352,13 @@ libcdoc::NetworkBackend::sendKey (CapsuleInfo& dst, const std::string& url, cons error = FORMAT("No Location header in response"); return NETWORK_ERROR; } + constexpr std::string_view prefix = "/key-capsules/"; + if (location.compare(0, prefix.size(), prefix) != 0) { + error = FORMAT("Unexpected Location header value"); + return NETWORK_ERROR; + } error = {}; - /* Remove /key-capsules/ */ - location.erase(0, 14); + location.erase(0, prefix.size()); dst.transaction_id = std::move(location); std::string expiry_str = rsp.get_header_value("x-expiry-time"); @@ -336,6 +394,7 @@ libcdoc::NetworkBackend::sendShare(std::vector& dst, const std::string& if (result != libcdoc::OK) return result; httplib::SSLClient cli(host, port); + if (result = applySSLTimeout(cli, this); result != OK) return result; result = setPeerCertificates(cli, this, buildURL(host, port)); if (result != OK) return result; if (result = setProxy(cli, this); result != OK) return result; @@ -351,10 +410,14 @@ libcdoc::NetworkBackend::sendShare(std::vector& dst, const std::string& error = FORMAT("No Location header in response"); return NETWORK_ERROR; } + constexpr std::string_view prefix = "/key-shares/"; + if (location.compare(0, prefix.size(), prefix) != 0) { + error = FORMAT("Unexpected Location header value"); + return NETWORK_ERROR; + } error = {}; - /* Remove /key-shares/ */ - dst.assign(location.cbegin() + 12, location.cend()); + dst.assign(location.cbegin() + prefix.size(), location.cend()); LOG_DBG("Share: {}", std::string((const char *) dst.data(), dst.size())); return OK; @@ -376,6 +439,7 @@ libcdoc::NetworkBackend::fetchKey (std::vector& dst, const std::string& if (!cert.empty() && (!d->x509 || !d->pkey)) return CRYPTO_ERROR; httplib::SSLClient cli(host, port, d->x509.handle(), d->pkey); + if (result = applySSLTimeout(cli, this); result != OK) return result; result = setPeerCertificates(cli, this, buildURL(host, port)); if (result != OK) return result; if (result = setProxy(cli, this); result != OK) return result; @@ -411,6 +475,7 @@ libcdoc::NetworkBackend::fetchNonce(std::vector& dst, const std::string LOG_DBG("Starting client: {} {}", host, port); httplib::SSLClient cli(host, port); + if (result = applySSLTimeout(cli, this); result != OK) return result; result = setPeerCertificates(cli, this, buildURL(host, port)); if (result != OK) return result; if (result = setProxy(cli, this); result != OK) return result; @@ -423,7 +488,12 @@ libcdoc::NetworkBackend::fetchNonce(std::vector& dst, const std::string LOG_DBG("Response: {}", rsp.body); picojson::value rsp_json; - picojson::parse(rsp_json, rsp.body); + std::string parse_err = picojson::parse(rsp_json, rsp.body); + if (!parse_err.empty()) { + error = FORMAT("JSON parse error: {}", parse_err); + LOG_ERROR("{}", error); + return NETWORK_ERROR; + } picojson::value v = rsp_json.get("nonce"); if (!v.is()) { error = FORMAT("No 'nonce' in response"); @@ -446,6 +516,7 @@ libcdoc::NetworkBackend::fetchShare(ShareInfo& share, const std::string& url, co LOG_DBG("Starting client: {} {}", host, port); httplib::SSLClient cli(host, port); + if (result = applySSLTimeout(cli, this); result != OK) return result; result = setPeerCertificates(cli, this, buildURL(host, port)); if (result != OK) return result; @@ -456,9 +527,6 @@ libcdoc::NetworkBackend::fetchShare(ShareInfo& share, const std::string& url, co httplib::Headers hdrs; hdrs.insert({"x-cdoc2-auth-ticket", ticket}); hdrs.insert({"x-cdoc2-auth-x5c", std::string("-----BEGIN CERTIFICATE-----") + toBase64(cert) + "-----END CERTIFICATE-----"}); - for (auto i = hdrs.cbegin(); i != hdrs.cend(); i++) { - std::cerr << i->first << ": " << i->second << std::endl; - } picojson::value rsp_json; result = get(cli, hdrs, full, rsp_json); if (result != libcdoc::OK) return result; @@ -477,7 +545,10 @@ libcdoc::NetworkBackend::fetchShare(ShareInfo& share, const std::string& url, co } std::string recipient = v.get(); std::vector shareval = fromBase64(share64); - shareval.resize(32); + if (shareval.size() != 32) { + error = FORMAT("Invalid share size: expected 32, got {}", shareval.size()); + return NETWORK_ERROR; + } LOG_DBG("Share: {}", toHex(shareval)); share = {std::move(shareval), std::move(recipient)}; return OK; @@ -699,6 +770,7 @@ libcdoc::NetworkBackend::signSID(std::vector& dst, std::vector LOG_DBG("Starting client: {} {}", host, port); httplib::SSLClient cli(host, port); + if (result = applySSLTimeout(cli, this); result != OK) return result; result = setPeerCertificates(cli, this, buildURL(host, port)); if (result != OK) return result; if (result = setProxy(cli, this); result != OK) return result; @@ -716,7 +788,12 @@ libcdoc::NetworkBackend::signSID(std::vector& dst, std::vector LOG_DBG("Response: {}", rsp.body); picojson::value v; - picojson::parse(v, rsp.body); + std::string parse_err = picojson::parse(v, rsp.body); + if (!parse_err.empty()) { + error = FORMAT("JSON parse error: {}", parse_err); + LOG_ERROR("{}", error); + return NetworkBackend::NETWORK_ERROR; + } if (!v.is()) { error = "Invalid SmartID response"; LOG_WARN("Invalid SmartID response"); @@ -739,8 +816,19 @@ libcdoc::NetworkBackend::signSID(std::vector& dst, std::vector // // Sign // - std::string algo_names[] = {"SHA224", "SHA256", "SHA384", "SHA512"}; - std::string algo_name = algo_names[(int) algo]; + std::string_view algo_name = hashAlgorithmToSidMidName(algo); + if (algo_name.empty()) { + error = "Unsupported hash algorithm for Smart-ID"; + LOG_ERROR("Unsupported hash algorithm for Smart-ID: {}", + static_cast(algo)); + return libcdoc::WRONG_ARGUMENTS; + } + + if (digest.empty()) { + error = "Empty digest"; + LOG_ERROR("Empty digest passed to signSID"); + return libcdoc::WRONG_ARGUMENTS; + } // Generate code uint8_t b[32]; @@ -760,7 +848,7 @@ libcdoc::NetworkBackend::signSID(std::vector& dst, std::vector {"relyingPartyUUID", picojson::value(rp_uuid)}, {"relyingPartyName", picojson::value(rp_name)}, {"hash", picojson::value(toBase64(digest))}, - {"hashType", picojson::value(algo_name)}, + {"hashType", picojson::value(std::string(algo_name))}, {"allowedInteractionsOrder", picojson::value(aio) } @@ -775,7 +863,12 @@ libcdoc::NetworkBackend::signSID(std::vector& dst, std::vector result = post(cli, full, hdrs, query.serialize(), rsp); if (result != libcdoc::OK) return result; LOG_DBG("Response: {}", rsp.body); - picojson::parse(v, rsp.body); + parse_err = picojson::parse(v, rsp.body); + if (!parse_err.empty()) { + error = FORMAT("JSON parse error: {}", parse_err); + LOG_ERROR("{}", error); + return NetworkBackend::NETWORK_ERROR; + } if (!v.is()) { error = "Invalid SmartID response"; LOG_WARN("Invalid SmartID response"); @@ -807,6 +900,31 @@ libcdoc::NetworkBackend::signMID(std::vector& dst, std::vector const std::string& url, const std::string& rp_uuid, const std::string& rp_name, const std::string& phone, const std::string& rcpt_id, const std::vector& digest, CryptoBackend::HashAlgorithm algo) { + // Validate rcpt_id BEFORE doing anything else (network setup, key + // material, etc.). The previous implementation called + // rcpt_id.substr(11, 11) which throws std::out_of_range when + // rcpt_id.size() < 11 and silently returns a too-short identifier + // when 11 <= size < 22 - both of which would propagate to the SK + // Mobile-ID service as garbage and (worse) leak partially-filled + // payloads to the network in the latter case. + libcdoc::EtsiRecipientId parsed = libcdoc::parseEtsiRecipientId(rcpt_id); + if (!parsed.valid()) { + error = "Invalid Mobile ID recipient identifier"; + LOG_ERROR("Invalid Mobile ID recipient identifier: '{}'", rcpt_id); + return libcdoc::WRONG_ARGUMENTS; + } + + // The SK Mobile-ID API expects `nationalIdentityNumber` to be the + // bare digits with no country prefix, so we use the parsed national + // identifier directly. + const std::string &id_num = parsed.national_id; + + if (digest.empty()) { + error = "Empty digest"; + LOG_ERROR("Empty digest passed to signMID"); + return libcdoc::WRONG_ARGUMENTS; + } + std::string certificateLevel = "QUALIFIED"; std::string nonce = libcdoc::toBase64(Crypto::random(16)); @@ -821,6 +939,7 @@ libcdoc::NetworkBackend::signMID(std::vector& dst, std::vector LOG_DBG("Starting client: {} {}", host, port); httplib::SSLClient cli(host, port); + if (result = applySSLTimeout(cli, this); result != OK) return result; result = setPeerCertificates(cli, this, buildURL(host, port)); if (result != OK) return result; if (result = setProxy(cli, this); result != OK) return result; @@ -828,23 +947,26 @@ libcdoc::NetworkBackend::signMID(std::vector& dst, std::vector // // Authenticate // - std::string algo_names[] = {"SHA224", "SHA256", "SHA384", "SHA512"}; - std::string algo_name = algo_names[(int) algo]; + std::string_view algo_name = hashAlgorithmToSidMidName(algo); + if (algo_name.empty()) { + error = "Unsupported hash algorithm for Mobile-ID"; + LOG_ERROR("Unsupported hash algorithm for Mobile-ID: {}", + static_cast(algo)); + return libcdoc::WRONG_ARGUMENTS; + } - // Generate code + // Generate verification code. digest is guaranteed non-empty above. unsigned int code = (((digest[0] & 0xfc) << 5) | (digest[digest.size() - 1] & 0x7f)); result = showVerificationCode(code); if (result != OK) return result; - // etsi/PNOEE-01234567890 - std::string id_num = rcpt_id.substr(11, 11); picojson::object qobj = { {"relyingPartyUUID", picojson::value(rp_uuid)}, {"relyingPartyName", picojson::value(rp_name)}, {"phoneNumber", picojson::value(phone)}, {"nationalIdentityNumber", picojson::value(id_num)}, {"hash", picojson::value(toBase64(digest))}, - {"hashType", picojson::value(algo_name)}, + {"hashType", picojson::value(std::string(algo_name))}, {"language", picojson::value("ENG")}, {"displayText", picojson::value("Tahad dekryptida?")}, {"displayTextFormat", picojson::value("GSM-7")} @@ -863,15 +985,22 @@ libcdoc::NetworkBackend::signMID(std::vector& dst, std::vector LOG_DBG("Response: {}", rsp.body); picojson::value v; - picojson::parse(v, rsp.body); + parse_err = picojson::parse(v, rsp.body); + if (!parse_err.empty()) { + error = FORMAT("JSON parse error: {}", parse_err); + LOG_ERROR("{}", error); + return NetworkBackend::NETWORK_ERROR; + } if (!v.is()) { error = "Invalid Mobile ID response"; - LOG_WARN("Invalid Monbile ID response"); + LOG_WARN("Invalid Mobile ID response"); + return NetworkBackend::NETWORK_ERROR; } picojson::value w = v.get("sessionID"); if (!w.is()) { error = "Invalid Mobile ID response"; - LOG_WARN("Invalid Monbile ID response"); + LOG_WARN("Invalid Mobile ID response"); + return NetworkBackend::NETWORK_ERROR; } std::string sessionID = w.get(); LOG_DBG("SessionID: {}", sessionID); diff --git a/cdoc/NetworkBackend.h b/cdoc/NetworkBackend.h index 3ec43618..8a3b8621 100644 --- a/cdoc/NetworkBackend.h +++ b/cdoc/NetworkBackend.h @@ -119,8 +119,10 @@ struct CDOC_EXPORT NetworkBackend { std::string username; /** * @brief Proxy password + * + * It is the implementer's responsibility to ensure that the buffer remains valid during CDocWriter getFMK and beginEncryption calls */ - std::string password; + std::string_view password; }; NetworkBackend() = default; diff --git a/cdoc/PKCS11Backend.cpp b/cdoc/PKCS11Backend.cpp index 385d5bb2..90d996e4 100644 --- a/cdoc/PKCS11Backend.cpp +++ b/cdoc/PKCS11Backend.cpp @@ -379,9 +379,9 @@ libcdoc::PKCS11Backend::getPublicKey(std::vector& val, int slot, const return CRYPTO_ERROR; } std::vector w = d->attribute(d->session, handle, CKA_EC_POINT); - if (w.empty()) { - LOG_DBG("PKCS11: getValue CKA_EC_POINT error"); - d->logout(); + if (w.size() < 2) { + LOG_DBG("PKCS11: getValue CKA_EC_POINT too short"); + d->logout(); return CRYPTO_ERROR; } const uint8_t *p = v.data(); @@ -392,12 +392,32 @@ libcdoc::PKCS11Backend::getPublicKey(std::vector& val, int slot, const return CRYPTO_ERROR; } EC_POINT *pub_key_point = EC_POINT_new(group); - int result = EC_POINT_oct2point(group, pub_key_point, w.data() + 2, w.size() - 2, NULL); + if (!pub_key_point) { + EC_GROUP_free(group); + return CRYPTO_ERROR; + } + if (EC_POINT_oct2point(group, pub_key_point, w.data() + 2, w.size() - 2, NULL) != 1) { + LOG_DBG("PKCS11: EC_POINT_oct2point error"); + EC_POINT_free(pub_key_point); + EC_GROUP_free(group); + return CRYPTO_ERROR; + } // Associate the Point with an EC_KEY: Finally, set up an EC_KEY structure and assign the point as the public key. EC_KEY *key = EC_KEY_new(); + if (!key) { + EC_POINT_free(pub_key_point); + EC_GROUP_free(group); + return CRYPTO_ERROR; + } EC_KEY_set_group(key, group); EC_KEY_set_public_key(key, pub_key_point); EVP_PKEY *evp_pkey = EVP_PKEY_new(); + if (!evp_pkey) { + EC_KEY_free(key); + EC_POINT_free(pub_key_point); + EC_GROUP_free(group); + return CRYPTO_ERROR; + } EVP_PKEY_assign_EC_KEY(evp_pkey, key); val = Crypto::toPublicKeyDerLong(evp_pkey); EVP_PKEY_free(evp_pkey); @@ -410,25 +430,100 @@ libcdoc::PKCS11Backend::getPublicKey(std::vector& val, int slot, const libcdoc::result_t libcdoc::PKCS11Backend::decryptRSA(std::vector &dst, const std::vector &data, bool oaep, unsigned int idx) { - if(!d) return CRYPTO_ERROR; + if(!d) return CRYPTO_ERROR; int result = connectToKey(idx, true); if (result != OK) return result; - CK_RSA_PKCS_OAEP_PARAMS params { CKM_SHA256, CKG_MGF1_SHA256, 0, nullptr, 0 }; - auto mech = oaep ? CK_MECHANISM{ CKM_RSA_PKCS_OAEP, ¶ms, sizeof(params) } : CK_MECHANISM{ CKM_RSA_PKCS, nullptr, 0 }; - if(d->f->C_DecryptInit(d->session, &mech, d->key) != CKR_OK) { - d->logout(); - return CRYPTO_ERROR; - } - CK_ULONG size = 0; - if(d->f->C_Decrypt(d->session, CK_CHAR_PTR(data.data()), CK_ULONG(data.size()), 0, &size) != CKR_OK) { - d->logout(); - return CRYPTO_ERROR; - } - dst.resize(size); - if(d->f->C_Decrypt(d->session, CK_CHAR_PTR(data.data()), CK_ULONG(data.size()), dst.data(), &size) != CKR_OK) return CRYPTO_ERROR; - d->logout(); + if (!oaep) { + // If oaep is false, dst must be pre-allocated to the expected length. + // This is required to apply the implicit-rejection countermeasure on padding failure. + if (dst.empty()) { + LOG_ERROR("PKCS11Backend::decryptRSA: dst must be pre-allocated for PKCS#1 v1.5 decryption"); + return CRYPTO_ERROR; + } + // Use raw RSA (CKM_RSA_X_509) so libcdoc can apply RFC 8017 implicit + // rejection in user space. CKM_RSA_PKCS lets the token strip the + // padding, but most PKCS#11 tokens leak the success/failure bit + // through CKR_ENCRYPTED_DATA_INVALID vs CKR_OK and through the time + // taken; the only portable mitigation is to never let the token see + // the padding decision. CKM_RSA_X_509 is a baseline mechanism + // supported by every PKCS#11 token that supports RSA. + CK_MECHANISM mech { CKM_RSA_X_509, nullptr, 0 }; + if (d->f->C_DecryptInit(d->session, &mech, d->key) != CKR_OK) { + d->logout(); + return CRYPTO_ERROR; + } + + CK_ULONG em_size = 0; + if (d->f->C_Decrypt(d->session, CK_CHAR_PTR(data.data()), CK_ULONG(data.size()), nullptr, &em_size) != CKR_OK) { + d->logout(); + return CRYPTO_ERROR; + } + std::vector em(size_t(em_size), 0); + if (d->f->C_Decrypt(d->session, CK_CHAR_PTR(data.data()), CK_ULONG(data.size()), em.data(), &em_size) != CKR_OK) { + libcdoc::cleanse(em); + d->logout(); + return CRYPTO_ERROR; + } + em.resize(size_t(em_size)); + d->logout(); + + // Build a synthetic seed that does not require access to the private + // key (which never leaves the token). HMAC the ciphertext with a + // public-but-token-bound value (CKA_ID concatenated with the modulus) + // so the seed is stable per (token-key, ct) pair while still being + // unpredictable to attackers. + std::vector seed_key; + { + std::vector id_attr = d->attribute(d->session, d->key, CKA_ID); + std::vector mod_attr = d->attribute(d->session, d->key, CKA_MODULUS); + seed_key.reserve(id_attr.size() + mod_attr.size() + 16); + const std::string_view tag{"cdoc1-rsa-implicit-reject-pkcs11"}; + seed_key.insert(seed_key.end(), tag.begin(), tag.end()); + seed_key.insert(seed_key.end(), id_attr.begin(), id_attr.end()); + seed_key.insert(seed_key.end(), mod_attr.begin(), mod_attr.end()); + } + std::vector prk = libcdoc::Crypto::sign_hmac(seed_key, data); + libcdoc::cleanse(seed_key); + std::vector synth = libcdoc::Crypto::expand( + prk, "cdoc1-rsa-implicit-reject", int(dst.size())); + libcdoc::cleanse(prk); + if (synth.size() != dst.size()) { + // Last-resort fallback: fixed zero seed. Worse than ideal but still + // length-uniform with the real-success path. + synth.assign(dst.size(), 0); + } + + int rv = libcdoc::Crypto::rsaImplicitRejectFromEM(dst, em, data, synth, dst.size()); + libcdoc::cleanse(em); + libcdoc::cleanse(synth); + return rv; + } + + CK_RSA_PKCS_OAEP_PARAMS params { CKM_SHA256, CKG_MGF1_SHA256, 0, nullptr, 0 }; + auto mech = oaep ? CK_MECHANISM{ CKM_RSA_PKCS_OAEP, ¶ms, sizeof(params) } : CK_MECHANISM{ CKM_RSA_PKCS, nullptr, 0 }; + if(d->f->C_DecryptInit(d->session, &mech, d->key) != CKR_OK) { + d->logout(); + return CRYPTO_ERROR; + } + CK_ULONG size = 0; + if(d->f->C_Decrypt(d->session, CK_CHAR_PTR(data.data()), CK_ULONG(data.size()), 0, &size) != CKR_OK) { + d->logout(); + return CRYPTO_ERROR; + } + dst.resize(size); + if(d->f->C_Decrypt(d->session, CK_CHAR_PTR(data.data()), CK_ULONG(data.size()), dst.data(), &size) != CKR_OK) { + // Always logout - failing to do so would leak the open session and + // (worse) reveal whether the second C_Decrypt failed for "padding" + // vs another reason via observable side-effects on the next call. + libcdoc::cleanse(dst); + dst.clear(); + d->logout(); + return CRYPTO_ERROR; + } + dst.resize(size); + d->logout(); return OK; } diff --git a/cdoc/RcptInfo.h b/cdoc/RcptInfo.h index 60fb0149..9b6582a7 100644 --- a/cdoc/RcptInfo.h +++ b/cdoc/RcptInfo.h @@ -19,6 +19,8 @@ #ifndef RCPTINFO_H #define RCPTINFO_H +#include "utils/memory.h" + #include namespace libcdoc { @@ -60,7 +62,7 @@ struct RcptInfo { // Certificate for encryption std::vector cert; // Pin or password - std::vector secret; + SecureBytes secret; // PKCS11-specific info PKCS11Info p11; diff --git a/cdoc/Tar.cpp b/cdoc/Tar.cpp index b039e910..f6688063 100644 --- a/cdoc/Tar.cpp +++ b/cdoc/Tar.cpp @@ -28,15 +28,27 @@ using namespace libcdoc; constexpr unsigned int BLOCKSIZE = 512; +constexpr int64_t CDOC2_MAX_FILE_SIZE = 8LL * 1024 * 1024 * 1024; + +// Cap on the declared size of an "auxiliary" tar header - i.e. extended +// PAX header ('x') or global PAX header ('g'). The PAX standard places no +// formal upper bound on these, but realistic records produced by tar(1) +// are O(KB) (one entry per path/size override). A malicious archive could +// otherwise declare an 8 GiB PAX header and force the decryption pipeline +// to either allocate that much memory (readPaxHeader) or spin through it +// in skip() (next()). 64 KiB is well above anything legitimate while +// keeping per-entry memory and stream-skip work bounded. +constexpr int64_t MAX_AUX_HEADER_SIZE = 64 * 1024; + template -[[nodiscard]] static constexpr auto svtoi(std::string_view data) noexcept +[[nodiscard]] static constexpr bool svtoi(std::string_view data, T& result) noexcept { - T result {}; if (data.empty()) - return result; - auto p = &*data.begin(); - std::from_chars(p, p + std::ranges::distance(data), result); - return result; + return false; + const auto *p = data.data(); + const auto *end = p + data.size(); + auto [ptr, ec] = std::from_chars(p, end, result); + return ec == std::errc{} && ptr == end; } template @@ -47,6 +59,8 @@ static constexpr int64_t fromOctal(const std::array &data) noexcept { if(c < '0' || c > '7') continue; + if (i > (INT64_MAX >> 3)) + return INT64_MAX; i <<= 3; i += c - '0'; } @@ -114,7 +128,10 @@ struct libcdoc::Header { } constexpr int64_t getSize() const noexcept { - return fromOctal(size); + int64_t s = fromOctal(size); + if (s < 0 || s > CDOC2_MAX_FILE_SIZE) + return -1; + return s; } constexpr bool operator==(const Header&) const noexcept = default; @@ -309,6 +326,15 @@ libcdoc::result_t libcdoc::TarSource::readPaxHeader(const Header& hdr, std::string& name, int64_t& size) { int64_t h_size = hdr.getSize(); + // Validate the declared size BEFORE allocating the buffer. getSize() + // already returns -1 for malformed octal or sizes above + // CDOC2_MAX_FILE_SIZE, but that 8 GiB ceiling is meant for payload + // files; PAX headers themselves must be much smaller. See the + // MAX_AUX_HEADER_SIZE comment near the top of this file. + if (h_size < 0 || h_size > MAX_AUX_HEADER_SIZE) { + _error = DATA_FORMAT_ERROR; + return _error; + } std::string paxData(h_size, 0); result_t result = _src->read((uint8_t *) paxData.data(), paxData.size()); if (result != h_size) { @@ -329,15 +355,22 @@ libcdoc::TarSource::readPaxHeader(const Header& hdr, std::string& name, int64_t& auto keyWord = range_to_sv(std::next(sp), eq); auto headerValue = range_to_sv(std::next(eq), line.end()); - if (std::ranges::distance(line) + 1 != svtoi(lenStr)) { + int parsedLen; + if (!svtoi(lenStr, parsedLen) || std::ranges::distance(line) + 1 != parsedLen) { _error = DATA_FORMAT_ERROR; return _error; } LOG_DBG("PAX {} : {}", keyWord, headerValue); if (keyWord == "path") name = headerValue; - if (keyWord == "size") - size = svtoi(headerValue); + if (keyWord == "size") { + int64_t parsedSize; + if (!svtoi(headerValue, parsedSize) || parsedSize < 0 || parsedSize > CDOC2_MAX_FILE_SIZE) { + _error = DATA_FORMAT_ERROR; + return _error; + } + size = parsedSize; + } } return OK; } @@ -414,8 +447,16 @@ libcdoc::TarSource::next(std::string& name, int64_t& size) _eof = false; return OK; } - // Skip other header types ('g') + // Skip other header types ('g' = global PAX header, plus any tar + // type we don't recognise as data). Cap the declared size at the + // same ceiling we use for 'x' headers so an attacker cannot force + // the upstream decryption pipeline to spin through gigabytes of + // payload bytes per malicious header. h_size = h.getSize(); + if (h_size < 0 || h_size > MAX_AUX_HEADER_SIZE) { + _error = DATA_FORMAT_ERROR; + return _error; + } _src->skip(h_size + padding(h_size)); } return END_OF_STREAM; diff --git a/cdoc/Utils.cpp b/cdoc/Utils.cpp index 07853336..b422d596 100644 --- a/cdoc/Utils.cpp +++ b/cdoc/Utils.cpp @@ -39,7 +39,7 @@ toBase64(const uint8_t *data, size_t len) } std::vector -fromBase64(const std::string& data) +fromBase64(std::string_view data) { std::string str = jwt::base::details::decode(data, jwt::alphabet::base64::rdata(), "="); return std::vector(str.cbegin(), str.cend()); @@ -104,8 +104,9 @@ parseURL(const std::string& url, std::string& host, int& port, std::string& path { char *phost, *ppath; int pport; + int pssl; if (!OSSL_HTTP_parse_url(url.c_str(), - nullptr, // SSL + &pssl, nullptr, // user &phost, nullptr, // port (str) @@ -116,6 +117,12 @@ parseURL(const std::string& url, std::string& host, int& port, std::string& path )) { return libcdoc::DATA_FORMAT_ERROR; } + if (!pssl) { + OPENSSL_free(phost); + OPENSSL_free(ppath); + LOG_ERROR("URL scheme must be https: {}", url); + return libcdoc::CONFIGURATION_ERROR; + } host = phost; port = pport; path = ppath; @@ -160,6 +167,146 @@ operator<<(std::ostream& escaped, urlEncode src) return escaped; } +EtsiRecipientId +parseEtsiRecipientId(std::string_view rcpt_id) +{ + constexpr std::string_view kPrefix{"etsi/PNO"}; + constexpr size_t kCountryCodeLen = 2; + constexpr size_t kSeparatorLen = 1; + constexpr size_t kMaxNationalIdLen = 32; + + // Need at least: prefix + 2 country chars + '-' + 1 id digit. + if (rcpt_id.size() < kPrefix.size() + kCountryCodeLen + kSeparatorLen + 1) { + return {}; + } + if (rcpt_id.substr(0, kPrefix.size()) != kPrefix) { + return {}; + } + + std::string_view cc = rcpt_id.substr(kPrefix.size(), kCountryCodeLen); + for (char c : cc) { + if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) { + return {}; + } + } + + if (rcpt_id[kPrefix.size() + kCountryCodeLen] != '-') { + return {}; + } + + std::string_view nat_id = rcpt_id.substr(kPrefix.size() + kCountryCodeLen + kSeparatorLen); + if (nat_id.empty() || nat_id.size() > kMaxNationalIdLen) { + return {}; + } + for (char c : nat_id) { + if (c < '0' || c > '9') { + return {}; + } + } + + EtsiRecipientId out; + out.country.reserve(kCountryCodeLen); + for (char c : cc) { + out.country.push_back(char((c >= 'a' && c <= 'z') ? (c - 'a' + 'A') : c)); + } + out.national_id.assign(nat_id); + return out; +} + +std::string +sanitiseExtractedFilename(std::string_view name) +{ + // 1. Reject anything whose UTF-8 is malformed or contains NUL/control + // characters. NUL is particularly dangerous: many Windows APIs + // truncate at NUL while the filesystem treats the full name, which + // has historically been used to mask malicious extensions. + if (name.empty()) return {}; + for (unsigned char c : name) { + if (c == 0u) return {}; + if (c < 0x20u && c != '\t') return {}; // strip ASCII control bytes + } + + // 2. Strip every directory component. We split on BOTH '/' and '\\' + // on every platform: an attacker who crafts a Windows-style path on + // Linux is still trying to escape, and vice versa. We always take + // the last non-empty component. + size_t last_sep = name.find_last_of("\\/"); + std::string_view base = (last_sep == std::string_view::npos) + ? name + : name.substr(last_sep + 1); + + // 3. Reject Windows drive-letter prefixes that survived the slash split + // (e.g. "C:foo.txt" with no slash is drive-relative on Windows and + // refers to the current directory of drive C:, NOT the current + // working directory). We strip "X:" if the prefix looks like one. + if (base.size() >= 2 && base[1] == ':' && + ((base[0] >= 'A' && base[0] <= 'Z') || + (base[0] >= 'a' && base[0] <= 'z'))) { + base = base.substr(2); + } + + // 4. Trim trailing dots and whitespace. Windows silently strips these + // when creating files, so "evil.exe.." resolves to "evil.exe" and + // can collide with or hide a legitimate file. Trim leading + // whitespace too, for symmetry. + while (!base.empty() && (base.back() == '.' || base.back() == ' ')) + base.remove_suffix(1); + while (!base.empty() && (base.front() == ' ' || base.front() == '\t')) + base.remove_prefix(1); + + // 5. Reject "." and ".." outright. These appear standalone after + // stripping a leading directory component (e.g. name == ".."). + if (base.empty() || base == "." || base == "..") return {}; + + // 6. Reject reserved Windows device names. The check is case-insensitive + // and applies to both the bare name and the name before any extension. + { + size_t dot = base.find('.'); + std::string_view stem = base.substr(0, dot); + std::string upper(stem.size(), '\0'); + for (size_t i = 0; i < stem.size(); ++i) { + unsigned char ch = uint8_t(stem[i]); + upper[i] = char((ch >= 'a' && ch <= 'z') ? (ch - 'a' + 'A') : ch); + } + static constexpr std::string_view reserved[] = { + "CON", "PRN", "AUX", "NUL", + "COM1", "COM2", "COM3", "COM4", "COM5", + "COM6", "COM7", "COM8", "COM9", + "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", + "LPT6", "LPT7", "LPT8", "LPT9", + }; + for (const auto &r : reserved) { + if (upper == r) return {}; + } + } + + // 7. Cap to a sensible byte length. The practical filename limit on + // every filesystem libcdoc supports is 255 bytes (NTFS, ext4, APFS). + // A name longer than that would fail filesystem operations anyway; + // truncating up-front gives a uniform error mode. We truncate from + // the end while keeping the file extension if there is one. + constexpr size_t MAX_BYTES = 255; + if (base.size() > MAX_BYTES) { + size_t dot = base.find_last_of('.'); + if (dot != std::string_view::npos && + dot > 0 && + base.size() - dot < 16) { + // Preserve a short extension; truncate the stem. + std::string_view ext = base.substr(dot); + std::string_view stem = base.substr(0, dot); + size_t keep_stem = MAX_BYTES - ext.size(); + std::string out; + out.reserve(MAX_BYTES); + out.assign(stem.data(), keep_stem); + out.append(ext.data(), ext.size()); + return out; + } + return std::string(base.substr(0, MAX_BYTES)); + } + + return std::string(base); +} + std::vector JsonToStringArray(std::string_view json) { diff --git a/cdoc/Utils.h b/cdoc/Utils.h index dad25f3a..68dda1c6 100644 --- a/cdoc/Utils.h +++ b/cdoc/Utils.h @@ -54,7 +54,7 @@ static std::string toBase64(const std::vector &data) { return toBase64(data.data(), data.size()); } -std::vector fromBase64(const std::string& data); +std::vector fromBase64(std::string_view data); template static std::string toHex(const F &data) @@ -143,6 +143,87 @@ readAllBytes(std::string_view filename) int parseURL(const std::string& url, std::string& host, int& port, std::string& path, bool end_with_slash = false); std::string buildURL(const std::string& host, int port); +/** + * @brief Sanitise an attacker-controlled file name for safe extraction. + * + * @p name comes from a CDoc1/DDoc/CDoc2 archive header and is fully under the + * control of whoever produced the container. The function strips every + * filesystem-significant component that could let the path escape the + * caller-supplied @p base directory or trick a Windows API into doing + * something other than "create a normal file inside @p base": + * + * - all leading directory components (slashes, backslashes, drive letters), + * - "." and ".." segments, + * - NUL bytes and other ASCII control characters, + * - leading/trailing whitespace and dots (Windows trims these silently), + * - reserved Windows device names (CON, PRN, AUX, NUL, COM1..COM9, LPT1..LPT9), + * - excessively long names (capped at 255 bytes after sanitisation, the + * practical filename limit on every filesystem libcdoc supports). + * + * The returned string is a relative file name (no slashes), or empty if no + * safe name could be derived. A caller that gets an empty return value MUST + * either skip the entry or replace it with a generated placeholder; it MUST + * NOT fall back to the raw @p name. This function does not consult the + * filesystem; the caller is still expected to verify, after composing + * @p base / sanitisedName, that the resulting absolute path stays within + * @p base (e.g. by comparing weakly_canonical(base / safe).parent_path() + * against weakly_canonical(base)). The two checks are complementary: + * sanitisation eliminates known-malicious shapes up-front, the post-compose + * check protects against symlinks pointed at by previously-extracted files. + * + * @param name the unsafe input file name + * @return a relative file name guaranteed not to contain path-traversal + * elements, or an empty string when no safe name can be produced. + */ +CDOC_EXPORT std::string sanitiseExtractedFilename(std::string_view name); + +/** + * @brief Parsed components of an ETSI Smart-ID / Mobile-ID recipient identifier. + * + * The on-the-wire format used by SK's Smart-ID and Mobile-ID services is + * @c etsi/PNO-, e.g. @c etsi/PNOEE-30303039914. The + * @c field is the ISO-3166-1 alpha-2 country code; the + * @c field is the personal identifier issued by that + * country (in Estonia: 11 ASCII digits). + * + * @ref parseEtsiRecipientId returns this struct after validating the + * shape of the input; an empty @ref country / @ref national_id pair + * indicates a parse failure. + */ +struct EtsiRecipientId { + /// ISO-3166-1 alpha-2 country code (e.g. "EE"). Empty on parse failure. + std::string country; + /// National identifier portion (digits only). Empty on parse failure. + std::string national_id; + + /// Convenience: true iff the input parsed cleanly. + [[nodiscard]] bool valid() const noexcept { + return !country.empty() && !national_id.empty(); + } +}; + +/** + * @brief Parse an ETSI recipient identifier into its country and national-id parts. + * + * The accepted shape is @c etsi/PNO-: + * + * - exactly the literal prefix @c "etsi/PNO"; + * - exactly two ASCII letters of country code (case-insensitive on input, + * normalised to upper case in the result); + * - a literal @c '-' separator; + * - a non-empty national identifier composed of ASCII digits and at + * most 32 characters total (a generous upper bound that comfortably + * covers all current SK formats while rejecting megabyte payloads). + * + * Returns an @ref EtsiRecipientId with empty fields if any of the above + * is violated. The function never throws, never logs, and never reads + * past the end of the input. + * + * @param rcpt_id the recipient identifier to parse + * @return parsed components; check @ref EtsiRecipientId::valid() to test + */ +CDOC_EXPORT EtsiRecipientId parseEtsiRecipientId(std::string_view rcpt_id); + struct urlEncode { std::string_view src; friend std::ostream& operator<<(std::ostream& escaped, urlEncode src); @@ -230,10 +311,14 @@ static inline void LogFormat(LogLevel level, std::string_view file, int line, st #ifdef NDEBUG #define LOG_TRACE(...) -#define LOG_TRACE_KEY(MSG, KEY) #else #define LOG_TRACE(...) LogFormat(libcdoc::LEVEL_TRACE, __FILE__, __LINE__, __VA_ARGS__) +#endif + +#ifdef LIBCDOC_CRYPTO_TRACE #define LOG_TRACE_KEY(MSG, KEY) LogFormat(libcdoc::LEVEL_TRACE, __FILE__, __LINE__, MSG, toHex(KEY)) +#else +#define LOG_TRACE_KEY(MSG, KEY) #endif } // namespace libcdoc diff --git a/cdoc/WinBackend.cpp b/cdoc/WinBackend.cpp index fbc8b55a..9fb9634c 100644 --- a/cdoc/WinBackend.cpp +++ b/cdoc/WinBackend.cpp @@ -19,15 +19,43 @@ #include "WinBackend.h" #include "CDoc2.h" +#include "Crypto.h" #include "Logger.h" #include "Utils.h" +#include "utils/memory.h" #include #include +// Convert a UTF-8 std::string to a std::wstring (UTF-16) using the Windows +// API. The previous implementation zero-extended each byte into a wchar_t, +// which silently mangled any non-ASCII input - in particular non-ASCII PINs +// and key names. A single mis-converted PIN byte is enough to fail +// authentication; on smart cards this consumes a retry slot and can +// permanently lock the card after exhausting the retry counter. static std::wstring toWide(const std::string &in) { - return {in.cbegin(), in.cend()}; + if (in.empty()) return {}; + int needed = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + in.data(), int(in.size()), + nullptr, 0); + if (needed <= 0) { + LOG_ERROR("WinBackend::toWide: invalid UTF-8 input (GetLastError={})", + DWORD(GetLastError())); + return {}; + } + std::wstring out(size_t(needed), L'\0'); + int written = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + in.data(), int(in.size()), + out.data(), needed); + if (written != needed) { + LOG_ERROR("WinBackend::toWide: MultiByteToWideChar mismatch " + "(GetLastError={})", DWORD(GetLastError())); + // Wipe the partially-populated buffer before discarding it. + SecureZeroMemory(out.data(), out.size() * sizeof(wchar_t)); + return {}; + } + return out; } struct libcdoc::WinBackend::Private { @@ -123,16 +151,59 @@ libcdoc::WinBackend::useKey(const std::string& name, const std::string& pin) NCryptFreeObject(d->key); d->key = 0; } + + // Reject invalid UTF-8 in the key name early instead of silently + // truncating it. toWide() returns an empty string both for an empty + // input and for invalid UTF-8 - distinguish the two. + if (name.empty()) { + LOG_ERROR("WinBackend::useKey: empty key name"); + return WRONG_ARGUMENTS; + } std::wstring wname = toWide(name); + if (wname.empty()) { + LOG_ERROR("WinBackend::useKey: invalid UTF-8 in key name"); + return WRONG_ARGUMENTS; + } + SECURITY_STATUS err = NCryptOpenKey(d->prov, &d->key, wname.c_str(), 0, NCRYPT_SILENT_FLAG); - if (err != ERROR_SUCCESS) return CRYPTO_ERROR; - if (!pin.empty()) { - std::wstring wpin = toWide(pin); - err = NCryptSetProperty(d->key, NCRYPT_PIN_PROPERTY, PBYTE(wpin.data()), DWORD(wpin.size()), NCRYPT_SILENT_FLAG); - if (err != ERROR_SUCCESS) { - NCryptFreeObject(d->key); - d->key = 0; - } + if (err != ERROR_SUCCESS) { + LOG_ERROR("WinBackend::useKey: NCryptOpenKey failed (status={:#x})", DWORD(err)); + return CRYPTO_ERROR; + } + + if (pin.empty()) { + return OK; + } + + std::wstring wpin = toWide(pin); + if (wpin.empty()) { + // toWide() already logged the reason. Treat invalid UTF-8 in the PIN + // as a hard failure so we do NOT submit a partial / mangled PIN to + // the card and consume a retry slot. + NCryptFreeObject(d->key); + d->key = 0; + return WRONG_ARGUMENTS; + } + + // NCryptSetProperty(NCRYPT_PIN_PROPERTY) expects cbInput in *bytes*, + // not wide-character count. The previous code passed wpin.size(), which + // is the WCHAR count - i.e. half the actual byte length - so only half + // of the PIN was forwarded to the card. Pass the byte length explicitly, + // and include the trailing NUL since CNG documents the PIN as a + // null-terminated wide string. + const DWORD pin_bytes = DWORD((wpin.size() + 1) * sizeof(wchar_t)); + err = NCryptSetProperty(d->key, NCRYPT_PIN_PROPERTY, + PBYTE(wpin.data()), pin_bytes, + NCRYPT_SILENT_FLAG); + // Wipe the wide PIN buffer regardless of the outcome before discarding + // it. std::wstring::data() is contiguous and writeable since C++17. + SecureZeroMemory(wpin.data(), wpin.size() * sizeof(wchar_t)); + + if (err != ERROR_SUCCESS) { + LOG_ERROR("WinBackend::useKey: NCryptSetProperty(PIN) failed (status={:#x})", DWORD(err)); + NCryptFreeObject(d->key); + d->key = 0; + return CRYPTO_ERROR; } return OK; } @@ -144,6 +215,71 @@ libcdoc::WinBackend::decryptRSA(std::vector& dst, const std::vectorkey, PBYTE(data.data()), DWORD(data.size()), nullptr, nullptr, 0, &em_size, 0); + if (err != ERROR_SUCCESS) { + LOG_ERROR("WinBackend::decryptRSA: NCryptDecrypt(size) failed (status={:#x})", DWORD(err)); + return CRYPTO_ERROR; + } + std::vector em(em_size, 0); + err = NCryptDecrypt(d->key, PBYTE(data.data()), DWORD(data.size()), nullptr, PBYTE(em.data()), em_size, &em_size, 0); + if (err != ERROR_SUCCESS) { + libcdoc::cleanse(em); + LOG_ERROR("WinBackend::decryptRSA: NCryptDecrypt failed (status={:#x})", DWORD(err)); + return CRYPTO_ERROR; + } + em.resize(em_size); + + // Build a stable synthetic seed from a token-bound public value (the + // public key blob) plus the ciphertext. The private key never leaves + // CNG's keystore, but the public modulus is exportable and serves the + // same role of "private to this key, public-immutable" that OpenSSL's + // i2d_PrivateKey gives us elsewhere. + std::vector seed_key; + { + DWORD blob_size = 0; + if (NCryptExportKey(d->key, 0, BCRYPT_RSAPUBLIC_BLOB, nullptr, nullptr, 0, &blob_size, 0) == ERROR_SUCCESS && + blob_size > 0) { + std::vector blob(blob_size, 0); + if (NCryptExportKey(d->key, 0, BCRYPT_RSAPUBLIC_BLOB, nullptr, blob.data(), blob_size, &blob_size, 0) == ERROR_SUCCESS) { + blob.resize(blob_size); + const std::string_view tag{"cdoc1-rsa-implicit-reject-cng"}; + seed_key.reserve(tag.size() + blob.size()); + seed_key.insert(seed_key.end(), tag.begin(), tag.end()); + seed_key.insert(seed_key.end(), blob.begin(), blob.end()); + } + } + // If export failed, fall back to a fixed tag - still length-uniform + // but slightly less unpredictable. Better than leaking the failure. + if (seed_key.empty()) { + const std::string_view tag{"cdoc1-rsa-implicit-reject-cng-fallback"}; + seed_key.assign(tag.begin(), tag.end()); + } + } + std::vector prk = libcdoc::Crypto::sign_hmac(seed_key, data); + libcdoc::cleanse(seed_key); + std::vector synth = libcdoc::Crypto::expand(prk, "cdoc1-rsa-implicit-reject", int(dst.size())); + libcdoc::cleanse(prk); + if (synth.size() != dst.size()) + synth.assign(dst.size(), 0); + + int rv = libcdoc::Crypto::rsaImplicitRejectFromEM(dst, em, data, synth, dst.size()); + libcdoc::cleanse(em); + libcdoc::cleanse(synth); + return rv; + } + // With oaep == true CNG will apply OAEP padding and the implicit-rejection countermeasure internally, + // so we can just call NCryptDecrypt directly with the right flags. BCRYPT_OAEP_PADDING_INFO padding {BCRYPT_SHA256_ALGORITHM, nullptr, 0}; PVOID paddingInfo = oaep ? &padding : nullptr; DWORD flags = oaep ? NCRYPT_PAD_OAEP_FLAG : NCRYPT_PAD_PKCS1_FLAG; @@ -152,7 +288,11 @@ libcdoc::WinBackend::decryptRSA(std::vector& dst, const std::vectorkey, PBYTE(data.data()), DWORD(data.size()), paddingInfo, PBYTE(dst.data()), DWORD(dst.size()), &size, flags); - if (err != ERROR_SUCCESS) return CRYPTO_ERROR; + if (err != ERROR_SUCCESS) { + libcdoc::cleanse(dst); + dst.clear(); + return CRYPTO_ERROR; + } return OK; } @@ -263,19 +403,30 @@ libcdoc::WinBackend::sign(std::vector& dst, HashAlgorithm algorithm, co int result = connectToKey(idx, true); if (result != OK) return result; - BCRYPT_PSS_PADDING_INFO rsaPSS { NCRYPT_SHA256_ALGORITHM, 32 }; - switch(algorithm) { - case libcdoc::CryptoBackend::HashAlgorithm::SHA_224: - rsaPSS = { L"SHA224", 24 }; break; - case libcdoc::CryptoBackend::HashAlgorithm::SHA_256: - rsaPSS = { NCRYPT_SHA256_ALGORITHM, 32 }; break; - case libcdoc::CryptoBackend::HashAlgorithm::SHA_384: - rsaPSS = { NCRYPT_SHA384_ALGORITHM, 48 }; break; - case libcdoc::CryptoBackend::HashAlgorithm::SHA_512: - rsaPSS = { NCRYPT_SHA256_ALGORITHM, 64 }; break; - default: + // BCRYPT_PSS_PADDING_INFO::pszAlgId selects BOTH the PSS hash and MGF1 + // hash. It must match the hash that produced `digest`, otherwise CNG + // either rejects the call (when the digest length disagrees with the + // declared hash) or silently produces a signature that no verifier + // will accept. Salt length conventionally equals the hash output size. + // + // Note: CNG (BCrypt/NCrypt) does not expose a SHA-224 PSS algorithm + // identifier, so SHA-224 is rejected here rather than silently + // forwarded with a non-standard string. + BCRYPT_PSS_PADDING_INFO rsaPSS { BCRYPT_SHA256_ALGORITHM, 32 }; + switch(algorithm) { + case libcdoc::CryptoBackend::HashAlgorithm::SHA_256: + rsaPSS = { BCRYPT_SHA256_ALGORITHM, 32 }; break; + case libcdoc::CryptoBackend::HashAlgorithm::SHA_384: + rsaPSS = { BCRYPT_SHA384_ALGORITHM, 48 }; break; + case libcdoc::CryptoBackend::HashAlgorithm::SHA_512: + rsaPSS = { BCRYPT_SHA512_ALGORITHM, 64 }; break; + case libcdoc::CryptoBackend::HashAlgorithm::SHA_224: + // SHA-224 is not supported by CNG's RSA-PSS implementation. + LOG_ERROR("WinBackend: RSA-PSS with SHA-224 is not supported by CNG"); + return NOT_IMPLEMENTED; + default: return INVALID_PARAMS; - } + } BCRYPT_PKCS1_PADDING_INFO rsaPKCS1 { rsaPSS.pszAlgId }; DWORD size; NCryptGetProperty(d->key, NCRYPT_ALGORITHM_GROUP_PROPERTY, nullptr, 0, &size, 0); diff --git a/cdoc/XmlReader.cpp b/cdoc/XmlReader.cpp index d9428bcc..60cc3b6d 100644 --- a/cdoc/XmlReader.cpp +++ b/cdoc/XmlReader.cpp @@ -19,6 +19,8 @@ #include "XmlReader.h" #include "Crypto.h" +#include "Io.h" +#include "Utils.h" #include @@ -41,6 +43,22 @@ static std::string tostring(pcxmlChar tmp) return result; } +#if LIBXML_VERSION < 21300 +static xmlParserInputPtr +nullExternalEntityLoader(const char *, const char *, xmlParserCtxtPtr) +{ + return nullptr; +} + +struct XmlInit { + XmlInit() { + xmlSetExternalEntityLoader(nullExternalEntityLoader); + xmlSubstituteEntitiesDefault(0); + } +}; +static XmlInit xmlInit; +#endif + XMLReader::XMLReader(libcdoc::DataSource &src) : d(xmlReaderForIO([](void *context, char *buffer, int len) -> int { auto *src = reinterpret_cast(context); @@ -56,6 +74,7 @@ XMLReader::~XMLReader() noexcept std::string XMLReader::attribute(const char *attr) const { + if (!d) return {}; xmlChar *tmp = xmlTextReaderGetAttribute(d, pcxmlChar(attr)); std::string result = tostring(tmp); xmlFree(tmp); @@ -64,27 +83,32 @@ std::string XMLReader::attribute(const char *attr) const bool XMLReader::isEndElement() const { + if (!d) return false; return xmlTextReaderNodeType(d) == XML_READER_TYPE_END_ELEMENT; } bool XMLReader::isElement(const char *elem) const { + if (!d) return false; return xmlStrEqual(xmlTextReaderConstLocalName(d), pcxmlChar(elem)) == 1; } bool XMLReader::read() { + if (!d) return false; return xmlTextReaderRead(d) == 1; } std::vector XMLReader::readBase64() { + if (!d) return {}; xmlTextReaderRead(d); return libcdoc::Crypto::decodeBase64(xmlTextReaderConstValue(d)); } std::string XMLReader::readText() { + if (!d) return {}; xmlTextReaderRead(d); return tostring(xmlTextReaderConstValue(d)); } diff --git a/cdoc/ZStream.h b/cdoc/ZStream.h index 28b8ddb3..eecfc5b5 100644 --- a/cdoc/ZStream.h +++ b/cdoc/ZStream.h @@ -24,6 +24,7 @@ #include #include +#include namespace libcdoc { @@ -45,24 +46,29 @@ struct ZConsumer : public DataConsumer { libcdoc::result_t write(const uint8_t *src, size_t size) noexcept final { if (_fail) return OUTPUT_ERROR; - _s.next_in = (z_const Bytef *) src; - _s.avail_in = uInt(size); + size_t total_written = 0; std::array out{}; - while(true) { - _s.next_out = (Bytef *)out.data(); - _s.avail_out = out.size(); - int res = deflate(&_s, flush); - if(res == Z_STREAM_ERROR) - return OUTPUT_ERROR; - auto o_size = out.size() - _s.avail_out; - if(o_size > 0) { - int64_t result = _dst->write(out.data(), o_size); - if (result != o_size) return result; + do { + size_t chunk = std::min(size - total_written, std::numeric_limits::max()); + _s.next_in = (z_const Bytef *) (src ? src + total_written : nullptr); + _s.avail_in = uInt(chunk); + while(true) { + _s.next_out = (Bytef *)out.data(); + _s.avail_out = out.size(); + int res = deflate(&_s, flush); + if(res == Z_STREAM_ERROR) + return OUTPUT_ERROR; + auto o_size = out.size() - _s.avail_out; + if(o_size > 0) { + int64_t result = _dst->write(out.data(), o_size); + if (result != o_size) return result; + } + if(res == Z_STREAM_END) break; + if(flush == Z_FINISH) continue; + if(_s.avail_in == 0) break; } - if(res == Z_STREAM_END) break; - if(flush == Z_FINISH) continue; - if(_s.avail_in == 0) break; - } + total_written += chunk; + } while (total_written < size); return size; } @@ -72,8 +78,8 @@ struct ZConsumer : public DataConsumer { libcdoc::result_t close() noexcept final { flush = Z_FINISH; - write (nullptr, 0); - deflateEnd(&_s); + libcdoc::result_t rv = write(nullptr, 0); + if (rv < 0) return rv; return _owned ? _dst->close() : OK; } }; @@ -99,34 +105,42 @@ struct ZSource : public DataSource { libcdoc::result_t read(uint8_t *dst, size_t size) noexcept final try { if (_error) return _error; - _s.next_out = (Bytef *) dst; - _s.avail_out = uInt (size); + size_t total_produced = 0; std::array in{}; - int res = Z_OK; - while((_s.avail_out > 0) && (res == Z_OK)) { - int64_t n_read = _src->read(in.data(), in.size()); - if (n_read > 0) { - buf.insert(buf.end(), in.begin(), in.begin() + n_read); - } else if (n_read != 0) { - _error = n_read; - return _error; - } - _s.next_in = (z_const Bytef *) buf.data(); - _s.avail_in = uInt(buf.size()); - res = inflate(&_s, flush); - switch(res) { - case Z_OK: - buf.erase(buf.begin(), buf.end() - _s.avail_in); - break; - case Z_STREAM_END: - buf.clear(); - break; - default: - _error = ZLIB_ERROR; - return _error; + while (total_produced < size) { + size_t chunk = std::min(size - total_produced, std::numeric_limits::max()); + _s.next_out = (Bytef *) (dst + total_produced); + _s.avail_out = uInt(chunk); + int res = Z_OK; + while((_s.avail_out > 0) && (res == Z_OK)) { + int64_t n_read = _src->read(in.data(), in.size()); + if (n_read > 0) { + buf.insert(buf.end(), in.begin(), in.begin() + n_read); + } else if (n_read != 0) { + _error = n_read; + return _error; + } + size_t buf_chunk = std::min(buf.size(), std::numeric_limits::max()); + _s.next_in = (z_const Bytef *) buf.data(); + _s.avail_in = uInt(buf_chunk); + res = inflate(&_s, flush); + switch(res) { + case Z_OK: + buf.erase(buf.begin(), buf.begin() + (buf_chunk - _s.avail_in)); + break; + case Z_STREAM_END: + buf.clear(); + break; + default: + _error = ZLIB_ERROR; + return _error; + } } + size_t produced = chunk - _s.avail_out; + total_produced += produced; + if (produced == 0) break; // no progress (EOF or stream end) } - return size - _s.avail_out; + return total_produced; } catch(...) { return INPUT_STREAM_ERROR; } diff --git a/cdoc/cdoc-tool.cpp b/cdoc/cdoc-tool.cpp index daddb3a3..3e655b74 100644 --- a/cdoc/cdoc-tool.cpp +++ b/cdoc/cdoc-tool.cpp @@ -124,7 +124,7 @@ load_certs(ToolConf& conf, const std::string& filename) } static void -inputSecret(std::vector& secret, std::string_view text) +inputSecret(SecureBytes& secret, std::string_view text) { cout << text; #ifdef _WIN32 @@ -305,14 +305,16 @@ parse_rcpt(ToolConf& conf, std::vector& rcpts, int& arg_idx, #ifndef NDEBUG // For debugging - cout << "Method: " << method << endl; - cout << "Slot: " << rcpt.p11.slot << endl; - if (!rcpt.secret.empty()) - cout << "Pin: " << string(rcpt.secret.cbegin(), rcpt.secret.cend()) << endl; + LOG_DBG("Method: {}", method); + LOG_DBG("Slot: {}", rcpt.p11.slot); + if (!rcpt.secret.empty()) { + string str(rcpt.secret.cbegin(), rcpt.secret.cend()); + LOG_TRACE("Pin: {}", str); + } if (!rcpt.p11.key_id.empty()) - cout << "Key ID: " << toHex(rcpt.p11.key_id) << endl; + LOG_DBG("Key ID: {}", toHex(rcpt.p11.key_id)); if (!rcpt.p11.key_label.empty()) - cout << "Key label: " << rcpt.p11.key_label << endl; + LOG_DBG("Key label: {}", rcpt.p11.key_label); #endif } else if (method == "share") { // label:share:RECIPIENT_ID @@ -334,7 +336,7 @@ struct LockData { int64_t slot = -1; vector key_id; string key_label; - vector secret; + SecureBytes secret; int validate(ToolConf& conf) { if (lock_idx == 0) { @@ -709,7 +711,7 @@ int main(int argc, char *argv[]) return 1; } - libcdoc::setLogLevel(LEVEL_TRACE); + libcdoc::setLogLevel(LEVEL_WARNING); string_view command(argv[1]); diff --git a/cdoc/httplib.h b/cdoc/httplib.h index 465219d0..41fef190 100644 --- a/cdoc/httplib.h +++ b/cdoc/httplib.h @@ -1442,7 +1442,7 @@ class ClientImpl { void set_proxy(const std::string &host, int port); void set_proxy_basic_auth(const std::string &username, - const std::string &password); + const std::string_view &password); void set_proxy_bearer_token_auth(const std::string &token); #ifdef CPPHTTPLIB_OPENSSL_SUPPORT void set_proxy_digest_auth(const std::string &username, @@ -1875,7 +1875,7 @@ class Client { void set_proxy(const std::string &host, int port); void set_proxy_basic_auth(const std::string &username, - const std::string &password); + const std::string_view &password); void set_proxy_bearer_token_auth(const std::string &token); #ifdef CPPHTTPLIB_OPENSSL_SUPPORT void set_proxy_digest_auth(const std::string &username, @@ -8786,7 +8786,7 @@ inline void ClientImpl::set_proxy(const std::string &host, int port) { } inline void ClientImpl::set_proxy_basic_auth(const std::string &username, - const std::string &password) { + const std::string_view &password) { proxy_basic_auth_username_ = username; proxy_basic_auth_password_ = password; } @@ -10190,7 +10190,7 @@ inline void Client::set_proxy(const std::string &host, int port) { cli_->set_proxy(host, port); } inline void Client::set_proxy_basic_auth(const std::string &username, - const std::string &password) { + const std::string_view &password) { cli_->set_proxy_basic_auth(username, password); } inline void Client::set_proxy_bearer_token_auth(const std::string &token) { diff --git a/cdoc/json/base.h b/cdoc/json/base.h index 7258b2e7..3682abac 100644 --- a/cdoc/json/base.h +++ b/cdoc/json/base.h @@ -163,7 +163,7 @@ namespace jwt { } }; - inline padding count_padding(const std::string& base, const std::vector& fills) { + inline padding count_padding(std::string_view base, const std::vector& fills) { for (const auto& fill : fills) { if (base.size() < fill.size()) continue; // Does the end of the input exactly match the fill pattern? @@ -225,7 +225,7 @@ namespace jwt { return res; } - inline std::string decode(const std::string& base, const std::array& rdata, + inline std::string decode(std::string_view base, const std::array& rdata, const std::vector& fill) { const auto pad = count_padding(base, fill); if (pad.count > 2) throw std::runtime_error("Invalid input: too much fill"); @@ -271,7 +271,7 @@ namespace jwt { return res; } - inline std::string decode(const std::string& base, const std::array& rdata, + inline std::string decode(std::string_view base, const std::array& rdata, const std::string& fill) { return decode(base, rdata, std::vector{fill}); } diff --git a/cdoc/utils/ct.h b/cdoc/utils/ct.h new file mode 100644 index 00000000..375a9721 --- /dev/null +++ b/cdoc/utils/ct.h @@ -0,0 +1,71 @@ +/* + * libcdoc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#pragma once + +#include +#include + +// Branch-free, data-independent helpers used by the constant-time PKCS#1 v1.5 +// unpadding implementation. +// +// The compiler is allowed - in principle - to "optimise" any of these into +// branches; in practice, on GCC/Clang/MSVC at any reasonable optimisation +// level, none of them produce conditional jumps because the inputs are +// integer expressions without short-circuit operators. We rely on this +// observation, periodically verify it via dudect-style timing tests, and +// avoid further hardening (assembly, OPENSSL_cleanse-style barriers) to keep +// the code portable across the platforms libcdoc targets. + +namespace libcdoc::ct { + +// Returns 0xFF when a == b, otherwise 0x00. Branch-free for 8-bit inputs. +constexpr uint8_t eq8(uint8_t a, uint8_t b) noexcept { + // x is 0 iff a == b; otherwise 1..255. Subtracting 1 underflows to a + // very large value when x == 0, so the high byte of (x - 1) is 0xFF + // exactly when a == b. + uint16_t x = uint16_t(a ^ b); + return uint8_t(((uint32_t(x) - 1u) >> 8) & 0xFFu); +} + +// Returns 0xFF when a >= b, otherwise 0x00. Branch-free for size_t inputs. +constexpr uint8_t ge_size(size_t a, size_t b) noexcept { + // (b - a - 1) wraps to a huge value when a >= b, putting 1 in the top bit + constexpr size_t shift = sizeof(size_t) * 8u - 1u; + size_t top_bit = (b - a - 1u) >> shift; // 1 if a < b, 0 if a >= b + return uint8_t(top_bit * 0xFFu); +} + +// Returns 0xFF when a == b, otherwise 0x00 (32-bit operands). +constexpr uint8_t eq32(uint32_t a, uint32_t b) noexcept { + uint32_t x = a ^ b; + // (x - 1) >> 31 is 1 iff x == 0 + return uint8_t(((x - 1u) >> 31) & 1u) * 0xFFu; +} + +// Constant-time conditional-copy: out[i] = mask ? a[i] : b[i] for n bytes. +// `mask` must be 0x00 or 0xFF. +inline void cmov(uint8_t *out, const uint8_t *a, const uint8_t *b, + size_t n, uint8_t mask) noexcept { + const uint8_t inv = uint8_t(~mask); + for (size_t i = 0; i < n; ++i) { + out[i] = uint8_t((a[i] & mask) | (b[i] & inv)); + } +} + +} // namespace libcdoc::ct diff --git a/cdoc/utils/memory.h b/cdoc/utils/memory.h index 708b313a..c441a5a2 100644 --- a/cdoc/utils/memory.h +++ b/cdoc/utils/memory.h @@ -19,11 +19,227 @@ #pragma once #include +#include #include +#include #include +#define OPENSSL_SUPPRESS_DEPRECATED +#include + +#ifdef _WIN32 +#include +#else +#include +#endif + namespace libcdoc { +template +void cleanse(std::vector& v) noexcept +{ + if (!v.empty()) { + OPENSSL_cleanse(v.data(), v.size() * sizeof(T)); + } +} + +template +void cleanse(std::array& a) noexcept +{ + if (!a.empty()) { + OPENSSL_cleanse(a.data(), a.size() * sizeof(T)); + } +} + +class SecureBytes { + std::vector data_; + bool locked_ = false; + + void lock() noexcept { + if (!data_.empty() && !locked_) { +#ifdef _WIN32 + locked_ = VirtualLock(data_.data(), data_.size()); +#else + locked_ = (mlock(data_.data(), data_.size()) == 0); +#endif + } + } + + void unlock() noexcept { + if (!data_.empty() && locked_) { +#ifdef _WIN32 + VirtualUnlock(data_.data(), data_.size()); +#else + munlock(data_.data(), data_.size()); +#endif + locked_ = false; + } + } + +public: + using iterator = std::vector::iterator; + using const_iterator = std::vector::const_iterator; + + SecureBytes() noexcept = default; + + ~SecureBytes() { + cleanse(); + unlock(); + } + + SecureBytes(const SecureBytes& other) : data_(other.data_) { + lock(); + } + + SecureBytes(SecureBytes&& other) noexcept : data_(std::move(other.data_)), locked_(other.locked_) { + other.locked_ = false; + } + + SecureBytes& operator=(const SecureBytes& other) { + if (this != &other) { + cleanse(); + unlock(); + data_ = other.data_; + lock(); + } + return *this; + } + + SecureBytes& operator=(SecureBytes&& other) noexcept { + if (this != &other) { + cleanse(); + unlock(); + data_ = std::move(other.data_); + locked_ = other.locked_; + other.locked_ = false; + } + return *this; + } + + SecureBytes& operator=(std::vector v) { + cleanse(); + unlock(); + data_ = std::move(v); + lock(); + return *this; + } + + SecureBytes(std::vector v) noexcept : data_(std::move(v)) { + lock(); + } + + SecureBytes(const std::string& s) : data_(s.cbegin(), s.cend()) { + lock(); + } + + template + SecureBytes(InputIt first, InputIt last) : data_(first, last) { + lock(); + } + + explicit SecureBytes(size_t size) : data_(size) { + lock(); + } + + template + void assign(InputIt first, InputIt last) { + cleanse(); + unlock(); + data_.assign(first, last); + lock(); + } + + [[nodiscard]] bool empty() const noexcept { return data_.empty(); } + [[nodiscard]] size_t size() const noexcept { return data_.size(); } + [[nodiscard]] const uint8_t* data() const noexcept { return data_.data(); } + [[nodiscard]] uint8_t* data() noexcept { return data_.data(); } + [[nodiscard]] const uint8_t& operator[](size_t i) const noexcept { return data_[i]; } + [[nodiscard]] uint8_t& operator[](size_t i) noexcept { return data_[i]; } + + [[nodiscard]] const_iterator cbegin() const noexcept { return data_.cbegin(); } + [[nodiscard]] const_iterator cend() const noexcept { return data_.cend(); } + [[nodiscard]] iterator begin() noexcept { return data_.begin(); } + [[nodiscard]] iterator end() noexcept { return data_.end(); } + [[nodiscard]] const_iterator begin() const noexcept { return data_.begin(); } + [[nodiscard]] const_iterator end() const noexcept { return data_.end(); } + + void resize(size_t n) { + unlock(); + data_.resize(n); + lock(); + } + + void clear() { + cleanse(); + unlock(); + data_.clear(); + } + + void cleanse() noexcept { + ::libcdoc::cleanse(data_); + } + + [[nodiscard]] operator const std::vector&() const noexcept { return data_; } + + [[nodiscard]] bool operator==(const SecureBytes& other) const noexcept { + if (data_.size() != other.data_.size()) return false; + return CRYPTO_memcmp(data_.data(), other.data_.data(), data_.size()) == 0; + } + + [[nodiscard]] bool operator!=(const SecureBytes& other) const noexcept { + return !(*this == other); + } +}; + +/** + * @brief Scope guard that wipes a contiguous secret on destruction. + * + * Wraps a reference to a @c std::vector (or @c std::array) + * and calls @ref libcdoc::cleanse on it from the destructor, including the + * exceptional and early-return paths. Intended for the short-lived KEK / FMK + * pre-master / shared-secret buffers in CDoc2Reader / CDoc2Writer where every + * function has multiple early-return branches and remembering to cleanse at + * each one is fragile. + * + * Note: this only wipes the *currently-allocated* storage. It does NOT wipe + * earlier allocations that @c std::vector may have freed during a resize. + * For long-lived secrets that get assigned over multiple times, use + * @ref SecureBytes (which serialises through cleanse/unlock on each resize) + * or a fixed-size container. + * + * Usage: + * @code + * std::vector kek; + * Cleanser kek_guard(kek); // wipes `kek` on every exit from this scope + * ... + * if (failure) return ERROR; // kek is wiped before unwind + * ... + * @endcode + */ +template +class Cleanser { +public: + explicit Cleanser(Container& c) noexcept : c_(c) {} + ~Cleanser() noexcept { libcdoc::cleanse(c_); } + + Cleanser(const Cleanser&) = delete; + Cleanser& operator=(const Cleanser&) = delete; + Cleanser(Cleanser&&) = delete; + Cleanser& operator=(Cleanser&&) = delete; +private: + Container& c_; +}; + +// Class template argument deduction: `Cleanser g(vec);` infers the type. +template +Cleanser(Container&) -> Cleanser; + +inline bool constant_time_compare(const std::vector& a, const std::vector& b) noexcept +{ + if (a.size() != b.size()) return false; + return CRYPTO_memcmp(a.data(), b.data(), a.size()) == 0; +} + template struct free_deleter { @@ -89,4 +305,4 @@ constexpr auto d2i(const std::vector &data, Args&&... args) noexcept return make_unique_ptr(F(std::forward(args)..., &p, long(data.size())), Free); } -} \ No newline at end of file +} diff --git a/doc/usage.md b/doc/usage.md index b778ff51..27eb6d74 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -138,7 +138,7 @@ Returns the client's TLS certificate for authentication with the key-server. int getPeerTLSCertificates(std::vector> &dst) ``` -Returns the list of acceptable peer certificates for the key-server. +Returns the list of acceptable peer certificates for the key-server. Returning empty list disables TLS peer certificate check in debug builds but results in CONFIGURATION_ERROR in release builds. #### `signTLS` diff --git a/examples/java/src/main/java/ee/ria/cdoc/CDocTool.java b/examples/java/src/main/java/ee/ria/cdoc/CDocTool.java index e2a2f7c5..2b0cf6be 100644 --- a/examples/java/src/main/java/ee/ria/cdoc/CDocTool.java +++ b/examples/java/src/main/java/ee/ria/cdoc/CDocTool.java @@ -301,7 +301,7 @@ static void encrypt(String file, String label, String password, Collection +#include +%} + +namespace std { + +%naturalvar string_view; + +class string_view; + +// string_view +%typemap(jni) string_view "jstring" +%typemap(jtype) string_view "String" +%typemap(jstype) string_view "String" +%typemap(javadirectorin) string_view "$jniinput" +%typemap(javadirectorout) string_view "$javacall" + +%typemap(in) string_view +%{ if(!$input) { + SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string"); + return $null; + } + const char *$1_pstr = jenv->GetStringUTFChars($input, 0); + if (!$1_pstr) return $null; + $1 = std::string_view($1_pstr); %} + +/* std::string_view requires the string data to remain valid while the + * string_view is in use. */ +%typemap(freearg) string_view +%{ jenv->ReleaseStringUTFChars($input, $1_pstr); %} + +%typemap(directorout,warning=SWIGWARN_TYPEMAP_THREAD_UNSAFE_MSG) string_view +%{ if(!$input) { + if (!jenv->ExceptionCheck()) { + SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string"); + } + return $null; + } + const char *$1_pstr = jenv->GetStringUTFChars($input, 0); + if (!$1_pstr) return $null; + /* possible thread/reentrant code problem */ + thread_local std::string $1_str; + $1_str = $1_pstr; + $result = std::string_view($1_str); + jenv->ReleaseStringUTFChars($input, $1_pstr); %} + +/* std::string_view::data() isn't zero-byte terminated, but NewStringUTF() + * requires a zero byte so it seems we have to make a copy (ick). The + * cleanest way to do that seems to be via a temporary std::string. + */ +%typemap(directorin,descriptor="Ljava/lang/String;") string_view +%{ $input = jenv->NewStringUTF(std::string($1).c_str()); + Swig::LocalRefGuard $1_refguard(jenv, $input); %} + +%typemap(out) string_view +%{ $result = jenv->NewStringUTF(std::string($1).c_str()); %} + +%typemap(javain) string_view "$javainput" + +%typemap(javaout) string_view { + return $jnicall; + } + +%typemap(typecheck) string_view = char *; + +%typemap(throws) string_view +%{ SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, std::string($1).c_str()); + return $null; %} + +// const string_view & +%typemap(jni) const string_view & "jstring" +%typemap(jtype) const string_view & "String" +%typemap(jstype) const string_view & "String" +%typemap(javadirectorin) const string_view & "$jniinput" +%typemap(javadirectorout) const string_view & "$javacall" + +%typemap(in) const string_view & +%{ if(!$input) { + SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string"); + return $null; + } + const char *$1_pstr = jenv->GetStringUTFChars($input, 0); + if (!$1_pstr) return $null; + $*1_ltype $1_str($1_pstr); + $1 = &$1_str; %} + +/* std::string_view requires the string data to remain valid while the + * string_view is in use. */ +%typemap(freearg) const string_view & +%{ jenv->ReleaseStringUTFChars($input, $1_pstr); %} + +%typemap(directorout,warning=SWIGWARN_TYPEMAP_THREAD_UNSAFE_MSG) const string_view & +%{ if(!$input) { + SWIG_JavaThrowException(jenv, SWIG_JavaNullPointerException, "null string"); + return $null; + } + const char *$1_pstr = jenv->GetStringUTFChars($input, 0); + if (!$1_pstr) return $null; + /* possible thread/reentrant code problem */ + thread_local std::string $1_str; + $1_str = $1_pstr; + thread_local $*1_ltype $1_strview; + $1_strview = $1_str; + $result = &$1_strview; + jenv->ReleaseStringUTFChars($input, $1_pstr); %} + +%typemap(directorin,descriptor="Ljava/lang/String;") const string_view & +%{ $input = jenv->NewStringUTF(std::string($1).c_str()); + Swig::LocalRefGuard $1_refguard(jenv, $input); %} + +%typemap(out) const string_view & +%{ $result = jenv->NewStringUTF(std::string(*$1).c_str()); %} + +%typemap(javain) const string_view & "$javainput" + +%typemap(javaout) const string_view & { + return $jnicall; + } + +%typemap(typecheck) const string_view & = char *; + +%typemap(throws) const string_view & +%{ SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException, std::string($1).c_str()); + return $null; %} + +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a5850d1c..31f758d0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,7 @@ add_executable(unittests libcdoc_boost.cpp ../cdoc/Crypto.cpp + ../cdoc/Tar.cpp ) target_link_libraries(unittests diff --git a/test/libcdoc_boost.cpp b/test/libcdoc_boost.cpp index 8573e4f6..3ed35c89 100644 --- a/test/libcdoc_boost.cpp +++ b/test/libcdoc_boost.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -312,6 +313,7 @@ decrypt(const std::vector& files, const std::string& container, con libcdoc::RcptInfo rcpt {.type=libcdoc::RcptInfo::LOCK, .secret=key, .lock_idx=idx}; decrypt(files, container, dir, rcpt, remove); } + static int unicode_to_utf8 (unsigned int uval, uint8_t *d, uint64_t size) { @@ -391,7 +393,7 @@ BOOST_FIXTURE_TEST_CASE_WITH_DECOR(CDoc2EncryptErrors, EncryptFixture, BOOST_TEST(wrt->finishEncryption() == libcdoc::WORKFLOW_ERROR); // Add recipient - libcdoc::Recipient rcpt = libcdoc::Recipient::makeSymmetric("test-recipient", 65536); + libcdoc::Recipient rcpt = libcdoc::Recipient::makeSymmetric("test-recipient", 600000); BOOST_TEST(wrt->addRecipient(rcpt) == libcdoc::OK); // Encryption cannot proceed before beginEncryption is called BOOST_TEST(wrt->addFile("testfile", 1024) == libcdoc::WORKFLOW_ERROR); @@ -660,7 +662,7 @@ BOOST_FIXTURE_TEST_CASE_WITH_DECOR(EncryptWithPasswordAndLabel, FixtureBase, * u // Create writer libcdoc::CDocWriter *writer = libcdoc::CDocWriter::createWriter(2, &pipec, false, nullptr, &pcrypto, nullptr); BOOST_TEST(writer != nullptr); - libcdoc::Recipient rcpt = libcdoc::Recipient::makeSymmetric("test", 65536); + libcdoc::Recipient rcpt = libcdoc::Recipient::makeSymmetric("test", 600000); BOOST_TEST(writer->addRecipient(rcpt) == libcdoc::OK); BOOST_TEST(writer->beginEncryption() == libcdoc::OK); @@ -859,6 +861,120 @@ BOOST_FIXTURE_TEST_CASE(NonAsciiFilename, PaxFixture) BOOST_TEST(fs::exists(outDir / namePath)); } +// Build a single 512-byte ustar header block with the given typeflag, +// name and declared size. The checksum is computed correctly so the +// header passes Header::verify(). Returns a 512-byte vector. +static std::vector +makeTarHeader(char typeflag, std::string_view name, int64_t size) +{ + std::vector block(512, 0); + + // name (100 bytes, NUL-terminated within the field) + std::copy(name.begin(), + name.begin() + std::min(name.size(), 99), + block.begin()); + + // mode "0000600\0", uid "0000000\0", gid "0000000\0" + auto write_octal_field = [&](size_t offset, size_t width, int64_t value) { + std::string s(width - 1, '0'); + for (size_t i = 0; i < width - 1 && value > 0; ++i) { + s[width - 2 - i] = char('0' + (value & 7)); + value >>= 3; + } + std::copy(s.begin(), s.end(), block.begin() + offset); + // trailing NUL is already zero-filled + }; + write_octal_field(100, 8, 0600); // mode + write_octal_field(108, 8, 0); // uid + write_octal_field(116, 8, 0); // gid + write_octal_field(124, 12, size); // size <-- attacker-tamperable + write_octal_field(136, 12, 0); // mtime + + // chksum field: 8 spaces during checksum calculation + std::fill(block.begin() + 148, block.begin() + 156, uint8_t(' ')); + + // typeflag + block[156] = uint8_t(typeflag); + + // ustar magic + version + constexpr std::string_view magic{"ustar\0", 6}; + std::copy(magic.begin(), magic.end(), block.begin() + 257); + block[263] = '0'; + block[264] = '0'; + + // Compute and write the checksum: unsigned sum of all bytes with + // chksum replaced by spaces. Field is 6 octal digits + NUL + space. + int64_t sum = 0; + for (uint8_t b : block) sum += b; + std::string chk(7, '0'); + for (size_t i = 0; i < 6 && sum > 0; ++i) { + chk[5 - i] = char('0' + (sum & 7)); + sum >>= 3; + } + chk[6] = '\0'; + std::copy(chk.begin(), chk.end(), block.begin() + 148); + block[155] = ' '; + + return block; +} + +BOOST_AUTO_TEST_CASE(RejectsOversizedPaxExtendedHeader) +{ + // Craft a valid 'x' (extended PAX) header that declares a 100 MiB + // payload. The traditional ustar size field is 12 bytes (11 octal + // digits + NUL), capping the directly-encoded size at ~8 GiB minus + // one; we pick a value comfortably below that ceiling but still + // many orders of magnitude above the 64 KiB cap on auxiliary + // headers. Without H-2 in place, TarSource::readPaxHeader would + // happily allocate 100 MiB and try to read 100 MiB from the stream + // - times every malicious 'x' header, which is the DoS the cap + // exists to prevent. + constexpr int64_t kBadSize = 100LL * 1024 * 1024; + std::vector stream = makeTarHeader('x', "PaxHeaders/x", kBadSize); + + libcdoc::VectorSource src(stream); + libcdoc::TarSource tar_src(&src, /*take_ownership=*/false); + std::string name; + int64_t size = 0; + libcdoc::result_t rv = tar_src.next(name, size); + + BOOST_CHECK_EQUAL(rv, libcdoc::DATA_FORMAT_ERROR); + BOOST_CHECK(tar_src.isError()); +} + +BOOST_AUTO_TEST_CASE(RejectsOversizedGlobalPaxHeader) +{ + // Same defence on the 'g' (global PAX) skip path. next() must reject + // the header without spinning the upstream source through 100 MiB. + constexpr int64_t kBadSize = 100LL * 1024 * 1024; + std::vector stream = makeTarHeader('g', "PaxHeaders/g", kBadSize); + + libcdoc::VectorSource src(stream); + libcdoc::TarSource tar_src(&src, /*take_ownership=*/false); + std::string name; + int64_t size = 0; + libcdoc::result_t rv = tar_src.next(name, size); + + BOOST_CHECK_EQUAL(rv, libcdoc::DATA_FORMAT_ERROR); + BOOST_CHECK(tar_src.isError()); +} + +BOOST_AUTO_TEST_CASE(AllowsReasonablePaxHeaderSize) +{ + // Sanity check: a PAX header with a small, plausible size (one + // 'path' record for a 50-byte name) must still parse. We do not + // include the actual data in the stream, so readPaxHeader will + // surface INPUT_STREAM_ERROR after the cap check passes - the + // important thing is that DATA_FORMAT_ERROR is NOT returned. + std::vector stream = makeTarHeader('x', "PaxHeaders/x", 60); + libcdoc::VectorSource src(stream); + libcdoc::TarSource tar_src(&src, /*take_ownership=*/false); + std::string name; + int64_t size = 0; + libcdoc::result_t rv = tar_src.next(name, size); + BOOST_CHECK_NE(rv, libcdoc::DATA_FORMAT_ERROR); +} + BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE(StreamingDecryption) @@ -907,3 +1023,263 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(constructor, Buf, BufTypes) } BOOST_AUTO_TEST_SUITE_END() + +// Regression coverage for libcdoc::sanitiseExtractedFilename(). All inputs +// here come from attacker-controlled archive headers (tar / DDoc); the +// helper is the single chokepoint that decides whether an entry can ever +// reach the filesystem. +BOOST_AUTO_TEST_SUITE(SanitiseExtractedFilename) + +BOOST_AUTO_TEST_CASE(PassesThroughOrdinaryNames) +{ + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("hello.txt"), "hello.txt"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("a-b_c.dat"), "a-b_c.dat"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("file with spaces.txt"), + "file with spaces.txt"); + // Non-ASCII (UTF-8) names must round-trip - libcdoc treats names as + // opaque UTF-8. + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("\xC3\xB5\xC3\xA4\xC3\xB6.txt"), + "\xC3\xB5\xC3\xA4\xC3\xB6.txt"); +} + +BOOST_AUTO_TEST_CASE(StripsLeadingDirectoryComponents) +{ + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("a/b/c.txt"), "c.txt"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("a\\b\\c.txt"), "c.txt"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("/etc/passwd"), "passwd"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("../foo.txt"), "foo.txt"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("a/../foo.txt"), "foo.txt"); +} + +BOOST_AUTO_TEST_CASE(RejectsTraversalAndEmpty) +{ + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename(""), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("."), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename(".."), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("../"), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("..\\"), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("foo/.."), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("/"), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("a/b/"), ""); +} + +BOOST_AUTO_TEST_CASE(StripsWindowsDriveRelativeNames) +{ + // "C:foo" with NO slash is a drive-relative path on Windows. On POSIX + // it would normally pass through, but libcdoc applies the same filter + // on every platform so a malicious archive cannot rely on platform- + // specific quirks. + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("C:foo.txt"), "foo.txt"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("z:bar"), "bar"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("C:"), ""); + // After a slash strip, the drive prefix on the leaf is also handled. + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("a/C:foo"), "foo"); +} + +BOOST_AUTO_TEST_CASE(RejectsControlCharsAndNul) +{ + // Embedded NUL is a Windows API truncation hazard. + std::string with_nul("foo\0bar.txt", 11); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename(with_nul), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename(std::string("a\x01" "b")), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename(std::string("a\x1F" "b")), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename(std::string("a\nb")), ""); + // Tab is allowed (whitespace, not a control character that breaks + // filesystems on the platforms libcdoc supports). + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("a\tb"), "a\tb"); +} + +BOOST_AUTO_TEST_CASE(TrimsTrailingDotsAndSpaces) +{ + // Windows silently strips trailing dots/spaces when creating files, + // so "evil.exe " and "evil.exe." both resolve to "evil.exe". Strip + // them before composing the path so we can't be tricked into + // colliding with a legitimate name. + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("foo.txt..."), "foo.txt"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("foo.txt "), "foo.txt"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("foo.txt . . "), "foo.txt"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("..."), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename(" "), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename(" hello "), "hello"); +} + +BOOST_AUTO_TEST_CASE(RejectsReservedWindowsDeviceNames) +{ + // On Windows these are device handles regardless of working + // directory. They would not actually create a file at base/CON, but + // would open the console device and any subsequent write goes there. + for (auto name : {"CON", "PRN", "AUX", "NUL", + "com1", "Com2", "LPT1", "lpt9"}) { + BOOST_TEST_INFO("name=" << name); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename(name), ""); + } + // Reserved name with extension is also reserved on Windows. + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("CON.txt"), ""); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("nul.tar.gz"), ""); + // Names that *contain* a reserved word as a substring are NOT + // reserved (e.g. "console.log"). + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("console.log"), "console.log"); + BOOST_CHECK_EQUAL(libcdoc::sanitiseExtractedFilename("nullable"), "nullable"); +} + +BOOST_AUTO_TEST_CASE(TruncatesOverlongNames) +{ + std::string long_stem(300, 'a'); + auto result = libcdoc::sanitiseExtractedFilename(long_stem + ".dat"); + BOOST_CHECK_LE(result.size(), 255u); + BOOST_CHECK(result.ends_with(".dat")); // extension preserved + // No-extension version simply truncates. + auto truncated = libcdoc::sanitiseExtractedFilename(std::string(400, 'b')); + BOOST_CHECK_EQUAL(truncated.size(), 255u); +} + +BOOST_AUTO_TEST_SUITE_END() + +// Coverage for libcdoc::Cleanser, the RAII guard used by CDoc2Reader::getFMK +// and CDoc2Writer::buildHeader to wipe short-lived KEK / FMK material on +// every exit including exceptions. +BOOST_AUTO_TEST_SUITE(CleanserGuard) + +BOOST_AUTO_TEST_CASE(WipesVectorOnScopeExit) +{ + std::vector secret(32, 0xAA); + { + libcdoc::Cleanser guard(secret); + BOOST_CHECK_EQUAL(secret.front(), 0xAA); // not yet wiped + } + // After the scope exits the destructor runs OPENSSL_cleanse on the + // current allocation; the vector keeps its size but every byte is 0. + BOOST_CHECK_EQUAL(secret.size(), 32u); + for (uint8_t b : secret) + BOOST_CHECK_EQUAL(b, 0u); +} + +BOOST_AUTO_TEST_CASE(WipesArrayOnScopeExit) +{ + std::array secret{}; + secret.fill(0x55); + { + libcdoc::Cleanser guard(secret); + } + for (uint8_t b : secret) + BOOST_CHECK_EQUAL(b, 0u); +} + +BOOST_AUTO_TEST_CASE(WipesOnException) +{ + // The whole point of the RAII guard: on an exception thrown out of + // the protected scope, the destructor still fires and the secret is + // wiped before the exception unwinds past the caller. This is the + // failure mode where the audit found the missing cleanses in + // CDoc2Reader::getFMK. + std::vector secret(8, 0xCC); + auto throws = [&]{ + libcdoc::Cleanser guard(secret); + throw std::runtime_error("boom"); + }; + BOOST_CHECK_THROW(throws(), std::runtime_error); + for (uint8_t b : secret) + BOOST_CHECK_EQUAL(b, 0u); +} + +BOOST_AUTO_TEST_CASE(EmptyVectorIsHarmless) +{ + // Edge case: cleanse() short-circuits on an empty container. The + // guard must not crash or call OPENSSL_cleanse with a null pointer. + std::vector empty; + { + libcdoc::Cleanser guard(empty); + } + BOOST_CHECK(empty.empty()); +} + +BOOST_AUTO_TEST_SUITE_END() + +// Coverage for libcdoc::parseEtsiRecipientId. The helper is the input- +// validation chokepoint for the Mobile-ID / Smart-ID code paths; +// signMID in particular previously called rcpt_id.substr(11, 11) +// without checking the input, which threw std::out_of_range on short +// ids and silently truncated medium-length ones. +BOOST_AUTO_TEST_SUITE(EtsiRecipientIdParsing) + +BOOST_AUTO_TEST_CASE(AcceptsCanonicalEstonian) +{ + auto p = libcdoc::parseEtsiRecipientId("etsi/PNOEE-30303039914"); + BOOST_TEST_REQUIRE(p.valid()); + BOOST_CHECK_EQUAL(p.country, "EE"); + BOOST_CHECK_EQUAL(p.national_id, "30303039914"); +} + +BOOST_AUTO_TEST_CASE(AcceptsOtherCountryCodes) +{ + // The PNO format is shared across SK markets; all that matters is + // that the country code is two ASCII letters. + auto p = libcdoc::parseEtsiRecipientId("etsi/PNOLT-12345678901"); + BOOST_TEST_REQUIRE(p.valid()); + BOOST_CHECK_EQUAL(p.country, "LT"); + BOOST_CHECK_EQUAL(p.national_id, "12345678901"); +} + +BOOST_AUTO_TEST_CASE(NormalisesCountryToUpperCase) +{ + auto p = libcdoc::parseEtsiRecipientId("etsi/PNOee-30303039914"); + BOOST_TEST_REQUIRE(p.valid()); + BOOST_CHECK_EQUAL(p.country, "EE"); +} + +BOOST_AUTO_TEST_CASE(RejectsShortInput) +{ + // The previous implementation in signMID threw std::out_of_range + // for any input shorter than 11 characters. The helper must reject + // these cleanly with .valid() == false. + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("").valid()); + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/").valid()); + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNO").valid()); + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNOEE").valid()); + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNOEE-").valid()); + // 11 characters but not the right shape. + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/short!").valid()); +} + +BOOST_AUTO_TEST_CASE(RejectsBadPrefix) +{ + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("ETSI/PNOEE-30303039914").valid()); // case-sensitive prefix + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/IDEE-30303039914").valid()); + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("foo/PNOEE-30303039914").valid()); +} + +BOOST_AUTO_TEST_CASE(RejectsNonLetterCountryCode) +{ + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNO12-30303039914").valid()); + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNO-E-30303039914").valid()); + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNOE -30303039914").valid()); +} + +BOOST_AUTO_TEST_CASE(RejectsMissingSeparator) +{ + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNOEE.30303039914").valid()); + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNOEE/30303039914").valid()); + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNOEEX0303039914").valid()); +} + +BOOST_AUTO_TEST_CASE(RejectsNonDigitNationalId) +{ + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNOEE-30303039 14").valid()); + BOOST_CHECK(!libcdoc::parseEtsiRecipientId("etsi/PNOEE-3030303991a").valid()); + // Embedded NUL. + BOOST_CHECK(!libcdoc::parseEtsiRecipientId(std::string("etsi/PNOEE-3030\0039914", 22)).valid()); +} + +BOOST_AUTO_TEST_CASE(RejectsOversizedNationalId) +{ + // 32-byte national id is the documented upper bound; one byte more + // is rejected. + auto p32 = libcdoc::parseEtsiRecipientId("etsi/PNOEE-" + std::string(32, '1')); + BOOST_CHECK(p32.valid()); + auto p33 = libcdoc::parseEtsiRecipientId("etsi/PNOEE-" + std::string(33, '1')); + BOOST_CHECK(!p33.valid()); + auto pHuge = libcdoc::parseEtsiRecipientId("etsi/PNOEE-" + std::string(1024, '1')); + BOOST_CHECK(!pHuge.valid()); +} + +BOOST_AUTO_TEST_SUITE_END()