Use custom utf8/16 conversion instead of iconv

We can avoid the additional dependency by using a few functions from systemd.
2025-12-05 16:00:05 +01:00 · 2021-12-31 13:56:13 +01:00
parent e12ce642a1
commit 6e47fb6d85
7 changed files with 322 additions and 79 deletions
--- a/configure.ac
+++ b/configure.ac
@@ -34,7 +34,6 @@ AC_PROG_MKDIR_P
 AC_ENABLE_STATIC(no)
 LT_INIT
 PKG_PROG_PKG_CONFIG
 AM_ICONV
 dnl ==========================================================================
 dnl define PKG_CHECK_VAR for old pkg-config <= 0.28
--- a/lib/Makemodule.am
+++ b/lib/Makemodule.am
@@ -33,7 +33,6 @@ libcryptsetup_la_LIBADD = \
 	@JSON_C_LIBS@		\
 	@BLKID_LIBS@		\
 	@DL_LIBS@		\
 	$(LTLIBICONV)		\
 	$(LTLIBINTL)		\
 	libcrypto_backend.la	\
 	libutils_io.la
--- a/lib/bitlk/bitlk.c
+++ b/lib/bitlk/bitlk.c
@@ -24,7 +24,6 @@
 #include <string.h>
 #include <uuid/uuid.h>
 #include <time.h>
 #include <iconv.h>
 #include <limits.h>
 #include "bitlk.h"
@@ -247,73 +246,6 @@ static uint64_t filetime_to_unixtime(uint64_t time)
 	return (time - EPOCH_AS_FILETIME) / HUNDREDS_OF_NANOSECONDS;
 }
 static int convert_to_utf8(struct crypt_device *cd, uint8_t *input, size_t inlen, char **out)
 {
 	char *outbuf = NULL;
 	iconv_t ic;
 	size_t ic_inlen = inlen;
 	size_t ic_outlen = inlen;
 	char *ic_outbuf = NULL;
 	size_t r = 0;
 	outbuf = malloc(inlen);
 	if (outbuf == NULL)
 		return -ENOMEM;
 	memset(outbuf, 0, inlen);
 	ic_outbuf = outbuf;
 	ic = iconv_open("UTF-8", "UTF-16LE");
 	r = iconv(ic, (char **) &input, &ic_inlen, &ic_outbuf, &ic_outlen);
 	iconv_close(ic);
 	if (r == 0)
 		*out = strdup(outbuf);
 	else {
 		*out = NULL;
 		log_dbg(cd, "Failed to convert volume description: %s", strerror(errno));
 		r = 0;
 	}
 	free(outbuf);
 	return r;
 }
 static int passphrase_to_utf16(struct crypt_device *cd, char *input, size_t inlen, char **out)
 {
 	char *outbuf = NULL;
 	iconv_t ic;
 	size_t ic_inlen = inlen;
 	size_t ic_outlen = inlen * 2;
 	char *ic_outbuf = NULL;
 	size_t r = 0;
 	if (inlen == 0)
 		return r;
 	outbuf = crypt_safe_alloc(inlen * 2);
 	if (outbuf == NULL)
 		return -ENOMEM;
 	memset(outbuf, 0, inlen * 2);
 	ic_outbuf = outbuf;
 	ic = iconv_open("UTF-16LE", "UTF-8");
 	r = iconv(ic, &input, &ic_inlen, &ic_outbuf, &ic_outlen);
 	iconv_close(ic);
 	if (r == 0) {
 		*out = outbuf;
 	} else {
 		*out = NULL;
 		crypt_safe_free(outbuf);
 		log_dbg(cd, "Failed to convert passphrase: %s", strerror(errno));
 		r = -errno;
 	}
 	return r;
 }
 static int parse_vmk_entry(struct crypt_device *cd, uint8_t *data, int start, int end, struct bitlk_vmk **vmk)
 {
 	uint16_t key_entry_size = 0;
@@ -324,6 +256,7 @@ static int parse_vmk_entry(struct crypt_device *cd, uint8_t *data, int start, in
 	const char *key = NULL;
 	struct volume_key *vk = NULL;
 	bool supported = false;
 	int r = 0;
 	/* only passphrase or recovery passphrase vmks are supported (can be used to activate) */
 	supported = (*vmk)->protection == BITLK_PROTECTION_PASSPHRASE ||
@@ -393,9 +326,13 @@ static int parse_vmk_entry(struct crypt_device *cd, uint8_t *data, int start, in
 		} else if (key_entry_value == BITLK_ENTRY_VALUE_RECOVERY_TIME) {
 			;
 		} else if (key_entry_value == BITLK_ENTRY_VALUE_STRING) {
-			if (convert_to_utf8(cd, data + start + BITLK_ENTRY_HEADER_LEN, key_entry_size - BITLK_ENTRY_HEADER_LEN, &string) < 0) {
+			string = malloc((key_entry_size - BITLK_ENTRY_HEADER_LEN) * 2 + 1);
 			if (!string)
 				return -ENOMEM;
 			r = crypt_utf16_to_utf8(&string, (const char16_t *) (data + start + BITLK_ENTRY_HEADER_LEN),
 						     key_entry_size - BITLK_ENTRY_HEADER_LEN);
 			if (r < 0 || !string) {
 				log_err(cd, _("Invalid string found when parsing Volume Master Key."));
 				free(string);
 				return -EINVAL;
 			} else if ((*vmk)->name != NULL) {
 				if (supported) {
@@ -486,6 +423,7 @@ int BITLK_read_sb(struct crypt_device *cd, struct bitlk_metadata *params)
 	int end = 0;
 	size_t key_size = 0;
 	const char *key = NULL;
 	char *description = NULL;
 	struct bitlk_vmk *vmk = NULL;
 	struct bitlk_vmk *vmk_p = params->vmks;
@@ -738,13 +676,17 @@ int BITLK_read_sb(struct crypt_device *cd, struct bitlk_metadata *params)
 			params->volume_header_size = le64_to_cpu(entry_header.size);
 		/* volume description (utf-16 string) */
 		} else if (entry_type == BITLK_ENTRY_TYPE_DESCRIPTION) {
-			r = convert_to_utf8(cd, fve_entries + start + BITLK_ENTRY_HEADER_LEN,
+			description = malloc((entry_size - BITLK_ENTRY_HEADER_LEN - BITLK_ENTRY_HEADER_LEN) * 2 + 1);
-					    entry_size - BITLK_ENTRY_HEADER_LEN,
+			if (!description)
-					    &(params->description));
+				return -ENOMEM;
-			if (r < 0) {
+			r = crypt_utf16_to_utf8(&description, (const char16_t *) (fve_entries + start + BITLK_ENTRY_HEADER_LEN),
 					                  entry_size - BITLK_ENTRY_HEADER_LEN);
 			if (r < 0 || !description) {
 				BITLK_bitlk_vmk_free(vmk);
 				log_err(cd, _("Failed to convert BITLK volume description"));
 				goto out;
 			}
 			params->description = description;
 		}
 		start += entry_size;
@@ -1008,7 +950,7 @@ static int bitlk_kdf(struct crypt_device *cd,
 	struct bitlk_kdf_data kdf = {};
 	struct crypt_hash *hd = NULL;
 	int len = 0;
-	char *utf16Password = NULL;
+	char16_t *utf16Password = NULL;
 	int i = 0;
 	int r = 0;
@@ -1025,11 +967,12 @@ static int bitlk_kdf(struct crypt_device *cd,
 	if (!recovery) {
 		/* passphrase: convert to UTF-16 first, then sha256(sha256(pw)) */
-		r = passphrase_to_utf16(cd, CONST_CAST(char*)password, passwordLen, &utf16Password);
+		utf16Password = crypt_safe_alloc(sizeof(char16_t) * passwordLen + 1);
 		r = crypt_utf8_to_utf16(&utf16Password, CONST_CAST(char*)password, passwordLen);
 		if (r < 0)
 			goto out;
-		crypt_hash_write(hd, utf16Password, passwordLen * 2);
+		crypt_hash_write(hd, (char*)utf16Password, passwordLen * 2);
 		r = crypt_hash_final(hd, kdf.initial_sha256, len);
 		if (r < 0)
 			goto out;
--- a/lib/crypto_backend/Makemodule.am
+++ b/lib/crypto_backend/Makemodule.am
@@ -10,6 +10,7 @@ libcrypto_backend_la_SOURCES = \
 	lib/crypto_backend/pbkdf_check.c \
 	lib/crypto_backend/crc32.c \
 	lib/crypto_backend/base64.c \
 	lib/crypto_backend/utf8.c \
 	lib/crypto_backend/argon2_generic.c \
 	lib/crypto_backend/cipher_generic.c \
 	lib/crypto_backend/cipher_check.c
--- a/lib/crypto_backend/crypto_backend.h
+++ b/lib/crypto_backend/crypto_backend.h
@@ -25,6 +25,7 @@
 #include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
 #include <uchar.h>
 struct crypt_hash;
 struct crypt_hmac;
@@ -87,6 +88,10 @@ uint32_t crypt_crc32(uint32_t seed, const unsigned char *buf, size_t len);
 int crypt_base64_encode(char **out, size_t *out_length, const char *in, size_t in_length);
 int crypt_base64_decode(char **out, size_t *out_length, const char *in, size_t in_length);
 /* UTF8/16 */
 int crypt_utf16_to_utf8(char **out, const char16_t *s, size_t length /* bytes! */);
 int crypt_utf8_to_utf16(char16_t **out, const char *s, size_t length);
 /* Block ciphers */
 int crypt_cipher_ivsize(const char *name, const char *mode);
 int crypt_cipher_wrapped_key(const char *name, const char *mode);
--- a/lib/crypto_backend/utf8.c
+++ b/lib/crypto_backend/utf8.c
@@ -0,0 +1,292 @@
 /*
 * UTF8/16 helpers, copied and adapted from systemd project.
 *
 * Copyright (C) 2010 Lennart Poettering
 *
 * cryptsetup related changes
 * Copyright (C) 2021 Vojtech Trefny
 * Parts of the original systemd implementation are based on the GLIB utf8
 * validation functions.
 * gutf8.c - Operations on UTF-8 strings.
 *
 * Copyright (C) 1999 Tom Tromey
 * Copyright (C) 2000 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
 #include <assert.h>
 #include <errno.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 #include <uchar.h>
 #include <unistd.h>
 #include "crypto_backend.h"
 #include "internal.h"
 /* Tell whether @c lies in the UTF-16 surrogate range 0xd800-0xdfff (leading or trailing). */
 static inline bool utf16_is_surrogate(char16_t c)
 {
 	/* Unsigned wrap-around folds the lower and upper bound into one comparison. */
 	return (c - 0xd800U) < 0x800U;
 }
 /* Tell whether @c is a trailing (low) surrogate, i.e. lies in 0xdc00-0xdfff. */
 static inline bool utf16_is_trailing_surrogate(char16_t c)
 {
 	/* Unsigned wrap-around folds the lower and upper bound into one comparison. */
 	return (c - 0xdc00U) < 0x400U;
 }
 /*
 * Combine a leading and a trailing surrogate into the code point they encode.
 * The arguments are not validated; callers are expected to have checked them.
 */
 static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t trail)
 {
 	const char32_t high_bits = (char32_t) lead - 0xd800U;
 	const char32_t low_bits = (char32_t) trail - 0xdc00U;
 	return (high_bits << 10) + low_bits + 0x10000U;
 }
 /**
 * utf8_encode_unichar() - Encode one UCS-4 character as UTF-8
 * @out_utf8: destination of at least 4 bytes, or NULL to only query the length
 * @g: UCS-4 character to encode
 *
 * Writes the UTF-8 form of @g to @out_utf8 without a terminating NUL byte.
 * With a NULL @out_utf8 nothing is written and only the length is computed.
 *
 * Returns: Number of bytes the UTF-8 form occupies (1-4), or 0 if @g needs
 *          more than 21 bits and therefore cannot be encoded here.
 */
 static size_t utf8_encode_unichar(char *out_utf8, char32_t g)
 {
 	/* Indexed by sequence length: marker bits and payload mask of the first byte. */
 	static const unsigned char lead_tag[] = { 0x00, 0x00, 0xc0, 0xe0, 0xf0 };
 	static const unsigned char lead_mask[] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07 };
 	size_t n;
 	if (g < 0x80U)
 		n = 1;
 	else if (g < 0x800U)
 		n = 2;
 	else if (g < 0x10000U)
 		n = 3;
 	else if (g < 0x200000U)
 		n = 4;
 	else
 		return 0;
 	if (!out_utf8)
 		return n;
 	/* Fill continuation bytes back to front, six payload bits at a time. */
 	for (size_t k = n - 1; k > 0; k--) {
 		out_utf8[k] = 0x80 | (g & 0x3f);
 		g >>= 6;
 	}
 	out_utf8[0] = lead_tag[n] | (g & lead_mask[n]);
 	return n;
 }
 /**
 * crypt_utf16_to_utf8() - Convert a UTF-16LE buffer to a NUL-terminated UTF-8 string
 * @out: points to a caller-provided output buffer that should be
 *       2 * @length + 1 bytes long; no bounds checking is done here
 * @s: UTF-16LE input, read byte by byte
 * @length: size of @s in bytes (not in 16-bit units); an odd trailing byte
 *          is never read
 *
 * Unpaired surrogates (a trailing surrogate without a leading one, or a
 * leading one not followed by a trailing one) are skipped without error.
 *
 * Returns: 0 on success, -EOVERFLOW if 2 * @length does not fit into size_t.
 */
 int crypt_utf16_to_utf8(char **out, const char16_t *s, size_t length /* bytes! */)
 {
 	const uint8_t *cur, *end;
 	char *dst;
 	assert(s);
 	/* The suggested output size is 2 * length + 1; refuse lengths where that wraps. */
 	if (length * 2 < length)
 		return -EOVERFLOW;
 	cur = (const uint8_t *) s;
 	end = cur + length;
 	dst = *out;
 	/* Each round needs a complete 16-bit unit, see RFC 2781 section 2.2. */
 	while (cur + 1 < end) {
 		char16_t lead, trail;
 		lead = cur[1] << 8 | cur[0];
 		cur += 2;
 		if (!utf16_is_surrogate(lead)) {
 			dst += utf8_encode_unichar(dst, lead);
 			continue;
 		}
 		/* Spurious trailing surrogate: drop it. */
 		if (utf16_is_trailing_surrogate(lead))
 			continue;
 		/* Leading surrogate at the very end of the input: nothing left to pair it with. */
 		if (cur + 1 >= end)
 			break;
 		/* Peek at the next unit; it is consumed only if it completes the pair. */
 		trail = cur[1] << 8 | cur[0];
 		if (!utf16_is_trailing_surrogate(trail))
 			continue;
 		cur += 2;
 		dst += utf8_encode_unichar(dst, utf16_surrogate_pair_to_unichar(lead, trail));
 	}
 	*dst = 0;
 	return 0;
 }
 /*
 * Length in bytes (1-6) of the UTF-8 sequence announced by lead byte @c,
 * or 0 if @c cannot start a sequence (continuation byte, 0xfe or 0xff).
 */
 static size_t utf8_encoded_expected_len(uint8_t c)
 {
 	size_t leading_ones = 0;
 	/* The number of leading one bits in the first byte encodes the sequence length. */
 	for (uint8_t bit = 0x80; bit != 0 && (c & bit) != 0; bit >>= 1)
 		leading_ones++;
 	if (leading_ones == 0)
 		return 1; /* plain 7-bit character */
 	if (leading_ones == 1 || leading_ones > 6)
 		return 0; /* continuation byte or invalid lead byte */
 	return leading_ones;
 }
 /*
 * Decode the single UTF-8 sequence starting at @str into *@ret_unichar.
 * The caller must ensure that all bytes announced by the lead byte are readable.
 * Returns 0 on success; -EINVAL (leaving *@ret_unichar untouched) if the lead
 * byte is invalid or a following byte is not a continuation byte.
 */
 static int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar)
 {
 	/* Payload bits carried by the first byte, indexed by sequence length. */
 	static const unsigned char lead_payload[] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
 	char32_t code;
 	size_t seq_len;
 	assert(str);
 	seq_len = utf8_encoded_expected_len(str[0]);
 	if (seq_len < 1 || seq_len > 6)
 		return -EINVAL;
 	code = (unsigned char) str[0] & lead_payload[seq_len];
 	/* Every continuation byte has the form 10xxxxxx and adds six payload bits. */
 	for (size_t k = 1; k < seq_len; k++) {
 		const unsigned char byte = (unsigned char) str[k];
 		if ((byte & 0xc0) != 0x80)
 			return -EINVAL;
 		code = (code << 6) | (byte & 0x3f);
 	}
 	*ret_unichar = code;
 	return 0;
 }
 /* Store @v at @dst as two bytes in little-endian order, whatever the host byte order is. */
 static inline void utf16_store_le(char16_t *dst, uint16_t v)
 {
 	uint8_t *raw = (uint8_t *) dst;
 	raw[0] = (uint8_t) (v & 0xffU);
 	raw[1] = (uint8_t) (v >> 8);
 }
 /*
 * Encode code point @c as UTF-16LE into @out, which must have room for two units.
 * Returns the number of 16-bit units written: 1 for the Basic Multilingual
 * Plane, 2 (a surrogate pair) for 0x10000-0x10ffff, and 0 with nothing written
 * when @c is a surrogate code point or lies beyond 0x10ffff.
 *
 * Written with plain comparisons and explicit byte stores so that it needs
 * neither the GNU case-range extension nor the non-standard htole16().
 */
 static size_t utf16_encode_unichar(char16_t *out, char32_t c)
 {
 	/* Surrogate code points are not valid characters on their own. */
 	if (c >= 0xd800U && c <= 0xdfffU)
 		return 0;
 	if (c <= 0xffffU) {
 		utf16_store_le(&out[0], (uint16_t) c);
 		return 1;
 	}
 	if (c <= 0x10ffffU) {
 		c -= 0x10000U;
 		utf16_store_le(&out[0], (uint16_t) ((c >> 10) + 0xd800U));
 		utf16_store_le(&out[1], (uint16_t) ((c & 0x3ffU) + 0xdc00U));
 		return 2;
 	}
 	/* Outside the Unicode range. */
 	return 0;
 }
 /**
 * crypt_utf8_to_utf16() - Convert a UTF-8 buffer to a zero-terminated UTF-16LE string
 * @out: points to a caller-provided output buffer that should hold at least
 *       @length + 1 char16_t elements (terminator included); no bounds
 *       checking is done here
 * @s: UTF-8 input, need not be NUL-terminated
 * @length: size of @s in bytes
 *
 * Bytes that do not form a valid multi-byte sequence (invalid lead byte,
 * sequence running past the end of the input, wrong continuation byte) are
 * copied one by one, each zero-extended to a single 16-bit unit. Decoded
 * code points that UTF-16 cannot represent produce no output.
 *
 * Returns: 0; no error is currently reported.
 */
 int crypt_utf8_to_utf16(char16_t **out, const char *s, size_t length)
 {
 	char16_t *p;
 	assert(s);
 	p = *out;
 	for (size_t i = 0; i < length;) {
 		char32_t unichar;
 		size_t e;
 		e = utf8_encoded_expected_len(s[i]);
 		/* Decode only complete, well-formed multi-byte sequences. */
 		if (e > 1 && i + e <= length && utf8_encoded_to_unichar(s + i, &unichar) >= 0) {
 			p += utf16_encode_unichar(p, unichar);
 			i += e;
 			continue;
 		}
 		/*
 		 * Single-byte and invalid input is copied as it is. Convert through
 		 * unsigned char first: plain char may be signed, and a byte >= 0x80
 		 * would otherwise be sign-extended to 0xffXX, giving a different
 		 * result than on targets where char is unsigned.
 		 */
 		*(p++) = htole16((unsigned char) s[i++]);
 	}
 	*p = 0;
 	return 0;
 }
--- a/tests/bitlk-compat-test
+++ b/tests/bitlk-compat-test
@@ -51,6 +51,10 @@ function check_dump()
 	# load variables for this image from config file
 	load_vars $file
 	# description
 	dump_desc=$(echo "$dump" | grep Description: | cut -d: -f2 | tr -d "\t\n ")
 	[ "${dump_desc:0:7}" = "DESKTOP" -o  "${dump_desc:0:3}" = "WIN" ] || fail " Description check from dump failed."
 	# GUID
 	dump_guid=$(echo "$dump" | grep Version -A 1 | tail -1 | cut -d: -f2 | tr -d "\t\n ")
 	[ ! -z "$GUID" -a "$dump_guid" = "$GUID"  ] || fail " GUID check from dump failed."