core: crypto: add support for SM4

Adds support for the SM4 cipher [1] using the API defined in the
GlobalPlatform TEE Internal Core API v1.2.

ECB, CBC and CTR modes are implemented. Other modes are valid but are
not included in the GP specification, so they are not considered here.

This implementation is based on code published on Gitlab [2]. The
project contains no licensing terms, so I contacted the author
(goldboar@163.com), asking for permission to re-use the code in OP-TEE
under a BSD-2-Clause license. I received the following reply:

 "[...] If you like you can use it [...]"

I have reworked the source to better fit the OP-TEE coding style. I
have also added the CTR mode of operation.

I do not think we will need to merge any change from upstream in the
future.

[1] https://tools.ietf.org/html/draft-ribose-cfrg-sm4-10
[2] https://gitlab.com/otpfree/sm234

Signed-off-by: Jerome Forissier <jerome@forissier.org>
Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
Acked-by: Etienne Carriere <etienne.carriere@linaro.org>
diff --git a/core/crypto.mk b/core/crypto.mk
index 4e753b3..4a87c19 100644
--- a/core/crypto.mk
+++ b/core/crypto.mk
@@ -9,6 +9,7 @@
 # Ciphers
 CFG_CRYPTO_AES ?= y
 CFG_CRYPTO_DES ?= y
+CFG_CRYPTO_SM4 ?= y
 
 # Cipher block modes
 CFG_CRYPTO_ECB ?= y
diff --git a/core/crypto/crypto.c b/core/crypto/crypto.c
index 07b2a51..1272b1b 100644
--- a/core/crypto/crypto.c
+++ b/core/crypto/crypto.c
@@ -122,6 +122,15 @@
 	case TEE_ALG_DES3_CBC_NOPAD:
 		res = crypto_des3_cbc_alloc_ctx(&c);
 		break;
+	case TEE_ALG_SM4_ECB_NOPAD:
+		res = crypto_sm4_ecb_alloc_ctx(&c);
+		break;
+	case TEE_ALG_SM4_CBC_NOPAD:
+		res = crypto_sm4_cbc_alloc_ctx(&c);
+		break;
+	case TEE_ALG_SM4_CTR:
+		res = crypto_sm4_ctr_alloc_ctx(&c);
+		break;
 	default:
 		return TEE_ERROR_NOT_IMPLEMENTED;
 	}
@@ -192,6 +201,9 @@
 	case TEE_MAIN_ALGO_DES3:
 		*size = TEE_DES_BLOCK_SIZE;
 		return TEE_SUCCESS;
+	case TEE_MAIN_ALGO_SM4:
+		*size = TEE_SM4_BLOCK_SIZE;
+		return TEE_SUCCESS;
 	default:
 		return TEE_ERROR_NOT_SUPPORTED;
 	}
diff --git a/core/crypto/sm4-cbc.c b/core/crypto/sm4-cbc.c
new file mode 100644
index 0000000..4b96a47
--- /dev/null
+++ b/core/crypto/sm4-cbc.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright (c) 2019 Huawei Technologies Co., Ltd
+ */
+
+#include <assert.h>
+#include <crypto/crypto.h>
+#include <crypto/crypto_impl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string_ext.h>
+#include <tee_api_types.h>
+#include <util.h>
+
+#include "sm4.h"
+
+struct sm4_cbc_ctx {
+	struct crypto_cipher_ctx ctx;
+	struct sm4_context state;
+	uint8_t iv[16];
+};
+
+static const struct crypto_cipher_ops sm4_cbc_ops;
+
+static struct sm4_cbc_ctx *to_sm4_cbc_ctx(struct crypto_cipher_ctx *ctx)
+{
+	assert(ctx && ctx->ops == &sm4_cbc_ops);
+
+	return container_of(ctx, struct sm4_cbc_ctx, ctx);
+}
+
+static TEE_Result sm4_cbc_init(struct crypto_cipher_ctx *ctx,
+			       TEE_OperationMode mode, const uint8_t *key1,
+			       size_t key1_len, const uint8_t *key2 __unused,
+			       size_t key2_len __unused,
+			       const uint8_t *iv, size_t iv_len)
+{
+	struct sm4_cbc_ctx *c = to_sm4_cbc_ctx(ctx);
+
+	if (key1_len != 16 || iv_len != sizeof(c->iv))
+		return TEE_ERROR_BAD_PARAMETERS;
+
+	if (mode == TEE_MODE_ENCRYPT)
+		sm4_setkey_enc(&c->state, key1);
+	else
+		sm4_setkey_dec(&c->state, key1);
+
+	memcpy(c->iv, iv, sizeof(c->iv));
+
+	return TEE_SUCCESS;
+}
+
+static TEE_Result sm4_cbc_update(struct crypto_cipher_ctx *ctx,
+				 bool last_block __unused,
+				 const uint8_t *data, size_t len, uint8_t *dst)
+{
+	struct sm4_cbc_ctx *c = to_sm4_cbc_ctx(ctx);
+
+	sm4_crypt_cbc(&c->state, len, c->iv, data, dst);
+
+	return TEE_SUCCESS;
+}
+
+static void sm4_cbc_final(struct crypto_cipher_ctx *ctx)
+{
+	struct sm4_cbc_ctx *c = to_sm4_cbc_ctx(ctx);
+
+	memzero_explicit(&c->state, sizeof(c->state));
+	memzero_explicit(&c->iv, sizeof(c->iv));
+}
+
+static void sm4_cbc_free_ctx(struct crypto_cipher_ctx *ctx)
+{
+	free(to_sm4_cbc_ctx(ctx));
+}
+
+static void sm4_cbc_copy_state(struct crypto_cipher_ctx *dst_ctx,
+			       struct crypto_cipher_ctx *src_ctx)
+{
+	struct sm4_cbc_ctx *src = to_sm4_cbc_ctx(src_ctx);
+	struct sm4_cbc_ctx *dst = to_sm4_cbc_ctx(dst_ctx);
+
+	dst->state = src->state;
+	memcpy(dst->iv, src->iv, sizeof(src->iv));
+}
+
+static const struct crypto_cipher_ops sm4_cbc_ops = {
+	.init = sm4_cbc_init,
+	.update = sm4_cbc_update,
+	.final = sm4_cbc_final,
+	.free_ctx = sm4_cbc_free_ctx,
+	.copy_state = sm4_cbc_copy_state,
+};
+
+TEE_Result crypto_sm4_cbc_alloc_ctx(struct crypto_cipher_ctx **ctx_ret)
+{
+	struct sm4_cbc_ctx *c = NULL;
+
+	c = calloc(1, sizeof(*c));
+	if (!c)
+		return TEE_ERROR_OUT_OF_MEMORY;
+
+	c->ctx.ops = &sm4_cbc_ops;
+	*ctx_ret = &c->ctx;
+
+	return TEE_SUCCESS;
+}
diff --git a/core/crypto/sm4-ctr.c b/core/crypto/sm4-ctr.c
new file mode 100644
index 0000000..0b17400
--- /dev/null
+++ b/core/crypto/sm4-ctr.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright (c) 2019 Huawei Technologies Co., Ltd
+ */
+
+#include <assert.h>
+#include <crypto/crypto.h>
+#include <crypto/crypto_impl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string_ext.h>
+#include <tee_api_types.h>
+#include <util.h>
+
+#include "sm4.h"
+
+struct sm4_ctr_ctx {
+	struct crypto_cipher_ctx ctx;
+	struct sm4_context state;
+	uint8_t ctr[16];
+};
+
+static const struct crypto_cipher_ops sm4_ctr_ops;
+
+static struct sm4_ctr_ctx *to_sm4_ctr_ctx(struct crypto_cipher_ctx *ctx)
+{
+	assert(ctx && ctx->ops == &sm4_ctr_ops);
+
+	return container_of(ctx, struct sm4_ctr_ctx, ctx);
+}
+
+static TEE_Result sm4_ctr_init(struct crypto_cipher_ctx *ctx,
+			       TEE_OperationMode mode __unused,
+			       const uint8_t *key1, size_t key1_len,
+			       const uint8_t *key2 __unused,
+			       size_t key2_len __unused,
+			       const uint8_t *iv, size_t iv_len)
+{
+	struct sm4_ctr_ctx *c = to_sm4_ctr_ctx(ctx);
+
+	if (key1_len != 16 || iv_len != sizeof(c->ctr))
+		return TEE_ERROR_BAD_PARAMETERS;
+
+	sm4_setkey_enc(&c->state, key1);
+	memcpy(c->ctr, iv, sizeof(c->ctr));
+
+	return TEE_SUCCESS;
+}
+
+static TEE_Result sm4_ctr_update(struct crypto_cipher_ctx *ctx,
+				 bool last_block __unused,
+				 const uint8_t *data, size_t len, uint8_t *dst)
+{
+	struct sm4_ctr_ctx *c = to_sm4_ctr_ctx(ctx);
+
+	sm4_crypt_ctr(&c->state, len, c->ctr, data, dst);
+
+	return TEE_SUCCESS;
+}
+
+static void sm4_ctr_final(struct crypto_cipher_ctx *ctx)
+{
+	struct sm4_ctr_ctx *c = to_sm4_ctr_ctx(ctx);
+
+	memzero_explicit(&c->state, sizeof(c->state));
+	memzero_explicit(&c->ctr, sizeof(c->ctr));
+}
+
+static void sm4_ctr_free_ctx(struct crypto_cipher_ctx *ctx)
+{
+	free(to_sm4_ctr_ctx(ctx));
+}
+
+static void sm4_ctr_copy_state(struct crypto_cipher_ctx *dst_ctx,
+			       struct crypto_cipher_ctx *src_ctx)
+{
+	struct sm4_ctr_ctx *src = to_sm4_ctr_ctx(src_ctx);
+	struct sm4_ctr_ctx *dst = to_sm4_ctr_ctx(dst_ctx);
+
+	dst->state = src->state;
+	memcpy(dst->ctr, src->ctr, sizeof(src->ctr));
+}
+
+static const struct crypto_cipher_ops sm4_ctr_ops = {
+	.init = sm4_ctr_init,
+	.update = sm4_ctr_update,
+	.final = sm4_ctr_final,
+	.free_ctx = sm4_ctr_free_ctx,
+	.copy_state = sm4_ctr_copy_state,
+};
+
+TEE_Result crypto_sm4_ctr_alloc_ctx(struct crypto_cipher_ctx **ctx_ret)
+{
+	struct sm4_ctr_ctx *c = NULL;
+
+	c = calloc(1, sizeof(*c));
+	if (!c)
+		return TEE_ERROR_OUT_OF_MEMORY;
+
+	c->ctx.ops = &sm4_ctr_ops;
+	*ctx_ret = &c->ctx;
+
+	return TEE_SUCCESS;
+}
diff --git a/core/crypto/sm4-ecb.c b/core/crypto/sm4-ecb.c
new file mode 100644
index 0000000..2cd394d
--- /dev/null
+++ b/core/crypto/sm4-ecb.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright (c) 2019 Huawei Technologies Co., Ltd
+ */
+
+#include <assert.h>
+#include <crypto/crypto.h>
+#include <crypto/crypto_impl.h>
+#include <stdlib.h>
+#include <string_ext.h>
+#include <tee_api_types.h>
+#include <util.h>
+
+#include "sm4.h"
+
+struct sm4_ecb_ctx {
+	struct crypto_cipher_ctx ctx;
+	struct sm4_context state;
+};
+
+static const struct crypto_cipher_ops sm4_ecb_ops;
+
+static struct sm4_ecb_ctx *to_sm4_ecb_ctx(struct crypto_cipher_ctx *ctx)
+{
+	assert(ctx && ctx->ops == &sm4_ecb_ops);
+
+	return container_of(ctx, struct sm4_ecb_ctx, ctx);
+}
+
+static TEE_Result sm4_ecb_init(struct crypto_cipher_ctx *ctx,
+			       TEE_OperationMode mode, const uint8_t *key1,
+			       size_t key1_len, const uint8_t *key2 __unused,
+			       size_t key2_len __unused,
+			       const uint8_t *iv __unused,
+			       size_t iv_len __unused)
+{
+	struct sm4_ecb_ctx *c = to_sm4_ecb_ctx(ctx);
+
+	if (key1_len != 16)
+		return TEE_ERROR_BAD_STATE;
+
+	if (mode == TEE_MODE_ENCRYPT)
+		sm4_setkey_enc(&c->state, key1);
+	else
+		sm4_setkey_dec(&c->state, key1);
+
+	return TEE_SUCCESS;
+}
+
+static TEE_Result sm4_ecb_update(struct crypto_cipher_ctx *ctx,
+				 bool last_block __unused,
+				 const uint8_t *data, size_t len, uint8_t *dst)
+{
+	struct sm4_ecb_ctx *c = to_sm4_ecb_ctx(ctx);
+
+	sm4_crypt_ecb(&c->state, len, data, dst);
+
+	return TEE_SUCCESS;
+}
+
+static void sm4_ecb_final(struct crypto_cipher_ctx *ctx)
+{
+	struct sm4_ecb_ctx *c = to_sm4_ecb_ctx(ctx);
+
+	memzero_explicit(&c->state, sizeof(c->state));
+}
+
+static void sm4_ecb_free_ctx(struct crypto_cipher_ctx *ctx)
+{
+	free(to_sm4_ecb_ctx(ctx));
+}
+
+static void sm4_ecb_copy_state(struct crypto_cipher_ctx *dst_ctx,
+			       struct crypto_cipher_ctx *src_ctx)
+{
+	struct sm4_ecb_ctx *src = to_sm4_ecb_ctx(src_ctx);
+	struct sm4_ecb_ctx *dst = to_sm4_ecb_ctx(dst_ctx);
+
+	dst->state = src->state;
+}
+
+static const struct crypto_cipher_ops sm4_ecb_ops = {
+	.init = sm4_ecb_init,
+	.update = sm4_ecb_update,
+	.final = sm4_ecb_final,
+	.free_ctx = sm4_ecb_free_ctx,
+	.copy_state = sm4_ecb_copy_state,
+};
+
+TEE_Result crypto_sm4_ecb_alloc_ctx(struct crypto_cipher_ctx **ctx_ret)
+{
+	struct sm4_ecb_ctx *c = NULL;
+
+	c = calloc(1, sizeof(*c));
+	if (!c)
+		return TEE_ERROR_OUT_OF_MEMORY;
+
+	c->ctx.ops = &sm4_ecb_ops;
+	*ctx_ret = &c->ctx;
+
+	return TEE_SUCCESS;
+}
diff --git a/core/crypto/sm4.c b/core/crypto/sm4.c
new file mode 100644
index 0000000..b895660
--- /dev/null
+++ b/core/crypto/sm4.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright Copyright (c) 2019 Huawei Technologies Co., Ltd
+ */
+/*
+ * SM4 Encryption algorithm (SMS4 algorithm)
+ * GM/T 0002-2012 Chinese National Standard ref:http://www.oscca.gov.cn/
+ * thanks to Xyssl
+ * thnaks and refers to http://hi.baidu.com/numax/blog/item/80addfefddfb93e4cf1b3e61.html
+ * author:goldboar
+ * email:goldboar@163.com
+ * 2012-4-20
+ */
+
+#include "sm4.h"
+#include <assert.h>
+#include <string.h>
+
+#define GET_UINT32_BE(n, b, i)				\
+	do {						\
+		(n) = ((uint32_t)(b)[(i)] << 24)     |	\
+		      ((uint32_t)(b)[(i) + 1] << 16) |	\
+		      ((uint32_t)(b)[(i) + 2] <<  8) |	\
+		      ((uint32_t)(b)[(i) + 3]);		\
+	} while (0)
+
+#define PUT_UINT32_BE(n, b, i)				\
+	do {						\
+		(b)[(i)] = (uint8_t)((n) >> 24);	\
+		(b)[(i) + 1] = (uint8_t)((n) >> 16);	\
+		(b)[(i) + 2] = (uint8_t)((n) >>  8);	\
+		(b)[(i) + 3] = (uint8_t)((n));		\
+	} while (0)
+
+#define SHL(x, n)	(((x) & 0xFFFFFFFF) << (n))
+#define ROTL(x, n)	(SHL((x), (n)) | ((x) >> (32 - (n))))
+
+#define SWAP(a, b)	{ uint32_t t = a; a = b; b = t; t = 0; }
+
+/*
+ * Expanded SM4 S-boxes
+ */
+static const uint8_t SboxTable[16][16] =  {
+	{0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+	 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05},
+	{0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+	 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99},
+	{0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+	 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62},
+	{0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+	 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6},
+	{0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+	 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8},
+	{0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+	 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35},
+	{0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+	 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87},
+	{0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+	 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e},
+	{0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+	 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1},
+	{0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+	 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3},
+	{0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+	 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f},
+	{0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+	 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51},
+	{0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+	 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8},
+	{0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+	 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0},
+	{0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+	 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84},
+	{0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+	 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48}
+};
+
+/* System parameter */
+static const uint32_t FK[4] = {
+	0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+/* Fixed parameter */
+static const uint32_t CK[32] = {
+	0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+	0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+	0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+	0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+	0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+	0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+	0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+	0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
+
+static uint8_t sm4Sbox(uint8_t inch)
+{
+	uint8_t *tab = (uint8_t *)SboxTable;
+
+	return tab[inch];
+}
+
+static uint32_t sm4Lt(uint32_t ka)
+{
+	uint32_t bb = 0;
+	uint8_t a[4];
+	uint8_t b[4];
+
+	PUT_UINT32_BE(ka, a, 0);
+	b[0] = sm4Sbox(a[0]);
+	b[1] = sm4Sbox(a[1]);
+	b[2] = sm4Sbox(a[2]);
+	b[3] = sm4Sbox(a[3]);
+	GET_UINT32_BE(bb, b, 0);
+
+	return bb ^ ROTL(bb, 2) ^ ROTL(bb, 10) ^ ROTL(bb, 18) ^ ROTL(bb, 24);
+}
+
+static uint32_t sm4F(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
+		     uint32_t rk)
+{
+	return x0 ^ sm4Lt(x1 ^ x2 ^ x3 ^ rk);
+}
+
+static uint32_t sm4CalciRK(uint32_t ka)
+{
+	uint32_t bb = 0;
+	uint8_t a[4];
+	uint8_t b[4];
+
+	PUT_UINT32_BE(ka, a, 0);
+	b[0] = sm4Sbox(a[0]);
+	b[1] = sm4Sbox(a[1]);
+	b[2] = sm4Sbox(a[2]);
+	b[3] = sm4Sbox(a[3]);
+	GET_UINT32_BE(bb, b, 0);
+
+	return bb ^ ROTL(bb, 13) ^ ROTL(bb, 23);
+}
+
+static void sm4_setkey(uint32_t SK[32], const uint8_t key[16])
+{
+	uint32_t MK[4];
+	uint32_t k[36];
+	uint32_t i = 0;
+
+	GET_UINT32_BE(MK[0], key, 0);
+	GET_UINT32_BE(MK[1], key, 4);
+	GET_UINT32_BE(MK[2], key, 8);
+	GET_UINT32_BE(MK[3], key, 12);
+
+	k[0] = MK[0] ^ FK[0];
+	k[1] = MK[1] ^ FK[1];
+	k[2] = MK[2] ^ FK[2];
+	k[3] = MK[3] ^ FK[3];
+
+	for (i = 0; i < 32; i++) {
+		k[i + 4] = k[i] ^ sm4CalciRK(k[i + 1] ^ k[i + 2] ^ k[i + 3] ^
+					     CK[i]);
+		SK[i] = k[i + 4];
+	}
+}
+
+static void sm4_one_round(uint32_t sk[32], const uint8_t input[16],
+			  uint8_t output[16])
+{
+	uint32_t i = 0;
+	uint32_t ulbuf[36];
+
+	memset(ulbuf, 0, sizeof(ulbuf));
+
+	GET_UINT32_BE(ulbuf[0], input, 0);
+	GET_UINT32_BE(ulbuf[1], input, 4);
+	GET_UINT32_BE(ulbuf[2], input, 8);
+	GET_UINT32_BE(ulbuf[3], input, 12);
+
+	for (i = 0; i < 32; i++)
+		ulbuf[i + 4] = sm4F(ulbuf[i], ulbuf[i + 1], ulbuf[i + 2],
+				    ulbuf[i + 3], sk[i]);
+
+	PUT_UINT32_BE(ulbuf[35], output, 0);
+	PUT_UINT32_BE(ulbuf[34], output, 4);
+	PUT_UINT32_BE(ulbuf[33], output, 8);
+	PUT_UINT32_BE(ulbuf[32], output, 12);
+}
+
+void sm4_setkey_enc(struct sm4_context *ctx, const uint8_t key[16])
+{
+	ctx->mode = SM4_ENCRYPT;
+	sm4_setkey(ctx->sk, key);
+}
+
+void sm4_setkey_dec(struct sm4_context *ctx, const uint8_t key[16])
+{
+	int i;
+
+	ctx->mode = SM4_DECRYPT;
+	sm4_setkey(ctx->sk, key);
+
+	for (i = 0; i < 16; i++)
+		SWAP(ctx->sk[i], ctx->sk[31 - i]);
+}
+
+void sm4_crypt_ecb(struct sm4_context *ctx, size_t length, const uint8_t *input,
+		   uint8_t *output)
+{
+	assert(!(length % 16));
+
+	while (length > 0) {
+		sm4_one_round(ctx->sk, input, output);
+		input  += 16;
+		output += 16;
+		length -= 16;
+	}
+}
+
+void sm4_crypt_cbc(struct sm4_context *ctx, size_t length, uint8_t iv[16],
+		   const uint8_t *input, uint8_t *output)
+{
+	int i;
+	uint8_t temp[16];
+
+	assert(!(length % 16));
+
+	if (ctx->mode == SM4_ENCRYPT) {
+		while (length > 0) {
+			for (i = 0; i < 16; i++)
+				output[i] = (uint8_t)(input[i] ^ iv[i]);
+			sm4_one_round(ctx->sk, output, output);
+			memcpy(iv, output, 16);
+			input  += 16;
+			output += 16;
+			length -= 16;
+		}
+	} else {
+		/* SM4_DECRYPT */
+		while (length > 0) {
+			memcpy(temp, input, 16);
+			sm4_one_round(ctx->sk, input, output);
+			for (i = 0; i < 16; i++)
+				output[i] = (uint8_t)(output[i] ^ iv[i]);
+			memcpy(iv, temp, 16);
+			input  += 16;
+			output += 16;
+			length -= 16;
+		}
+	}
+}
+
+void sm4_crypt_ctr(struct sm4_context *ctx, size_t length, uint8_t ctr[16],
+		   const uint8_t *input, uint8_t *output)
+{
+	int i;
+	uint8_t temp[16];
+
+	assert(!(length % 16));
+
+	while (length > 0) {
+		memcpy(temp, ctr, 16);
+		sm4_one_round(ctx->sk, ctr, ctr);
+		for (i = 0; i < 16; i++)
+			output[i] = (uint8_t)(input[i] ^ ctr[i]);
+		memcpy(ctr, temp, 16);
+		for (i = 16; i > 0; i--)
+			if (++ctr[i - 1])
+				break;
+		input  += 16;
+		output += 16;
+		length -= 16;
+	}
+}
diff --git a/core/crypto/sm4.h b/core/crypto/sm4.h
new file mode 100644
index 0000000..73724ba
--- /dev/null
+++ b/core/crypto/sm4.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*
+ * Copyright (c) 2019 Huawei Technologies Co., Ltd
+ */
+#ifndef CORE_CRYPTO_SM4_H
+#define CORE_CRYPTO_SM4_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define SM4_ENCRYPT	1
+#define SM4_DECRYPT	0
+
+struct sm4_context {
+	int mode;         /* SM4_ENCRYPT/SM4_DECRYPT */
+	uint32_t sk[32];  /* SM4 subkeys */
+};
+
+void sm4_setkey_enc(struct sm4_context *ctx, const uint8_t key[16]);
+void sm4_setkey_dec(struct sm4_context *ctx, const uint8_t key[16]);
+void sm4_crypt_ecb(struct sm4_context *ctx, size_t length, const uint8_t *input,
+		   uint8_t *output);
+void sm4_crypt_cbc(struct sm4_context *ctx, size_t length, uint8_t iv[16],
+		   const uint8_t *input, uint8_t *output);
+void sm4_crypt_ctr(struct sm4_context *ctx, size_t length, uint8_t ctr[16],
+		   const uint8_t *input, uint8_t *output);
+
+#endif /* CORE_CRYPTO_SM4_H */
diff --git a/core/crypto/sub.mk b/core/crypto/sub.mk
index 80f08d8..21b8b6f 100644
--- a/core/crypto/sub.mk
+++ b/core/crypto/sub.mk
@@ -20,3 +20,9 @@
 ifneq ($(CFG_CRYPTO_CTS_FROM_CRYPTOLIB),y)
 srcs-$(CFG_CRYPTO_CTS) += aes-cts.c
 endif
+ifeq ($(CFG_CRYPTO_SM4),y)
+srcs-y += sm4.c
+srcs-$(CFG_CRYPTO_ECB) += sm4-ecb.c
+srcs-$(CFG_CRYPTO_CBC) += sm4-cbc.c
+srcs-$(CFG_CRYPTO_CTR) += sm4-ctr.c
+endif
diff --git a/core/include/crypto/crypto_impl.h b/core/include/crypto/crypto_impl.h
index 26af706..4836efb 100644
--- a/core/include/crypto/crypto_impl.h
+++ b/core/include/crypto/crypto_impl.h
@@ -195,6 +195,24 @@
 CRYPTO_ALLOC_CTX_NOT_IMPLEMENTED(des3_cbc, cipher)
 #endif
 
+#if defined(CFG_CRYPTO_SM4) && defined(CFG_CRYPTO_ECB)
+TEE_Result crypto_sm4_ecb_alloc_ctx(struct crypto_cipher_ctx **ctx);
+#else
+CRYPTO_ALLOC_CTX_NOT_IMPLEMENTED(sm4_ecb, cipher)
+#endif
+
+#if defined(CFG_CRYPTO_SM4) && defined(CFG_CRYPTO_CBC)
+TEE_Result crypto_sm4_cbc_alloc_ctx(struct crypto_cipher_ctx **ctx);
+#else
+CRYPTO_ALLOC_CTX_NOT_IMPLEMENTED(sm4_cbc, cipher)
+#endif
+
+#if defined(CFG_CRYPTO_SM4) && defined(CFG_CRYPTO_CTR)
+TEE_Result crypto_sm4_ctr_alloc_ctx(struct crypto_cipher_ctx **ctx);
+#else
+CRYPTO_ALLOC_CTX_NOT_IMPLEMENTED(sm4_ctr, cipher)
+#endif
+
 /*
  * The crypto context used by the crypto_authen_*() functions below is
  * defined by struct crypto_authenc_ctx.
diff --git a/core/tee/tee_cryp_utl.c b/core/tee/tee_cryp_utl.c
index e12f50b..7a6a3f8 100644
--- a/core/tee/tee_cryp_utl.c
+++ b/core/tee/tee_cryp_utl.c
@@ -119,6 +119,9 @@
 	case TEE_ALG_AES_XTS:
 	case TEE_ALG_AES_CCM:
 	case TEE_ALG_AES_GCM:
+	case TEE_ALG_SM4_ECB_NOPAD:
+	case TEE_ALG_SM4_CBC_NOPAD:
+	case TEE_ALG_SM4_CTR:
 		*size = 16;
 		break;
 
@@ -168,6 +171,8 @@
 		case TEE_ALG_AES_CBC_NOPAD:
 		case TEE_ALG_DES_CBC_NOPAD:
 		case TEE_ALG_DES3_CBC_NOPAD:
+		case TEE_ALG_SM4_ECB_NOPAD:
+		case TEE_ALG_SM4_CBC_NOPAD:
 			return TEE_ERROR_BAD_PARAMETERS;
 
 		case TEE_ALG_AES_CTR:
diff --git a/core/tee/tee_svc_cryp.c b/core/tee/tee_svc_cryp.c
index e9cce8b..e9e7b6e 100644
--- a/core/tee/tee_svc_cryp.c
+++ b/core/tee/tee_svc_cryp.c
@@ -409,6 +409,9 @@
 		*/
 		192 / 8 + sizeof(struct tee_cryp_obj_secret),
 		tee_cryp_obj_secret_value_attrs),
+	PROP(TEE_TYPE_SM4, 128, 128, 128,
+		128 / 8 + sizeof(struct tee_cryp_obj_secret),
+		tee_cryp_obj_secret_value_attrs),
 	PROP(TEE_TYPE_HMAC_MD5, 8, 64, 512,
 		512 / 8 + sizeof(struct tee_cryp_obj_secret),
 		tee_cryp_obj_secret_value_attrs),
@@ -1815,6 +1818,7 @@
 	case TEE_TYPE_AES:
 	case TEE_TYPE_DES:
 	case TEE_TYPE_DES3:
+	case TEE_TYPE_SM4:
 	case TEE_TYPE_HMAC_MD5:
 	case TEE_TYPE_HMAC_SHA1:
 	case TEE_TYPE_HMAC_SHA224:
@@ -1975,6 +1979,9 @@
 	case TEE_MAIN_ALGO_DES3:
 		req_key_type = TEE_TYPE_DES3;
 		break;
+	case TEE_MAIN_ALGO_SM4:
+		req_key_type = TEE_TYPE_SM4;
+		break;
 	case TEE_MAIN_ALGO_RSA:
 		req_key_type = TEE_TYPE_RSA_KEYPAIR;
 		if (mode == TEE_MODE_ENCRYPT || mode == TEE_MODE_VERIFY)
diff --git a/lib/libutee/include/tee_api_defines.h b/lib/libutee/include/tee_api_defines.h
index c71108a..e504a7d 100644
--- a/lib/libutee/include/tee_api_defines.h
+++ b/lib/libutee/include/tee_api_defines.h
@@ -135,6 +135,9 @@
 #define TEE_ALG_DES3_CBC_NOPAD                  0x10000113
 #define TEE_ALG_DES3_CBC_MAC_NOPAD              0x30000113
 #define TEE_ALG_DES3_CBC_MAC_PKCS5              0x30000513
+#define TEE_ALG_SM4_ECB_NOPAD                   0x10000014
+#define TEE_ALG_SM4_CBC_NOPAD                   0x10000114
+#define TEE_ALG_SM4_CTR                         0x10000214
 #define TEE_ALG_RSASSA_PKCS1_V1_5_MD5           0x70001830
 #define TEE_ALG_RSASSA_PKCS1_V1_5_SHA1          0x70002830
 #define TEE_ALG_RSASSA_PKCS1_V1_5_SHA224        0x70003830
@@ -197,6 +200,7 @@
 #define TEE_TYPE_AES                        0xA0000010
 #define TEE_TYPE_DES                        0xA0000011
 #define TEE_TYPE_DES3                       0xA0000013
+#define TEE_TYPE_SM4                        0xA0000014
 #define TEE_TYPE_HMAC_MD5                   0xA0000001
 #define TEE_TYPE_HMAC_SHA1                  0xA0000002
 #define TEE_TYPE_HMAC_SHA224                0xA0000003
diff --git a/lib/libutee/include/utee_defines.h b/lib/libutee/include/utee_defines.h
index 2eaa0b2..b2a48ce 100644
--- a/lib/libutee/include/utee_defines.h
+++ b/lib/libutee/include/utee_defines.h
@@ -22,6 +22,7 @@
 #define TEE_MAIN_ALGO_DES        0x11
 #define TEE_MAIN_ALGO_DES2       0x12
 #define TEE_MAIN_ALGO_DES3       0x13
+#define TEE_MAIN_ALGO_SM4        0x14 /* Not in v1.2, extrapolated */
 #define TEE_MAIN_ALGO_RSA        0x30
 #define TEE_MAIN_ALGO_DSA        0x31
 #define TEE_MAIN_ALGO_DH         0x32
@@ -80,6 +81,7 @@
 
 #define TEE_AES_BLOCK_SIZE  16UL
 #define TEE_DES_BLOCK_SIZE  8UL
+#define TEE_SM4_BLOCK_SIZE  16UL
 
 #define TEE_AES_MAX_KEY_SIZE    32UL
 
diff --git a/lib/libutee/tee_api_operations.c b/lib/libutee/tee_api_operations.c
index 3e39ac8..e0f530d 100644
--- a/lib/libutee/tee_api_operations.c
+++ b/lib/libutee/tee_api_operations.c
@@ -117,8 +117,13 @@
 	case TEE_ALG_DES_CBC_NOPAD:
 	case TEE_ALG_DES3_ECB_NOPAD:
 	case TEE_ALG_DES3_CBC_NOPAD:
+	case TEE_ALG_SM4_ECB_NOPAD:
+	case TEE_ALG_SM4_CBC_NOPAD:
+	case TEE_ALG_SM4_CTR:
 		if (TEE_ALG_GET_MAIN_ALG(algorithm) == TEE_MAIN_ALGO_AES)
 			block_size = TEE_AES_BLOCK_SIZE;
+		else if (TEE_ALG_GET_MAIN_ALG(algorithm) == TEE_MAIN_ALGO_SM4)
+			block_size = TEE_SM4_BLOCK_SIZE;
 		else
 			block_size = TEE_DES_BLOCK_SIZE;
 		/* FALLTHROUGH */
@@ -1066,7 +1071,9 @@
 	    operation->info.algorithm == TEE_ALG_DES_ECB_NOPAD ||
 	    operation->info.algorithm == TEE_ALG_DES_CBC_NOPAD ||
 	    operation->info.algorithm == TEE_ALG_DES3_ECB_NOPAD ||
-	    operation->info.algorithm == TEE_ALG_DES3_CBC_NOPAD) {
+	    operation->info.algorithm == TEE_ALG_DES3_CBC_NOPAD ||
+	    operation->info.algorithm == TEE_ALG_SM4_ECB_NOPAD ||
+	    operation->info.algorithm == TEE_ALG_SM4_CBC_NOPAD) {
 		if (((operation->buffer_offs + srcLen) % operation->block_size)
 		    != 0) {
 			res = TEE_ERROR_BAD_PARAMETERS;