[FFmpeg-devel] [PATCH v2 1/2] avutil/aes_ctr: simplify and optimize av_aes_ctr_crypt()

James Almer jamrial at gmail.com
Tue Apr 1 18:29:06 EEST 2025


Process data in chunks of four or eight bytes, depending on host, instead of
one at a time.

before:
55561 decicycles in av_aes_ctr_crypt

after:
52204 decicycles in av_aes_ctr_crypt

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavutil/aes_ctr.c | 47 ++++++++++++++++++++-------------------------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/libavutil/aes_ctr.c b/libavutil/aes_ctr.c
index c2d6d570e7..d720aa6aaf 100644
--- a/libavutil/aes_ctr.c
+++ b/libavutil/aes_ctr.c
@@ -24,6 +24,7 @@
 #include "aes_ctr.h"
 #include "aes.h"
 #include "aes_internal.h"
+#include "intreadwrite.h"
 #include "macros.h"
 #include "mem.h"
 #include "random_seed.h"
@@ -32,8 +33,7 @@
 
 typedef struct AVAESCTR {
     uint8_t counter[AES_BLOCK_SIZE];
-    uint8_t encrypted_counter[AES_BLOCK_SIZE];
-    int block_offset;
+    DECLARE_ALIGNED(8, uint8_t, encrypted_counter)[AES_BLOCK_SIZE];
     AVAES aes;
 } AVAESCTR;
 
@@ -46,13 +46,11 @@ void av_aes_ctr_set_iv(struct AVAESCTR *a, const uint8_t* iv)
 {
     memcpy(a->counter, iv, AES_CTR_IV_SIZE);
     memset(a->counter + AES_CTR_IV_SIZE, 0, sizeof(a->counter) - AES_CTR_IV_SIZE);
-    a->block_offset = 0;
 }
 
 void av_aes_ctr_set_full_iv(struct AVAESCTR *a, const uint8_t* iv)
 {
     memcpy(a->counter, iv, sizeof(a->counter));
-    a->block_offset = 0;
 }
 
 const uint8_t* av_aes_ctr_get_iv(struct AVAESCTR *a)
@@ -75,7 +73,6 @@ int av_aes_ctr_init(struct AVAESCTR *a, const uint8_t *key)
     av_aes_init(&a->aes, key, 128, 0);
 
     memset(a->counter, 0, sizeof(a->counter));
-    a->block_offset = 0;
 
     return 0;
 }
@@ -101,31 +98,29 @@ void av_aes_ctr_increment_iv(struct AVAESCTR *a)
 {
     av_aes_ctr_increment_be64(a->counter);
     memset(a->counter + AES_CTR_IV_SIZE, 0, sizeof(a->counter) - AES_CTR_IV_SIZE);
-    a->block_offset = 0;
 }
 
 void av_aes_ctr_crypt(struct AVAESCTR *a, uint8_t *dst, const uint8_t *src, int count)
 {
-    const uint8_t* src_end = src + count;
-    const uint8_t* cur_end_pos;
-    uint8_t* encrypted_counter_pos;
-
-    while (src < src_end) {
-        if (a->block_offset == 0) {
-            av_aes_crypt(&a->aes, a->encrypted_counter, a->counter, 1, NULL, 0);
-
-            av_aes_ctr_increment_be64(a->counter + 8);
-        }
-
-        encrypted_counter_pos = a->encrypted_counter + a->block_offset;
-        cur_end_pos = src + AES_BLOCK_SIZE - a->block_offset;
-        cur_end_pos = FFMIN(cur_end_pos, src_end);
-
-        a->block_offset += cur_end_pos - src;
-        a->block_offset &= (AES_BLOCK_SIZE - 1);
+    while (count >= AES_BLOCK_SIZE) {
+        av_aes_crypt(&a->aes, a->encrypted_counter, a->counter, 1, NULL, 0);
+        av_aes_ctr_increment_be64(a->counter + 8);
+#if HAVE_FAST_64BIT
+        for (int len = 0; len < AES_BLOCK_SIZE; len += 8)
+            AV_WN64(&dst[len], AV_RN64(&src[len]) ^ AV_RN64A(&a->encrypted_counter[len]));
+#else
+        for (int len = 0; len < AES_BLOCK_SIZE; len += 4)
+            AV_WN32(&dst[len], AV_RN32(&src[len]) ^ AV_RN32A(&a->encrypted_counter[len]));
+#endif
+        dst += AES_BLOCK_SIZE;
+        src += AES_BLOCK_SIZE;
+        count -= AES_BLOCK_SIZE;
+    }
 
-        while (src < cur_end_pos) {
-            *dst++ = *src++ ^ *encrypted_counter_pos++;
-        }
+    if (count > 0) {
+        av_aes_crypt(&a->aes, a->encrypted_counter, a->counter, 1, NULL, 0);
+        av_aes_ctr_increment_be64(a->counter + 8);
+        for (int len = 0; len < count; len++)
+            dst[len] = src[len] ^ a->encrypted_counter[len];
     }
 }
-- 
2.49.0



More information about the ffmpeg-devel mailing list