算法详解

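ChaCha20 是一种流密码(stream cipher)算法,由 Daniel J. Bernstein 于 2008 年提出,是 Salsa20 的改进版本。它的主要优点是速度快、安全性高、易于实现。ChaCha20 将密钥(256 位)、**随机数(Nonce,96 位)和计数器(Counter,32 位)**经过一系列混合运算生成密钥流(Key Stream),再与明文进行按位异或得到密文。(以上是 RFC 8439 的 IETF 版本布局;Bernstein 的原始版本使用 64 位 Nonce 和 64 位计数器,下文第一份示例代码采用的就是原始版本的布局。)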

ChaCha20 内部状态为一个 4×4 的 32 位无符号整数矩阵,初始排列如下:

[ 常量  ][ 常量 ][ 常量 ][ 常量 ]
[ key0  ][ key1 ][ key2 ][ key3 ]
[ key4  ][ key5 ][ key6 ][ key7 ]
[counter][nonce0][nonce1][nonce2]

每一次加密生成 64 字节的密钥流块,ChaCha20 的核心是 Quarter Round(四分之一轮),它使用加法、异或、循环左移(ROTL)混合数据。

加密流程示意图:

实战识别

在逆向分析中判断 ChaCha20 的常见特征:

  1. 常量字符串 "expand 32-byte k" 出现在初始化矩阵中(可能是 ASCII 值形式)。
  2. 内部循环 20 轮(10 次双轮),每轮包含多次 Quarter Round。
  3. 使用 循环左移 16、12、8、7 位的 ROTL 操作。
  4. 处理的数据块大小为 64 字节

代码实现

下面是 ChaCha20 算法过程的 C 语言函数实现(注意:这份示例采用 Bernstein 原始版本的布局,即 64 位计数器 + 64 位 Nonce):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* U32C(x): convert x to uint32_t. */
#define U32C(x) ((uint32_t)(x))
/* ROTL32(v,n): rotate v left by n bits, combining (v << n) with (v >> (32 - n)).
 * Both arguments are expanded twice, so pass side-effect-free expressions.
 * For a 32-bit v, n must stay in 1..31: n == 0 would shift by 32, which C
 * leaves undefined. */
#define ROTL32(v,n) (U32C((v) << (n)) | U32C((v) >> (32 - (n))))

/* -------------------- 工具:小端装载/存储 -------------------- */
/* Assemble a 32-bit word from the four bytes at p, least-significant byte first. */
static uint32_t load32_le(const uint8_t *p) {
    uint32_t word = 0;

    /* Walk from the most significant byte down so each step shifts in 8 bits. */
    for (int i = 3; i >= 0; i--) {
        word = (word << 8) | p[i];
    }
    return word;
}
/* Write x into p[0..3], least-significant byte first. */
static void store32_le(uint8_t *p, uint32_t x) {
    for (int i = 0; i < 4; i++) {
        p[i] = (uint8_t)(x >> (8 * i));
    }
}

/* -------------------- ChaCha20 核心 -------------------- */
/* QR(a,b,c,d): ChaCha quarter round. The four arguments must be 32-bit
 * lvalues; they are updated in place by four add / xor / rotate-left steps
 * with rotation amounts 16, 12, 8 and 7, in that order.
 * The do { } while(0) wrapper makes one invocation behave as one statement. */
#define QR(a,b,c,d) \
do { \
a += b; d ^= a; d = ROTL32(d,16); \
c += d; b ^= c; b = ROTL32(b,12); \
a += b; d ^= a; d = ROTL32(d, 8); \
c += d; b ^= c; b = ROTL32(b, 7); \
} while(0)

/* Run the 20-round ChaCha20 permutation over a 16-word state.
 * `in` is copied to a working state, ten double rounds (a column round
 * followed by a diagonal round) are applied, and each output word is the
 * working word plus the matching input word. */
static void chacha20_block(uint32_t out[16], const uint32_t in[16]) {
    uint32_t w[16];
    int k;

    for (k = 0; k < 16; k++) {
        w[k] = in[k];
    }

    for (int remaining = 10; remaining > 0; remaining--) {
        /* columns */
        QR(w[0], w[4], w[8],  w[12]);
        QR(w[1], w[5], w[9],  w[13]);
        QR(w[2], w[6], w[10], w[14]);
        QR(w[3], w[7], w[11], w[15]);
        /* diagonals */
        QR(w[0], w[5], w[10], w[15]);
        QR(w[1], w[6], w[11], w[12]);
        QR(w[2], w[7], w[8],  w[13]);
        QR(w[3], w[4], w[9],  w[14]);
    }

    /* feed-forward: add the original input to the permuted state */
    for (k = 0; k < 16; k++) {
        out[k] = w[k] + in[k];
    }
}

/* -------------------- ChaCha20 加/解密 -------------------- */
/* XOR `len` bytes of `in` with the ChaCha20 keystream and store them in `out`.
 * The same call encrypts and decrypts.
 *   key     - 32-byte key, read as eight little-endian words
 *   nonce   - 8-byte nonce, read as two little-endian words
 *   counter - 64-bit index of the first keystream block
 * State layout: words 0-3 constants, 4-11 key, 12-13 block counter
 * (low word first), 14-15 nonce. */
void chacha20_xor(uint8_t *out, const uint8_t *in, size_t len,
                  const uint8_t key[32], const uint8_t nonce[8], uint64_t counter)
{
    /* "expand 32-byte k" as four little-endian words */
    static const uint32_t sigma[4] = {
        0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
    };
    uint32_t state[16];
    uint32_t words[16];
    uint8_t stream[64];
    int w;

    for (w = 0; w < 4; w++) {
        state[w] = sigma[w];
    }
    for (w = 0; w < 8; w++) {
        state[4 + w] = load32_le(key + 4 * w);
    }
    state[12] = (uint32_t)counter;
    state[13] = (uint32_t)(counter >> 32);
    state[14] = load32_le(nonce);
    state[15] = load32_le(nonce + 4);

    for (size_t done = 0; done < len; ) {
        size_t chunk = len - done;
        if (chunk > 64) {
            chunk = 64;
        }

        /* one 64-byte keystream block, serialized little-endian */
        chacha20_block(words, state);
        for (w = 0; w < 16; w++) {
            store32_le(stream + 4 * w, words[w]);
        }

        for (size_t j = 0; j < chunk; j++) {
            out[done + j] = in[done + j] ^ stream[j];
        }
        done += chunk;

        /* advance the 64-bit block counter, carrying into the high word */
        if (++state[12] == 0) {
            state[13]++;
        }
    }
}

/* -------------------- Demo 测试 -------------------- */
/* Print n bytes of p as upper-case hex, 16 bytes per row, bytes separated by
 * single spaces. Every row — including the last one — ends with a newline,
 * so output that follows always starts on a fresh line (previously a length
 * that was a non-zero multiple of 16 left the final row unterminated).
 * Prints nothing when n is 0. */
static void hexprint(const uint8_t *p, size_t n) {
    for (size_t i = 0; i < n; i++) {
        /* a row ends after 16 bytes or at the very last byte */
        int row_ends = (i % 16 == 15) || (i + 1 == n);
        printf("%02X%s", p[i], row_ends ? "\n" : " ");
    }
}

/* Demo: encrypt a fixed message with chacha20_xor, decrypt it again with the
 * same key / nonce / counter, and print plaintext, ciphertext (hex) and the
 * decrypted text. Returns 0 on success, 1 if a buffer could not be allocated. */
int main(void) {
    /* exactly 32 characters: the array holds the key bytes without a NUL */
    const uint8_t key[32] = "12345678901234567890123456789012";
    const uint8_t nonce[8] = {0};
    const char *msg = "Hello, ChaCha20! This is a test message.";
    const size_t len = strlen(msg);
    const uint8_t *pt = (const uint8_t *)msg;
    int rc = 1;

    uint8_t *ct = malloc(len);
    uint8_t *dec = malloc(len);

    if (ct && dec) {
        chacha20_xor(ct, pt, len, key, nonce, 0);
        chacha20_xor(dec, ct, len, key, nonce, 0);

        printf("Plaintext: %.*s\n", (int)len, (const char *)pt);
        printf("Ciphertext (hex):\n");
        hexprint(ct, len);
        printf("Decrypted: %.*s\n", (int)len, (const char *)dec);
        rc = 0;
    }

    /* single exit path: release whichever buffers were obtained */
    free(ct);
    free(dec);
    return rc;
}

魔改

  1. 修改加密轮数

    标准 ChaCha20 使用 20 轮(rounds)。代码里的一次循环是一个“双轮”(double round):先做一次列轮(column round),再做一次对角轮(diagonal round),每一轮各包含 4 次 quarter round,所以通常代码里会写成:

    for (i = 0; i < 10; i++) // 10 * 2 = 20 rounds
    { /*算法内容*/ }

    如果你遇到的是 8 轮或 12 轮的变体(ChaCha8 / ChaCha12,Salsa20 对应的是 Salsa20/8、Salsa20/12),就要将 10 这个数字改为 4 或 6。

  2. 修改密钥长度

    Salsa20(以及原始版本的 ChaCha)有 32 字节和 16 字节两种密钥长度。在 16 字节模式下,需要将密钥扩展常量改为 “expand 16-byte k”,即 0x61707865、0x3120646e、0x79622d36、0x6b206574,然后把这 16 字节的密钥重复填入两次(填满 8 个密钥字)。

  3. State矩阵布局改动
    ChaCha20 的初始 state 是:

    constant[4] | key[8] | counter[1] | nonce[3]

    常见魔改:

    调换 key / nonce / counter 的位置

    把 counter 改成 64 位甚至放到 nonce 位置

    constant 不用 “expand 32-byte k”,改成其他字节

  4. Quarter Round 改动

    1. 旋转位数改动

      原版是 (16, 12, 8, 7),魔改可能会变成 (7, 9, 13, 18)(像 Salsa20)或完全随机的旋转常数。

      解法:用题目提供的加密脚本或反编译代码比对原版,找出常数差异,修改解密脚本即可。

    2. 运算顺序调整

      例如把 a += b 和 d ^= a 的顺序对调,或者改成 a ^= b。

      解法:分析汇编或伪代码,重写对应的 quarter round。

    3. 替换运算

      原本用加法 +,改成减法 -,或者将 XOR 改成 AND/OR(少见)。

      解法:直接根据魔改的运算定义,反推对应的逆运算。

  5. ChaCha20-Poly1305

    其为一种AEAD(Authenticated Encryption with Associated Data)算法,实现加密 + 完整性认证。

    结构:

    ChaCha20 负责加密数据(保密性)。

    Poly1305 负责生成认证标签(完整性+身份验证)。

    原理简述:

    1. 一次性密钥生成:ChaCha20 用 key、nonce 和计数器 0 生成一个 64 字节的密钥流块,取其前 32 字节作为 Poly1305 的一次性密钥(不是直接用原 key);加密数据时计数器从 1 开始。

    2. 数据加密:ChaCha20 加密明文得到密文。

    3. MAC 计算:Poly1305 对 “AAD ‖ 补零到 16 字节对齐 ‖ 密文 ‖ 补零到 16 字节对齐 ‖ AAD 长度(8 字节小端)‖ 密文长度(8 字节小端)” 计算出 16 字节的认证标签(tag);AAD 是附加数据,例如协议头信息。

    4. 发送:把密文和 tag 一起发给接收方。

    5. 解密验证:

      接收方用相同 key/nonce 重新计算 Poly1305 标签。

      如果标签不匹配,直接丢弃数据(防篡改、抗伪造)

    完整实现:

/*
* ChaCha20-Poly1305 AEAD — single-file C implementation (RFC 8439)
*
* - Portable C99, no external deps
* - 32-bit friendly (Poly1305 uses 26-bit limbs; works on MSVC/Clang/GCC)
* - Constant-time tag comparison
* - API:
* aead_chacha20poly1305_encrypt(...)
* aead_chacha20poly1305_decrypt(...)
*
* This is educational reference code. Review and test before production.
*/

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* ============================= Utilities ============================= */
/* Read the four bytes at p as a little-endian 32-bit value. */
static inline uint32_t load32_le(const void *p){
    const uint8_t *src = (const uint8_t *)p;
    uint32_t v = 0;

    /* start at the most significant byte and shift the rest in below it */
    for (int i = 3; i >= 0; i--) {
        v = (v << 8) | src[i];
    }
    return v;
}
/* Read the eight bytes at p as a little-endian 64-bit value. */
static inline uint64_t load64_le(const void *p){
    const uint8_t *src = (const uint8_t *)p;
    uint64_t v = 0;

    for (int i = 7; i >= 0; i--) {
        v = (v << 8) | src[i];
    }
    return v;
}
/* Write v to the four bytes at p, least-significant byte first. */
static inline void store32_le(void *p,uint32_t v){
    uint8_t *dst = (uint8_t *)p;

    for (int i = 0; i < 4; i++) {
        dst[i] = (uint8_t)(v >> (8 * i));
    }
}
/* Write v to the eight bytes at p, least-significant byte first. */
static inline void store64_le(void *p,uint64_t v){
    uint8_t *dst = (uint8_t *)p;

    for (int i = 0; i < 8; i++) {
        dst[i] = (uint8_t)(v >> (8 * i));
    }
}
/* Rotate x left by n bits. The amount is reduced modulo 32 and the
 * complementary right shift is masked as well, so n == 0 (or any multiple
 * of 32) returns x unchanged instead of shifting a 32-bit value by 32,
 * which C leaves undefined. For n in 1..31 the result is the plain
 * (x << n) | (x >> (32 - n)). */
static inline uint32_t rotl32(uint32_t x,int n){
    const unsigned k = (unsigned)n & 31u;
    return (x << k) | (x >> ((32u - k) & 31u));
}

/* Compare n bytes of a and b without an early exit: every byte pair is
 * XORed and OR-ed into one accumulator. Returns 1 when all bytes match
 * (also for n == 0), 0 otherwise. */
static int ct_mem_eq(const void *a,const void *b,size_t n){
    const uint8_t *lhs = (const uint8_t *)a;
    const uint8_t *rhs = (const uint8_t *)b;
    uint32_t diff = 0;
    size_t i = 0;

    while (i < n) {
        diff |= (uint32_t)(lhs[i] ^ rhs[i]);
        i++;
    }

    /* diff is in 0..255; diff - 1 wraps to all-ones only for diff == 0,
     * so bit 8 of (diff - 1) is set exactly when nothing differed */
    return (int)(((diff - 1u) >> 8) & 1u);
}

/* ============================= ChaCha20 ============================== */
/* Produce one 64-byte ChaCha20 keystream block.
 * State layout: words 0-3 constants ("expand 32-byte k"), 4-11 the 32-byte
 * key, word 12 the 32-bit block counter, 13-15 the 12-byte nonce; all
 * multi-byte values are little-endian. Ten double rounds are applied, the
 * initial state is added back, and the result is serialized to `out`. */
static void chacha20_block(uint8_t out[64], const uint8_t key[32], uint32_t counter, const uint8_t nonce[12]){
    /* word indices of the eight quarter rounds: four columns, then four diagonals */
    static const uint8_t lanes[8][4] = {
        {0, 4,  8, 12}, {1, 5,  9, 13}, {2, 6, 10, 14}, {3, 7, 11, 15},
        {0, 5, 10, 15}, {1, 6, 11, 12}, {2, 7,  8, 13}, {3, 4,  9, 14}
    };
    uint32_t init[16];
    uint32_t x[16];
    int i;

    init[0] = 0x61707865;
    init[1] = 0x3320646e;
    init[2] = 0x79622d32;
    init[3] = 0x6b206574;
    for (i = 0; i < 8; i++) {
        init[4 + i] = load32_le(key + 4 * i);
    }
    init[12] = counter;
    for (i = 0; i < 3; i++) {
        init[13 + i] = load32_le(nonce + 4 * i);
    }

    for (i = 0; i < 16; i++) {
        x[i] = init[i];
    }

    for (int dbl = 0; dbl < 10; dbl++) {
        for (int q = 0; q < 8; q++) {
            uint32_t *a = &x[lanes[q][0]];
            uint32_t *b = &x[lanes[q][1]];
            uint32_t *c = &x[lanes[q][2]];
            uint32_t *d = &x[lanes[q][3]];

            *a += *b; *d ^= *a; *d = rotl32(*d, 16);
            *c += *d; *b ^= *c; *b = rotl32(*b, 12);
            *a += *b; *d ^= *a; *d = rotl32(*d, 8);
            *c += *d; *b ^= *c; *b = rotl32(*b, 7);
        }
    }

    /* feed-forward and little-endian serialization */
    for (i = 0; i < 16; i++) {
        store32_le(out + 4 * i, x[i] + init[i]);
    }
}

/* XOR `len` bytes of `in` with the ChaCha20 keystream into `out`, using one
 * 64-byte block per counter value starting at `counter`. The 32-bit counter
 * is incremented after every block. */
static void chacha20_xor(uint8_t *out, const uint8_t *in, size_t len, const uint8_t key[32], const uint8_t nonce[12], uint32_t counter){
    uint8_t ks[64];
    size_t pos = 0;

    while (pos < len) {
        size_t take = len - pos;
        if (take > 64) {
            take = 64;
        }

        chacha20_block(ks, key, counter, nonce);
        counter++;

        for (size_t j = 0; j < take; j++) {
            out[pos + j] = in[pos + j] ^ ks[j];
        }
        pos += take;
    }
}

/* ============================= Poly1305 ============================== */
/* 26-bit limb implementation (portable; based on the original paper approach) */

/* Poly1305 working state; each value is held as five 26-bit limbs,
 * least-significant limb first:
 *   r - clamped multiplier taken from the first 16 key bytes (set in poly1305_init)
 *   s - r[i] * 5, precomputed for the modular reduction (poly1305_blocks reads s[1..4])
 *   h - running accumulator, zeroed by poly1305_init */
typedef struct { uint32_t r[5]; uint32_t s[5]; uint32_t h[5]; } poly1305_state;

/* Initialise a Poly1305 state from a 32-byte one-time key.
 * The first 16 key bytes are split into five 26-bit limbs and clamped to
 * form r; s holds r * 5 per limb and the accumulator h starts at zero.
 * The second 16 key bytes (the pad) are not stored here. */
static void poly1305_init(poly1305_state *st, const uint8_t key[32]){
    /* per-limb masks: 26-bit width combined with the Poly1305 clamp on r */
    static const uint32_t clamp[5] = {
        0x3ffffff, 0x3ffff03, 0x3ffc0ff, 0x3f03fff, 0x00fffff
    };
    uint32_t w[4];
    uint64_t limb[5];
    int i;

    for (i = 0; i < 4; i++) {
        w[i] = load32_le(key + 4 * i);
    }

    /* limb k starts at bit 26*k of the 128-bit little-endian value */
    limb[0] = w[0];
    limb[1] = (uint64_t)(w[0] >> 26) | ((uint64_t)w[1] << 6);
    limb[2] = (uint64_t)(w[1] >> 20) | ((uint64_t)w[2] << 12);
    limb[3] = (uint64_t)(w[2] >> 14) | ((uint64_t)w[3] << 18);
    limb[4] = (uint64_t)(w[3] >> 8);

    for (i = 0; i < 5; i++) {
        st->r[i] = (uint32_t)(limb[i] & clamp[i]);
        st->s[i] = st->r[i] * 5u;
        st->h[i] = 0;
    }
}

/* Absorb every complete 16-byte block of m into the accumulator:
 * for each block, h = (h + block) * r, partially reduced mod 2^130 - 5.
 *   st    - state whose r and s limbs are read and whose h limbs are updated
 *   m     - input bytes; only whole 16-byte blocks are consumed, any
 *           remaining (bytes % 16) tail is ignored
 *   bytes - number of input bytes available at m
 *   hibit - value added to the top limb of every block; 1<<24 in limb 4
 *           (which starts at bit 104) is bit 128 of the block value */
static void poly1305_blocks(poly1305_state *st, const uint8_t *m, size_t bytes, uint32_t hibit){
/* hibit = 1<<24 for full blocks, 0 for a final block that already carries its own 0x01 marker byte */
uint32_t r0=st->r[0], r1=st->r[1], r2=st->r[2], r3=st->r[3], r4=st->r[4];
uint32_t s1=st->s[1], s2=st->s[2], s3=st->s[3], s4=st->s[4];
uint32_t h0=st->h[0], h1=st->h[1], h2=st->h[2], h3=st->h[3], h4=st->h[4];

while(bytes >= 16){
/* four little-endian words, widened so the shifts below cannot overflow */
uint64_t t0 = load32_le(m+0);
uint64_t t1 = load32_le(m+4);
uint64_t t2 = load32_le(m+8);
uint64_t t3 = load32_le(m+12);

/* h += m (block split into 26-bit limbs) */
h0 += (uint32_t)( t0 & 0x3ffffff);
h1 += (uint32_t)(((t0>>26) | (t1<<6)) & 0x3ffffff);
h2 += (uint32_t)(((t1>>20) | (t2<<12))& 0x3ffffff);
h3 += (uint32_t)(((t2>>14) | (t3<<18))& 0x3ffffff);
h4 += (uint32_t)(( t3>>8 ) ) + hibit;

/* h *= r (mod 2^130-5): limb products that would land at or above
   2^130 use the precomputed s = 5*r instead, since 2^130 = 5 (mod p) */
uint64_t d0 = (uint64_t)h0*r0 + (uint64_t)h1*s4 + (uint64_t)h2*s3 + (uint64_t)h3*s2 + (uint64_t)h4*s1;
uint64_t d1 = (uint64_t)h0*r1 + (uint64_t)h1*r0 + (uint64_t)h2*s4 + (uint64_t)h3*s3 + (uint64_t)h4*s2;
uint64_t d2 = (uint64_t)h0*r2 + (uint64_t)h1*r1 + (uint64_t)h2*r0 + (uint64_t)h3*s4 + (uint64_t)h4*s3;
uint64_t d3 = (uint64_t)h0*r3 + (uint64_t)h1*r2 + (uint64_t)h2*r1 + (uint64_t)h3*r0 + (uint64_t)h4*s4;
uint64_t d4 = (uint64_t)h0*r4 + (uint64_t)h1*r3 + (uint64_t)h2*r2 + (uint64_t)h3*r1 + (uint64_t)h4*r0;

/* carry propagation: keep 26 bits per limb; the carry out of the top
   limb wraps to limb 0 multiplied by 5 */
uint64_t c;
c = (d0 >> 26); h0 = (uint32_t)(d0 & 0x3ffffff); d1 += c;
c = (d1 >> 26); h1 = (uint32_t)(d1 & 0x3ffffff); d2 += c;
c = (d2 >> 26); h2 = (uint32_t)(d2 & 0x3ffffff); d3 += c;
c = (d3 >> 26); h3 = (uint32_t)(d3 & 0x3ffffff); d4 += c;
c = (d4 >> 26); h4 = (uint32_t)(d4 & 0x3ffffff); h0 += (uint32_t)(c * 5);
c = h0 >> 26; h0 &= 0x3ffffff; h1 += (uint32_t)c;

m += 16; bytes -= 16;
}

/* write the updated accumulator back to the state */
st->h[0]=h0; st->h[1]=h1; st->h[2]=h2; st->h[3]=h3; st->h[4]=h4;
}

/* Finalise a Poly1305 computation and write the 16-byte tag to `mac`.
 *   st  - state holding the accumulator h as five 26-bit limbs
 *   key - the 32-byte one-time key; bytes 16..31 are the pad that is added
 *         to the reduced accumulator
 * Steps: propagate the remaining carries, reduce h modulo 2^130 - 5 with a
 * branch-free select, pack the limbs into a 128-bit little-endian integer,
 * add the pad modulo 2^128 and store the result.
 *
 * The limbs sit at bit offsets 0, 26, 52, 78 and 104, so limb 2 straddles
 * the boundary between the low and high 64-bit halves: its low 12 bits
 * belong to the low half and its upper 14 bits to the high half. */
static void poly1305_finish(uint8_t mac[16], poly1305_state *st, const uint8_t key[32]){
    uint32_t h0=st->h[0], h1=st->h[1], h2=st->h[2], h3=st->h[3], h4=st->h[4];

    /* final carries */
    uint32_t c;
    c = h1 >> 26; h1 &= 0x3ffffff; h2 += c;
    c = h2 >> 26; h2 &= 0x3ffffff; h3 += c;
    c = h3 >> 26; h3 &= 0x3ffffff; h4 += c;
    c = h4 >> 26; h4 &= 0x3ffffff; h0 += c * 5;
    c = h0 >> 26; h0 &= 0x3ffffff; h1 += c;

    /* g = h + 5 - 2^130, i.e. h - p; the top bit of g4 is set iff h < p */
    uint32_t g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
    uint32_t g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
    uint32_t g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
    uint32_t g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
    uint32_t g4 = h4 + c - (1u<<26);

    /* keep h when h < p, otherwise take g; mask is all ones when no borrow occurred */
    uint32_t mask = (g4 >> 31) - 1;
    h0 = (h0 & ~mask) | (g0 & mask);
    h1 = (h1 & ~mask) | (g1 & mask);
    h2 = (h2 & ~mask) | (g2 & mask);
    h3 = (h3 & ~mask) | (g3 & mask);
    h4 = (h4 & ~mask) | (g4 & mask);

    /* pack h mod 2^128 into two 64-bit halves; bits shifted past bit 63
     * fall off, which performs the reduction modulo 2^128 for the high half */
    uint64_t lo = (uint64_t)h0 | ((uint64_t)h1 << 26) | ((uint64_t)h2 << 52);
    uint64_t hi = ((uint64_t)h2 >> 12) | ((uint64_t)h3 << 14) | ((uint64_t)h4 << 40);

    /* add the pad (second half of the key) with carry from low to high */
    uint64_t pad_lo = load64_le(key+16);
    uint64_t pad_hi = load64_le(key+24);
    lo += pad_lo;
    hi += pad_hi + (lo < pad_lo);

    store64_le(mac+0, lo);
    store64_le(mac+8, hi);
}

/* One-shot Poly1305: authenticate mlen bytes at m under a 32-byte one-time
 * key and write the 16-byte tag to mac. Whole 16-byte blocks are absorbed
 * with hibit = 1<<24; a trailing partial block is copied into a zeroed
 * 16-byte buffer, followed by a single 0x01 byte, and absorbed with hibit 0. */
static void poly1305_auth(uint8_t mac[16], const uint8_t *m, size_t mlen, const uint8_t one_time_key[32]){
    poly1305_state st;
    const size_t tail = mlen % 16;
    const size_t whole = mlen - tail;

    poly1305_init(&st, one_time_key);

    if (whole != 0) {
        poly1305_blocks(&st, m, whole, 1u<<24);
    }

    if (tail != 0) {
        uint8_t last[16] = {0};
        memcpy(last, m + whole, tail);
        last[tail] = 1; /* marker byte right after the message bytes */
        poly1305_blocks(&st, last, 16, 0);
    }

    poly1305_finish(mac, &st, one_time_key);
}

/* =============== AEAD: ChaCha20-Poly1305 (RFC 8439) ================= */

/* Derive the Poly1305 one-time key for (key, nonce): generate the ChaCha20
 * block at counter 0 and copy its first 32 bytes to otk. The temporary
 * block is overwritten with zeros before returning. */
static void aead_poly1305_keygen(uint8_t otk[32], const uint8_t key[32], const uint8_t nonce[12]){
    uint8_t first_block[64];
    size_t i;

    chacha20_block(first_block, key, 0, nonce);

    for (i = 0; i < 32; i++) {
        otk[i] = first_block[i];
    }
    for (i = 0; i < sizeof first_block; i++) {
        first_block[i] = 0;
    }
}

/* Overwrite n bytes at p with zeros through a volatile pointer, so the
 * stores are not dropped as dead writes the way a trailing memset may be. */
static void aead_wipe(void *p, size_t n){
    volatile uint8_t *v = (volatile uint8_t *)p;
    while (n--) {
        *v++ = 0;
    }
}

/* Absorb `len` bytes of `data` followed by zero bytes up to the next 16-byte
 * boundary (the pad16 step of RFC 8439 section 2.8). The padded tail is an
 * ordinary full block, so it is absorbed with hibit = 1<<24 like all others.
 * `data` is not dereferenced when len is 0. */
static void aead_poly1305_absorb_padded(poly1305_state *st, const uint8_t *data, size_t len){
    const size_t whole = len & ~(size_t)15;
    const size_t tail = len & 15;

    if (whole) {
        poly1305_blocks(st, data, whole, 1u<<24);
    }
    if (tail) {
        uint8_t padded[16] = {0};
        memcpy(padded, data + whole, tail);
        poly1305_blocks(st, padded, 16, 1u<<24);
    }
}

/* Compute the AEAD tag over
 *   AAD || pad16 || ciphertext || pad16 || le64(aad_len) || le64(ct_len)
 * using the one-time key otk. The MAC input is always a whole number of
 * 16-byte blocks, so every block — including the length block — is
 * absorbed as a full block. */
static void aead_poly1305_tag(uint8_t tag[16], const uint8_t otk[32],
                              const uint8_t *aad, size_t aad_len,
                              const uint8_t *ct, size_t ct_len){
    poly1305_state st;
    uint8_t lens[16];

    poly1305_init(&st, otk);
    aead_poly1305_absorb_padded(&st, aad, aad_len);
    aead_poly1305_absorb_padded(&st, ct, ct_len);

    store64_le(lens+0, (uint64_t)aad_len);
    store64_le(lens+8, (uint64_t)ct_len);
    poly1305_blocks(&st, lens, 16, 1u<<24);

    poly1305_finish(tag, &st, otk);
    aead_wipe(&st, sizeof st);
}

/* Encrypt and authenticate.
 *   out       - receives plen bytes of ciphertext (may equal plaintext)
 *   tag       - receives the 16-byte authentication tag
 *   plaintext - plen bytes of input
 *   aad       - aad_len bytes of associated data (authenticated, not encrypted)
 *   key/nonce - 32-byte key and 12-byte nonce
 * The one-time Poly1305 key comes from keystream block 0; the data is
 * encrypted with ChaCha20 starting at block counter 1. Always returns 1. */
int aead_chacha20poly1305_encrypt(
    uint8_t *out, uint8_t tag[16],
    const uint8_t *plaintext, size_t plen,
    const uint8_t *aad, size_t aad_len,
    const uint8_t key[32], const uint8_t nonce[12])
{
    uint8_t otk[32];

    aead_poly1305_keygen(otk, key, nonce);

    /* ciphertext = ChaCha20(key, counter=1, nonce) ^ plaintext */
    chacha20_xor(out, plaintext, plen, key, nonce, 1);

    /* the tag covers the ciphertext, not the plaintext */
    aead_poly1305_tag(tag, otk, aad, aad_len, out, plen);

    aead_wipe(otk, sizeof otk);
    return 1;
}

/* Verify and decrypt.
 *   out        - receives clen bytes of plaintext when the tag is valid
 *                (may equal ciphertext); left untouched on failure
 *   ciphertext - clen bytes of input
 *   tag        - the 16-byte tag received with the ciphertext
 *   aad        - aad_len bytes of associated data
 * Returns 1 on success (tag OK), 0 on failure (tag mismatch). The expected
 * tag is recomputed over the received ciphertext before any decryption and
 * compared with ct_mem_eq. */
int aead_chacha20poly1305_decrypt(
    uint8_t *out, const uint8_t *ciphertext, size_t clen,
    const uint8_t tag[16],
    const uint8_t *aad, size_t aad_len,
    const uint8_t key[32], const uint8_t nonce[12])
{
    uint8_t otk[32];
    uint8_t expected[16];

    aead_poly1305_keygen(otk, key, nonce);
    aead_poly1305_tag(expected, otk, aad, aad_len, ciphertext, clen);

    int ok = ct_mem_eq(expected, tag, 16);

    /* only release plaintext for an authentic message */
    if (ok) {
        chacha20_xor(out, ciphertext, clen, key, nonce, 1);
    }

    aead_wipe(otk, sizeof otk);
    aead_wipe(expected, sizeof expected);
    return ok;
}

/* ============================= Demo / Test =========================== */
#ifdef TEST_MAIN
#include <stdio.h>

/* Print `label`, then the n bytes at b as lower-case hex with no separators,
 * then a newline. */
static void hex(const char *label, const uint8_t *b, size_t n){
    const uint8_t *end = b + n;

    printf("%s", label);
    while (b != end) {
        printf("%02x", *b);
        b++;
    }
    printf("\n");
}

/* Round-trip demo (not an RFC test vector): encrypt a fixed message with
 * fixed key / nonce / AAD, decrypt it again and print the results.
 * Returns 0 when the tag verified, 1 otherwise. */
int main(void){
    uint8_t key[32];
    uint8_t nonce[12];
    for(int i=0;i<32;i++) key[i]=(uint8_t)i;
    for(int i=0;i<12;i++) nonce[i]=(uint8_t)(0xA0+i);

    /* sizeof aad - 1 below drops the string terminator from the AAD;
     * the message is processed including its terminator */
    const uint8_t aad[] = "example aead aad";
    const uint8_t msg[] = "ChaCha20-Poly1305 test message";

    uint8_t ct[sizeof msg];
    uint8_t tag[16];
    aead_chacha20poly1305_encrypt(ct, tag, msg, sizeof msg, aad, sizeof aad-1, key, nonce);

    /* zero-initialised: decrypt leaves the buffer untouched on a tag mismatch */
    uint8_t pt[sizeof msg] = {0};
    int ok = aead_chacha20poly1305_decrypt(pt, ct, sizeof ct, tag, aad, sizeof aad-1, key, nonce);

    printf("verify: %s\n", ok?"OK":"FAIL");
    hex("tag: ", tag, 16);
    hex("ct : ", ct, sizeof ct);
    /* only show the plaintext when it was actually produced */
    if(ok){
        printf("pt : %.*s\n", (int)sizeof pt, (const char *)pt);
    }
    return ok?0:1;
}
#endif

例题

暂略