GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: src/utf8.c Lines: 0 65 0.0 %
Date: 2020-12-10 21:44:00 Branches: 0 8 0.0 %

Line Branch Exec Source
1
/**
2
 * Thanks to the AUTHORS of https://github.com/lemire/fastvalidate-utf-8/
3
 *
4
 * Permission is hereby granted, free of charge, to any
5
 * person obtaining a copy of this software and associated
6
 * documentation files (the "Software"), to deal in the
7
 * Software without restriction, including without
8
 * limitation the rights to use, copy, modify, merge,
9
 * publish, distribute, sublicense, and/or sell copies of
10
 * the Software, and to permit persons to whom the Software
11
 * is furnished to do so, subject to the following
12
 * conditions:
13
 *
14
 * The above copyright notice and this permission notice
15
 * shall be included in all copies or substantial portions
16
 * of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
19
 * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
20
 * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
21
 * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
22
 * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
25
 * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26
 * DEALINGS IN THE SOFTWARE.
27
 */
28
29
#include <stdbool.h>
30
#include <stddef.h>
31
#include <stdint.h>
32
#include <string.h>
33
34
#if defined(_MSC_VER)
35
/* Microsoft C/C++-compatible compiler */
36
#include <intrin.h>
37
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
38
/* GCC-compatible compiler, targeting x86/x86-64 */
39
#include <x86intrin.h>
40
#elif defined(__GNUC__) && defined(__ARM_NEON__)
41
/* GCC-compatible compiler, targeting ARM with NEON */
42
#include <arm_neon.h>
43
#elif defined(__GNUC__) && defined(__IWMMXT__)
44
/* GCC-compatible compiler, targeting ARM with WMMX */
45
#include <mmintrin.h>
46
#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__))
47
/* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
48
#include <altivec.h>
49
#elif defined(__GNUC__) && defined(__SPE__)
50
/* GCC-compatible compiler, targeting PowerPC with SPE */
51
#include <spe.h>
52
#endif
53
54
#if defined(__AVX512F__) && defined(__AVX512VL__) && defined(__AVX512VBMI__)
55
56
/*****************************/
57
// Shifts `b` up by one byte and fills the vacated lowest byte with the last
// byte of `a`: result[0] = a[63], result[i] = b[i - 1] for i >= 1.
static inline __m512i avx512_push_last_byte_of_a_to_b(__m512i a, __m512i b) {
    // Byte selectors, lowest quadword first. Selector 0x7F has bit 6 set,
    // which picks byte 63 of the second table operand (`a`); selectors
    // 0x00..0x3E pick bytes 0..62 of the first table operand (`b`).
    const __m512i shift_by_one = _mm512_setr_epi64(
            0x060504030201007F, 0x0E0D0C0B0A090807,
            0x161514131211100F, 0x1E1D1C1B1A191817,
            0x262524232221201F, 0x2E2D2C2B2A292827,
            0x363534333231302F, 0x3E3D3C3B3A393837);

    return _mm512_permutex2var_epi8(b, shift_by_one, a);
}
64
65
// Shifts `b` up by two bytes and fills the two vacated lowest bytes with the
// last two bytes of `a`: result[0] = a[62], result[1] = a[63],
// result[i] = b[i - 2] for i >= 2.
static inline __m512i avx512_push_last_2bytes_of_a_to_b(__m512i a, __m512i b) {
    // Byte selectors, lowest quadword first. Selectors 0x7E/0x7F have bit 6
    // set and pick bytes 62/63 of the second table operand (`a`); selectors
    // 0x00..0x3D pick bytes 0..61 of the first table operand (`b`).
    const __m512i shift_by_two = _mm512_setr_epi64(
            0x0504030201007F7E, 0x0D0C0B0A09080706,
            0x1514131211100F0E, 0x1D1C1B1A19181716,
            0x2524232221201F1E, 0x2D2C2B2A29282726,
            0x3534333231302F2E, 0x3D3C3B3A39383736);

    return _mm512_permutex2var_epi8(b, shift_by_two, a);
}
72
73
// all byte values must be no larger than 0xF4
74
static inline void avx512_checkSmallerThan0xF4(__m512i current_bytes,
75
        __mmask64 *has_error) {
76
    *has_error =
77
        _kor_mask64(*has_error, _mm512_cmpgt_epu8_mask(current_bytes,
78
                    _mm512_set1_epi8(0xF4)));
79
}
80
81
// Maps each high nibble to the sequence length announced by a byte with that
// nibble, using a 16-entry table repeated in every 128-bit lane:
//   0x0..0x7 -> 1, 0x8..0xB -> 0, 0xC..0xD -> 2, 0xE -> 3, 0xF -> 4
// (same table as the AVX2 version, packed four entries per 32-bit word).
static inline __m512i avx512_continuationLengths(__m512i high_nibbles) {
    const __m512i length_by_nibble =
        _mm512_setr4_epi32(0x01010101, 0x01010101, 0x00000000, 0x04030202);

    return _mm512_shuffle_epi8(length_by_nibble, high_nibbles);
}
87
88
// Propagates the announced lengths onto the bytes that follow them.
// Each lane receives its own length, plus the neighbour one byte earlier
// minus 1, plus the (already summed) neighbour two bytes earlier minus 2;
// both subtractions saturate at 0. The bytes shifted in at the low end come
// from the tail of previous_carries.
static inline __m512i avx512_carryContinuations(__m512i initial_lengths,
        __m512i previous_carries) {
    const __m512i one_back =
        avx512_push_last_byte_of_a_to_b(previous_carries, initial_lengths);
    const __m512i carry1 = _mm512_subs_epu8(one_back, _mm512_set1_epi8(1));
    const __m512i partial = _mm512_add_epi8(initial_lengths, carry1);

    const __m512i two_back =
        avx512_push_last_2bytes_of_a_to_b(previous_carries, partial);
    const __m512i carry2 = _mm512_subs_epu8(two_back, _mm512_set1_epi8(2));

    return _mm512_add_epi8(partial, carry2);
}
101
102
static inline void avx512_checkContinuations(__m512i initial_lengths,
103
        __m512i carries,
104
        __mmask64 *has_error) {
105
    // overlap || underlap
106
    // carry > length && length > 0 || !(carry > length) && !(length > 0)
107
    // (carries > length) == (lengths > 0)
108
    *has_error = _kor_mask64(
109
            *has_error,
110
            _kxnor_mask64(
111
                _mm512_cmpgt_epi8_mask(carries, initial_lengths),
112
                _mm512_cmpgt_epi8_mask(initial_lengths, _mm512_setzero_si512())));
113
}
114
115
// when 0xED is found, next byte must be no larger than 0x9F
116
// when 0xF4 is found, next byte must be no larger than 0x8F
117
// next byte must be continuation, ie sign bit is set, so signed < is ok
118
static inline void avx512_checkFirstContinuationMax(__m512i current_bytes,
119
        __m512i off1_current_bytes,
120
        __mmask64 *has_error) {
121
    __mmask64 maskED =
122
        _mm512_cmpeq_epi8_mask(off1_current_bytes, _mm512_set1_epi8((char)0xED));
123
    __mmask64 maskF4 =
124
        _mm512_cmpeq_epi8_mask(off1_current_bytes, _mm512_set1_epi8((char)0xF4));
125
    __mmask64 badfollowED = _kand_mask64(
126
            _mm512_cmpgt_epi8_mask(current_bytes, _mm512_set1_epi8((char)0x9F)), maskED);
127
    __mmask64 badfollowF4 = _kand_mask64(
128
            _mm512_cmpgt_epi8_mask(current_bytes, _mm512_set1_epi8((char)0x8F)), maskF4);
129
130
    *has_error = _kor_mask64(*has_error, _kor_mask64(badfollowED, badfollowF4));
131
}
132
133
// map off1_hibits => error condition
134
// hibits     off1    cur
135
// C       => < C2 && true
136
// E       => < E1 && < A0
137
// F       => < F1 && < 90
138
// else      false && false
139
static inline void avx512_checkOverlong(__m512i current_bytes,
140
        __m512i off1_current_bytes,
141
        __m512i hibits, __m512i previous_hibits,
142
        __mmask64 *has_error) {
143
    __m512i off1_hibits =
144
        avx512_push_last_byte_of_a_to_b(previous_hibits, hibits);
145
    __m512i initial_mins = _mm512_shuffle_epi8(
146
            _mm512_setr4_epi32(0x80808080, 0x80808080, 0x80808080,
147
                0xF1E180C2), // see avx2 version for clarity
148
            off1_hibits);
149
150
    __mmask64 initial_under =
151
        _mm512_cmpgt_epi8_mask(initial_mins, off1_current_bytes);
152
153
    __m512i second_mins = _mm512_shuffle_epi8(
154
            _mm512_setr4_epi32(0x80808080, 0x80808080, 0x80808080,
155
                0x90A07F7F), // see avx2 version for clarity
156
            off1_hibits);
157
    __mmask64 second_under = _mm512_cmpgt_epi8_mask(second_mins, current_bytes);
158
    *has_error =
159
        _kor_mask64(*has_error, _kand_mask64(initial_under, second_under));
160
}
161
162
struct avx512_processed_utf_bytes {
163
    __m512i rawbytes;
164
    __m512i high_nibbles;
165
    __m512i carried_continuations;
166
};
167
168
static inline void
169
avx512_count_nibbles(__m512i bytes, struct avx512_processed_utf_bytes *answer) {
170
    answer->rawbytes = bytes;
171
    answer->high_nibbles =
172
        _mm512_and_si512(_mm512_srli_epi16(bytes, 4), _mm512_set1_epi8((char)0x0F));
173
}
174
175
// check whether the current bytes are valid UTF-8
176
// at the end of the function, previous gets updated
177
static struct avx512_processed_utf_bytes
178
avx512_checkUTF8Bytes(__m512i current_bytes,
179
        struct avx512_processed_utf_bytes *previous,
180
        __mmask64 *has_error) {
181
    struct avx512_processed_utf_bytes pb;
182
    avx512_count_nibbles(current_bytes, &pb);
183
184
    avx512_checkSmallerThan0xF4(current_bytes, has_error);
185
186
    __m512i initial_lengths = avx512_continuationLengths(pb.high_nibbles);
187
188
    pb.carried_continuations = avx512_carryContinuations(
189
            initial_lengths, previous->carried_continuations);
190
191
    avx512_checkContinuations(initial_lengths, pb.carried_continuations,
192
            has_error);
193
194
    __m512i off1_current_bytes =
195
        avx512_push_last_byte_of_a_to_b(previous->rawbytes, pb.rawbytes);
196
    avx512_checkFirstContinuationMax(current_bytes, off1_current_bytes,
197
            has_error);
198
199
    avx512_checkOverlong(current_bytes, off1_current_bytes, pb.high_nibbles,
200
            previous->high_nibbles, has_error);
201
    return pb;
202
}
203
204
// check whether the current bytes are valid UTF-8
205
// at the end of the function, previous gets updated
206
static struct avx512_processed_utf_bytes
207
avx512_checkUTF8Bytes_asciipath(__m512i current_bytes,
208
        struct avx512_processed_utf_bytes *previous,
209
        __mmask64 *has_error) {
210
    if (!_mm512_cmpge_epu8_mask(current_bytes,
211
                _mm512_set1_epi8((char)0x80))) { // fast ascii path
212
        *has_error = _kor_mask64(
213
                *has_error,
214
                _mm512_cmpgt_epi8_mask(
215
                    previous->carried_continuations,
216
                    _mm512_setr_epi32(0x09090909, 0x09090909, 0x09090909, 0x09090909,
217
                        0x09090909, 0x09090909, 0x09090909, 0x09090909,
218
                        0x09090909, 0x09090909, 0x09090909, 0x09090909,
219
                        0x09090909, 0x09090909, 0x09090909, 0x01090909)));
220
        return *previous;
221
    }
222
223
    struct avx512_processed_utf_bytes pb;
224
    avx512_count_nibbles(current_bytes, &pb);
225
226
    avx512_checkSmallerThan0xF4(current_bytes, has_error);
227
228
    __m512i initial_lengths = avx512_continuationLengths(pb.high_nibbles);
229
230
    pb.carried_continuations = avx512_carryContinuations(
231
            initial_lengths, previous->carried_continuations);
232
233
    avx512_checkContinuations(initial_lengths, pb.carried_continuations,
234
            has_error);
235
236
    __m512i off1_current_bytes =
237
        avx512_push_last_byte_of_a_to_b(previous->rawbytes, pb.rawbytes);
238
    avx512_checkFirstContinuationMax(current_bytes, off1_current_bytes,
239
            has_error);
240
241
    avx512_checkOverlong(current_bytes, off1_current_bytes, pb.high_nibbles,
242
            previous->high_nibbles, has_error);
243
    return pb;
244
}
245
246
static bool validate_utf8_fast_avx512_asciipath(const char *src, size_t len) {
247
    size_t i = 0;
248
    __mmask64 has_error = 0;
249
    struct avx512_processed_utf_bytes previous = {
250
        .rawbytes = _mm512_setzero_si512(),
251
        .high_nibbles = _mm512_setzero_si512(),
252
        .carried_continuations = _mm512_setzero_si512()
253
    };
254
    if (len >= 64) {
255
        for (; i <= len - 64; i += 64) {
256
            __m512i current_bytes = _mm512_loadu_si512((const __m512i *)(src + i));
257
            previous =
258
                avx512_checkUTF8Bytes_asciipath(current_bytes, &previous, &has_error);
259
        }
260
    }
261
262
    // last part
263
    if (i < len) {
264
        char buffer[64];
265
        memset(buffer, 0, 64);
266
        memcpy(buffer, src + i, len - i);
267
        __m512i current_bytes = _mm512_loadu_si512((const __m512i *)(buffer));
268
        previous = avx512_checkUTF8Bytes(current_bytes, &previous, &has_error);
269
    } else {
270
        has_error = _kor_mask64(
271
                has_error,
272
                _mm512_cmpgt_epi8_mask(
273
                    previous.carried_continuations,
274
                    _mm512_setr_epi32(0x09090909, 0x09090909, 0x09090909, 0x09090909,
275
                        0x09090909, 0x09090909, 0x09090909, 0x09090909,
276
                        0x09090909, 0x09090909, 0x09090909, 0x09090909,
277
                        0x09090909, 0x09090909, 0x09090909, 0x01090909)));
278
    }
279
280
    return !has_error;
281
}
282
283
bool utf8_check(const char *src, size_t len) {
284
    size_t i = 0;
285
    __mmask64 has_error = 0;
286
    struct avx512_processed_utf_bytes previous = {
287
        .rawbytes = _mm512_setzero_si512(),
288
        .high_nibbles = _mm512_setzero_si512(),
289
        .carried_continuations = _mm512_setzero_si512()
290
    };
291
    if (len >= 64) {
292
        for (; i <= len - 64; i += 64) {
293
            __m512i current_bytes = _mm512_loadu_si512((const __m512i *)(src + i));
294
            previous = avx512_checkUTF8Bytes(current_bytes, &previous, &has_error);
295
        }
296
    }
297
298
    // last part
299
    if (i < len) {
300
        char buffer[64];
301
        memset(buffer, 0, 64);
302
        memcpy(buffer, src + i, len - i);
303
        __m512i current_bytes = _mm512_loadu_si512((const __m512i *)(buffer));
304
        previous = avx512_checkUTF8Bytes(current_bytes, &previous, &has_error);
305
    } else {
306
        has_error = _kor_mask64(
307
                has_error,
308
                _mm512_cmpgt_epi8_mask(
309
                    previous.carried_continuations,
310
                    _mm512_setr_epi32(0x09090909, 0x09090909, 0x09090909, 0x09090909,
311
                        0x09090909, 0x09090909, 0x09090909, 0x09090909,
312
                        0x09090909, 0x09090909, 0x09090909, 0x09090909,
313
                        0x09090909, 0x09090909, 0x09090909, 0x01090909)));
314
    }
315
316
    return !has_error;
317
}
318
319
#elif defined(__AVX2__) && defined(__AVX__)
320
321
/*****************************/
322
// Shifts `b` up by one byte across the full 256 bits and fills the vacated
// lowest byte with the last byte of `a`.
static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) {
    // Selector 0x21: low lane = high lane of `a`, high lane = low lane of `b`,
    // i.e. the 16 bytes that precede each 128-bit lane of `b`.
    const __m256i preceding_lanes = _mm256_permute2x128_si256(a, b, 0x21);

    return _mm256_alignr_epi8(b, preceding_lanes, 15);
}
325
326
// Shifts `b` up by two bytes across the full 256 bits and fills the two
// vacated lowest bytes with the last two bytes of `a`.
static inline __m256i push_last_2bytes_of_a_to_b(__m256i a, __m256i b) {
    // Selector 0x21: low lane = high lane of `a`, high lane = low lane of `b`,
    // i.e. the 16 bytes that precede each 128-bit lane of `b`.
    const __m256i preceding_lanes = _mm256_permute2x128_si256(a, b, 0x21);

    return _mm256_alignr_epi8(b, preceding_lanes, 14);
}
329
330
// all byte values must be no larger than 0xF4
331
static inline void avxcheckSmallerThan0xF4(__m256i current_bytes,
332
        __m256i *has_error) {
333
    // unsigned, saturates to 0 below max
334
    *has_error = _mm256_or_si256(
335
            *has_error, _mm256_subs_epu8(current_bytes, _mm256_set1_epi8((char)0xF4)));
336
}
337
338
// Maps each high nibble to the sequence length announced by a byte with that
// nibble. The 16-entry table is repeated for both 128-bit lanes:
//   0x0..0x7 (0xxx, ASCII)        -> 1
//   0x8..0xB (10xx, continuation) -> 0
//   0xC..0xD (110x)               -> 2
//   0xE      (1110)               -> 3
//   0xF      (1111)               -> 4, next bit should be 0 (not checked here)
static inline __m256i avxcontinuationLengths(__m256i high_nibbles) {
    const __m256i length_by_nibble = _mm256_setr_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4,  // low lane
            1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4); // high lane

    return _mm256_shuffle_epi8(length_by_nibble, high_nibbles);
}
353
354
// Propagates the announced lengths onto the bytes that follow them.
// Each lane receives its own length, plus the neighbour one byte earlier
// minus 1, plus the (already summed) neighbour two bytes earlier minus 2;
// both subtractions saturate at 0. The bytes shifted in at the low end come
// from the tail of previous_carries.
static inline __m256i avxcarryContinuations(__m256i initial_lengths,
        __m256i previous_carries) {
    const __m256i one_back =
        push_last_byte_of_a_to_b(previous_carries, initial_lengths);
    const __m256i carry1 = _mm256_subs_epu8(one_back, _mm256_set1_epi8(1));
    const __m256i partial = _mm256_add_epi8(initial_lengths, carry1);

    const __m256i two_back =
        push_last_2bytes_of_a_to_b(previous_carries, partial);
    const __m256i carry2 = _mm256_subs_epu8(two_back, _mm256_set1_epi8(2));

    return _mm256_add_epi8(partial, carry2);
}
366
367
// Flags lanes where sequences overlap or leave a gap.
//   overlap:  carry > length  while  length > 0
//   underlap: carry <= length while  length == 0
// Both cases reduce to (carry > length) == (length > 0), so the two compare
// results are tested for equality and OR-ed into *has_error.
static inline void avxcheckContinuations(__m256i initial_lengths,
        __m256i carries, __m256i *has_error) {
    const __m256i carry_exceeds = _mm256_cmpgt_epi8(carries, initial_lengths);
    const __m256i has_length =
        _mm256_cmpgt_epi8(initial_lengths, _mm256_setzero_si256());
    const __m256i over_or_under = _mm256_cmpeq_epi8(carry_exceeds, has_length);

    *has_error = _mm256_or_si256(*has_error, over_or_under);
}
379
380
// when 0xED is found, next byte must be no larger than 0x9F
381
// when 0xF4 is found, next byte must be no larger than 0x8F
382
// next byte must be continuation, ie sign bit is set, so signed < is ok
383
static inline void avxcheckFirstContinuationMax(__m256i current_bytes,
384
        __m256i off1_current_bytes,
385
        __m256i *has_error) {
386
    __m256i maskED =
387
        _mm256_cmpeq_epi8(off1_current_bytes, _mm256_set1_epi8((char)0xED));
388
    __m256i maskF4 =
389
        _mm256_cmpeq_epi8(off1_current_bytes, _mm256_set1_epi8((char)0xF4));
390
391
    __m256i badfollowED = _mm256_and_si256(
392
            _mm256_cmpgt_epi8(current_bytes, _mm256_set1_epi8((char)0x9F)), maskED);
393
    __m256i badfollowF4 = _mm256_and_si256(
394
            _mm256_cmpgt_epi8(current_bytes, _mm256_set1_epi8((char)0x8F)), maskF4);
395
396
    *has_error =
397
        _mm256_or_si256(*has_error, _mm256_or_si256(badfollowED, badfollowF4));
398
}
399
400
// map off1_hibits => error condition
401
// hibits     off1    cur
402
// C       => < C2 && true
403
// E       => < E1 && < A0
404
// F       => < F1 && < 90
405
// else      false && false
406
static inline void avxcheckOverlong(__m256i current_bytes,
407
        __m256i off1_current_bytes, __m256i hibits,
408
        __m256i previous_hibits,
409
        __m256i *has_error) {
410
    __m256i off1_hibits = push_last_byte_of_a_to_b(previous_hibits, hibits);
411
    __m256i initial_mins = _mm256_shuffle_epi8(
412
            _mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128,
413
                -128, -128, -128, // 10xx => false
414
                (char)0xC2, -128,       // 110x
415
                (char)0xE1,             // 1110
416
                (char)0xF1, -128, -128, -128, -128, -128, -128, -128, -128,
417
                -128, -128, -128, -128, // 10xx => false
418
                (char)0xC2, -128,             // 110x
419
                (char)0xE1,                   // 1110
420
                (char)0xF1),
421
            off1_hibits);
422
423
    __m256i initial_under = _mm256_cmpgt_epi8(initial_mins, off1_current_bytes);
424
425
    __m256i second_mins = _mm256_shuffle_epi8(
426
            _mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128,
427
                -128, -128, -128, // 10xx => false
428
                127, 127,         // 110x => true
429
                (char)0xA0,             // 1110
430
                (char)0x90, -128, -128, -128, -128, -128, -128, -128, -128,
431
                -128, -128, -128, -128, // 10xx => false
432
                127, 127,               // 110x => true
433
                (char)0xA0,                   // 1110
434
                (char)0x90),
435
            off1_hibits);
436
    __m256i second_under = _mm256_cmpgt_epi8(second_mins, current_bytes);
437
    *has_error = _mm256_or_si256(*has_error,
438
            _mm256_and_si256(initial_under, second_under));
439
}
440
441
struct avx_processed_utf_bytes {
442
    __m256i rawbytes;
443
    __m256i high_nibbles;
444
    __m256i carried_continuations;
445
};
446
447
// Stores `bytes` and the high nibble of each byte into *answer.
// answer->carried_continuations is not touched here.
static inline void avx_count_nibbles(__m256i bytes,
        struct avx_processed_utf_bytes *answer) {
    // The shift works on 16-bit lanes, so bits leak across byte boundaries;
    // the 0x0F mask removes them.
    const __m256i shifted = _mm256_srli_epi16(bytes, 4);
    const __m256i low_nibble_mask = _mm256_set1_epi8((char)0x0F);

    answer->rawbytes = bytes;
    answer->high_nibbles = _mm256_and_si256(shifted, low_nibble_mask);
}
453
454
// check whether the current bytes are valid UTF-8
455
// at the end of the function, previous gets updated
456
static struct avx_processed_utf_bytes
457
avxcheckUTF8Bytes(__m256i current_bytes,
458
        struct avx_processed_utf_bytes *previous,
459
        __m256i *has_error) {
460
    struct avx_processed_utf_bytes pb;
461
    avx_count_nibbles(current_bytes, &pb);
462
463
    avxcheckSmallerThan0xF4(current_bytes, has_error);
464
465
    __m256i initial_lengths = avxcontinuationLengths(pb.high_nibbles);
466
467
    pb.carried_continuations =
468
        avxcarryContinuations(initial_lengths, previous->carried_continuations);
469
470
    avxcheckContinuations(initial_lengths, pb.carried_continuations, has_error);
471
472
    __m256i off1_current_bytes =
473
        push_last_byte_of_a_to_b(previous->rawbytes, pb.rawbytes);
474
    avxcheckFirstContinuationMax(current_bytes, off1_current_bytes, has_error);
475
476
    avxcheckOverlong(current_bytes, off1_current_bytes, pb.high_nibbles,
477
            previous->high_nibbles, has_error);
478
    return pb;
479
}
480
481
bool utf8_check(const char *src, size_t len) {
482
    size_t i = 0;
483
    __m256i has_error = _mm256_setzero_si256();
484
    struct avx_processed_utf_bytes previous = {
485
        .rawbytes = _mm256_setzero_si256(),
486
        .high_nibbles = _mm256_setzero_si256(),
487
        .carried_continuations = _mm256_setzero_si256()};
488
    if (len >= 32) {
489
        for (; i <= len - 32; i += 32) {
490
            __m256i current_bytes = _mm256_loadu_si256((const __m256i *)(src + i));
491
            previous = avxcheckUTF8Bytes(current_bytes, &previous, &has_error);
492
        }
493
    }
494
495
    // last part
496
    if (i < len) {
497
        char buffer[32];
498
        memset(buffer, 0, 32);
499
        memcpy(buffer, src + i, len - i);
500
        __m256i current_bytes = _mm256_loadu_si256((const __m256i *)(buffer));
501
        previous = avxcheckUTF8Bytes(current_bytes, &previous, &has_error);
502
    } else {
503
        has_error = _mm256_or_si256(
504
                _mm256_cmpgt_epi8(previous.carried_continuations,
505
                    _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
506
                        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
507
                        9, 9, 9, 9, 9, 9, 9, 1)),
508
                has_error);
509
    }
510
511
    return _mm256_testz_si256(has_error, has_error);
512
}
513
514
#elif defined(__SSE4_1__)
/* This branch calls _mm_shuffle_epi8 and _mm_alignr_epi8 (SSSE3) as well as
 * _mm_testz_si128 (SSE4.1), so it is gated on SSE4.1 rather than SSE2.
 * SSE2-only targets fall through to the portable DFA in the #else branch. */
515
516
/*
517
 * legal utf-8 byte sequence
518
 * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
519
 *
520
 *  Code Points        1st       2nd      3rd      4th
521
 * U+0000..U+007F     00..7F
522
 * U+0080..U+07FF     C2..DF   80..BF
523
 * U+0800..U+0FFF     E0       A0..BF   80..BF
524
 * U+1000..U+CFFF     E1..EC   80..BF   80..BF
525
 * U+D000..U+D7FF     ED       80..9F   80..BF
526
 * U+E000..U+FFFF     EE..EF   80..BF   80..BF
527
 * U+10000..U+3FFFF   F0       90..BF   80..BF   80..BF
528
 * U+40000..U+FFFFF   F1..F3   80..BF   80..BF   80..BF
529
 * U+100000..U+10FFFF F4       80..8F   80..BF   80..BF
530
 *
531
 */
532
533
// all byte values must be no larger than 0xF4
534
static inline void checkSmallerThan0xF4(__m128i current_bytes,
535
        __m128i *has_error) {
536
    // unsigned, saturates to 0 below max
537
    *has_error = _mm_or_si128(*has_error,
538
            _mm_subs_epu8(current_bytes, _mm_set1_epi8((char)0xF4)));
539
}
540
541
// Maps each high nibble to the sequence length announced by a byte with that
// nibble:
//   0x0..0x7 (0xxx, ASCII)        -> 1
//   0x8..0xB (10xx, continuation) -> 0
//   0xC..0xD (110x)               -> 2
//   0xE      (1110)               -> 3
//   0xF      (1111)               -> 4, next bit should be 0 (not checked here)
static inline __m128i continuationLengths(__m128i high_nibbles) {
    const __m128i length_by_nibble = _mm_setr_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4);

    return _mm_shuffle_epi8(length_by_nibble, high_nibbles);
}
550
551
// Propagates the announced lengths onto the bytes that follow them.
// Each lane receives its own length, plus the neighbour one byte earlier
// minus 1, plus the (already summed) neighbour two bytes earlier minus 2;
// both subtractions saturate at 0. The bytes shifted in at the low end come
// from the tail of previous_carries.
static inline __m128i carryContinuations(__m128i initial_lengths,
        __m128i previous_carries) {
    const __m128i one_back =
        _mm_alignr_epi8(initial_lengths, previous_carries, 15);
    const __m128i carry1 = _mm_subs_epu8(one_back, _mm_set1_epi8(1));
    const __m128i partial = _mm_add_epi8(initial_lengths, carry1);

    const __m128i two_back = _mm_alignr_epi8(partial, previous_carries, 14);
    const __m128i carry2 = _mm_subs_epu8(two_back, _mm_set1_epi8(2));

    return _mm_add_epi8(partial, carry2);
}
563
564
// Flags lanes where sequences overlap or leave a gap.
//   overlap:  carry > length  while  length > 0
//   underlap: carry <= length while  length == 0
// Both cases reduce to (carry > length) == (length > 0), so the two compare
// results are tested for equality and OR-ed into *has_error.
static inline void checkContinuations(__m128i initial_lengths, __m128i carries,
        __m128i *has_error) {
    const __m128i carry_exceeds = _mm_cmpgt_epi8(carries, initial_lengths);
    const __m128i has_length =
        _mm_cmpgt_epi8(initial_lengths, _mm_setzero_si128());
    const __m128i over_or_under = _mm_cmpeq_epi8(carry_exceeds, has_length);

    *has_error = _mm_or_si128(*has_error, over_or_under);
}
576
577
// when 0xED is found, next byte must be no larger than 0x9F
578
// when 0xF4 is found, next byte must be no larger than 0x8F
579
// next byte must be continuation, ie sign bit is set, so signed < is ok
580
static inline void checkFirstContinuationMax(__m128i current_bytes,
581
        __m128i off1_current_bytes,
582
        __m128i *has_error) {
583
    __m128i maskED = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8((char)0xED));
584
    __m128i maskF4 = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8((char)0xF4));
585
586
    __m128i badfollowED =
587
        _mm_and_si128(_mm_cmpgt_epi8(current_bytes, _mm_set1_epi8((char)0x9F)), maskED);
588
    __m128i badfollowF4 =
589
        _mm_and_si128(_mm_cmpgt_epi8(current_bytes, _mm_set1_epi8((char)0x8F)), maskF4);
590
591
    *has_error = _mm_or_si128(*has_error, _mm_or_si128(badfollowED, badfollowF4));
592
}
593
594
// map off1_hibits => error condition
595
// hibits     off1    cur
596
// C       => < C2 && true
597
// E       => < E1 && < A0
598
// F       => < F1 && < 90
599
// else      false && false
600
static inline void checkOverlong(__m128i current_bytes,
601
        __m128i off1_current_bytes, __m128i hibits,
602
        __m128i previous_hibits, __m128i *has_error) {
603
    __m128i off1_hibits = _mm_alignr_epi8(hibits, previous_hibits, 16 - 1);
604
    __m128i initial_mins = _mm_shuffle_epi8(
605
            _mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
606
                -128, -128, // 10xx => false
607
                (char)0xC2, -128, // 110x
608
                (char)0xE1,       // 1110
609
                (char)0xF1),
610
            off1_hibits);
611
612
    __m128i initial_under = _mm_cmpgt_epi8(initial_mins, off1_current_bytes);
613
614
    __m128i second_mins = _mm_shuffle_epi8(
615
            _mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
616
                -128, -128, // 10xx => false
617
                127, 127,   // 110x => true
618
                (char)0xA0,       // 1110
619
                (char)0x90),
620
            off1_hibits);
621
    __m128i second_under = _mm_cmpgt_epi8(second_mins, current_bytes);
622
    *has_error =
623
        _mm_or_si128(*has_error, _mm_and_si128(initial_under, second_under));
624
}
625
626
struct processed_utf_bytes {
627
    __m128i rawbytes;
628
    __m128i high_nibbles;
629
    __m128i carried_continuations;
630
};
631
632
// Stores `bytes` and the high nibble of each byte into *answer.
// answer->carried_continuations is not touched here.
static inline void count_nibbles(__m128i bytes,
        struct processed_utf_bytes *answer) {
    // The shift works on 16-bit lanes, so bits leak across byte boundaries;
    // the 0x0F mask removes them.
    const __m128i shifted = _mm_srli_epi16(bytes, 4);
    const __m128i low_nibble_mask = _mm_set1_epi8((char)0x0F);

    answer->rawbytes = bytes;
    answer->high_nibbles = _mm_and_si128(shifted, low_nibble_mask);
}
638
639
// check whether the current bytes are valid UTF-8
640
// at the end of the function, previous gets updated
641
static struct processed_utf_bytes
642
checkUTF8Bytes(__m128i current_bytes, struct processed_utf_bytes *previous,
643
        __m128i *has_error) {
644
    struct processed_utf_bytes pb;
645
    count_nibbles(current_bytes, &pb);
646
647
    checkSmallerThan0xF4(current_bytes, has_error);
648
649
    __m128i initial_lengths = continuationLengths(pb.high_nibbles);
650
651
    pb.carried_continuations =
652
        carryContinuations(initial_lengths, previous->carried_continuations);
653
654
    checkContinuations(initial_lengths, pb.carried_continuations, has_error);
655
656
    __m128i off1_current_bytes =
657
        _mm_alignr_epi8(pb.rawbytes, previous->rawbytes, 16 - 1);
658
    checkFirstContinuationMax(current_bytes, off1_current_bytes, has_error);
659
660
    checkOverlong(current_bytes, off1_current_bytes, pb.high_nibbles,
661
            previous->high_nibbles, has_error);
662
    return pb;
663
}
664
665
bool utf8_check(const char *src, size_t len) {
666
    size_t i = 0;
667
    __m128i has_error = _mm_setzero_si128();
668
    struct processed_utf_bytes previous = {.rawbytes = _mm_setzero_si128(),
669
        .high_nibbles = _mm_setzero_si128(),
670
        .carried_continuations =
671
            _mm_setzero_si128()};
672
    if (len >= 16) {
673
        for (; i <= len - 16; i += 16) {
674
            __m128i current_bytes = _mm_loadu_si128((const __m128i *)(src + i));
675
            previous = checkUTF8Bytes(current_bytes, &previous, &has_error);
676
        }
677
    }
678
679
    // last part
680
    if (i < len) {
681
        char buffer[16];
682
        memset(buffer, 0, 16);
683
        memcpy(buffer, src + i, len - i);
684
        __m128i current_bytes = _mm_loadu_si128((const __m128i *)(buffer));
685
        previous = checkUTF8Bytes(current_bytes, &previous, &has_error);
686
    } else {
687
        has_error =
688
            _mm_or_si128(_mm_cmpgt_epi8(previous.carried_continuations,
689
                        _mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
690
                            9, 9, 9, 9, 9, 1)),
691
                    has_error);
692
    }
693
694
    return _mm_testz_si128(has_error, has_error);
695
}
696
#else
697
// credit: @hoehrmann
698
699
// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
700
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
701
702
/*
 * DFA states are stored pre-multiplied by 16 so that the next state is simply
 * shifted_utf8d_transition[state + type], with no shift or multiply.
 * Only the two states tested by name are listed here.
 */
enum {
    UTF8_DFA_ACCEPT = 0x00, /* start state / complete character */
    UTF8_DFA_REJECT = 0x10  /* trap state: every transition stays here */
};

/* Character class (0..11) of every possible byte value. */
static const uint8_t utf8d[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..0f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10..1f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 30..3f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50..5f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70..7f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80..8f
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 90..9f
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // a0..af
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // b0..bf
    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0..cf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // d0..df
    0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
    0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3,         // e0..ef
    0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8,
    0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8          // f0..ff
};

/*
 * Transition table, one row of 16 entries per state (row index = state / 16).
 * Columns 0..11 are the character classes from utf8d; columns 12..15 are
 * padding that no class value reaches.
 */
static const uint8_t shifted_utf8d_transition[] = {
    /* state 0x00 */
    0x00, 0x10, 0x20, 0x30, 0x50, 0x80, 0x70, 0x10,
    0x10, 0x10, 0x40, 0x60, 0x10, 0x10, 0x10, 0x10,
    /* state 0x10 (reject) */
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    /* state 0x20 */
    0x10, 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x00,
    0x10, 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    /* state 0x30 */
    0x10, 0x20, 0x10, 0x10, 0x10, 0x10, 0x10, 0x20,
    0x10, 0x20, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    /* state 0x40 */
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x20,
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    /* state 0x50 */
    0x10, 0x20, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    0x10, 0x20, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    /* state 0x60 */
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x30,
    0x10, 0x30, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    /* state 0x70 */
    0x10, 0x30, 0x10, 0x10, 0x10, 0x10, 0x10, 0x30,
    0x10, 0x30, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    /* state 0x80 */
    0x10, 0x30, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
};

/*
 * Advances *state by one input byte and returns the new state.
 * `byte` must be in 0..255 and *state must be a value produced by this table.
 */
static inline uint32_t shiftless_updatestate(uint32_t *state, uint32_t byte) {
    const uint32_t type = utf8d[byte];

    *state = shifted_utf8d_transition[*state + type];
    return *state;
}

/*
 * shiftless_validate_dfa_utf8_branchless
 *
 * Returns true when the `len` bytes at `src` are valid UTF-8.
 * Every byte is fed through the DFA without an early exit. A final NUL byte
 * is then fed so that a sequence left incomplete at the end of the input is
 * driven into the reject state.
 */
bool utf8_check(const char *src, size_t len) {
    const unsigned char *cu = (const unsigned char *)src;
    uint32_t state = UTF8_DFA_ACCEPT;

    for (size_t i = 0; i < len; i++) {
        shiftless_updatestate(&state, (uint32_t)cu[i]);
    }

    shiftless_updatestate(&state, (uint32_t)'\0');

    return state != UTF8_DFA_REJECT;
}
767
768
#endif