daren_project/venv/Lib/site-packages/autobahn/nvx/_utf8validator.c

649 lines
17 KiB
C

///////////////////////////////////////////////////////////////////////////////
//
// The MIT License (MIT)
//
// Copyright (c) typedef int GmbH
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
///////////////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include <stdint.h>
// http://stackoverflow.com/questions/11228855/header-files-for-simd-intrinsics
#if defined(__SSE2__) || defined(__SSE4_1__)
#include <x86intrin.h>
#endif
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1
typedef struct {
size_t current_index;
size_t total_index;
int state;
int impl;
} utf8_validator_t;
#define UTF8_VALIDATOR_OPTIMAL 0
#define UTF8_VALIDATOR_TABLE_DFA 1
#define UTF8_VALIDATOR_UNROLLED_DFA 2
#define UTF8_VALIDATOR_SSE2_DFA 3
#define UTF8_VALIDATOR_SSE41_DFA 4
int nvx_utf8vld_get_impl (void* utf8vld) {
utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
return vld->impl;
}
int nvx_utf8vld_set_impl (void* utf8vld, int impl) {
utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
if (impl) {
// set requested implementation
//
#ifndef __SSE4_1__
# ifdef __SSE2__
if (impl <= UTF8_VALIDATOR_SSE2_DFA) {
vld->impl = impl;
}
# else
if (impl <= UTF8_VALIDATOR_UNROLLED_DFA) {
vld->impl = impl;
}
# endif
#else
if (impl <= UTF8_VALIDATOR_SSE41_DFA) {
vld->impl = impl;
}
#endif
} else {
// set optimal implementation
//
#ifndef __SSE4_1__
# ifdef __SSE2__
vld->impl = UTF8_VALIDATOR_SSE2_DFA;
# else
vld->impl = UTF8_VALIDATOR_UNROLLED_DFA;
# endif
#else
vld->impl = UTF8_VALIDATOR_SSE41_DFA;
#endif
}
return vld->impl;
}
void nvx_utf8vld_reset (void* utf8vld) {
utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
vld->state = 0;
vld->current_index = -1;
vld->total_index = -1;
}
void* nvx_utf8vld_new () {
void* p = malloc(sizeof(utf8_validator_t));
nvx_utf8vld_reset(p);
nvx_utf8vld_set_impl(p, 0);
return p;
}
void nvx_utf8vld_free (void* utf8vld) {
free (utf8vld);
}
// unrolled DFA from http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
//
static const uint8_t UTF8VALIDATOR_DFA[] __attribute__((aligned(64))) =
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // s7..s8
};
int _nvx_utf8vld_validate_table (void* utf8vld, const uint8_t* data, size_t length) {
utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
int state = vld->state;
const uint8_t* end = data + length;
while (data < end && state != 1) {
state = UTF8VALIDATOR_DFA[256 + state * 16 + UTF8VALIDATOR_DFA[*data++]];
}
vld->state = state;
if (state == 0) {
// UTF8 is valid and ends on codepoint
return 0;
} else {
if (state == 1) {
// UTF8 is invalid
return -1;
} else {
// UTF8 is valid, but does not end on codepoint (needs more data)
return 1;
}
}
}
// unrolled DFA from http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
//
#define DFA_TRANSITION(state, octet) \
if (state == 0) { \
if (octet >= 0x00 && octet <= 0x7f) { \
/* reflective state 0 */ \
} else if (octet >= 0xc2 && octet <= 0xdf) { \
state = 2; \
} else if ((octet >= 0xe1 && octet <= 0xec) || octet == 0xee || octet == 0xef) { \
state = 3; \
} else if (octet == 0xe0) { \
state = 4; \
} else if (octet == 0xed) { \
state = 5; \
} else if (octet == 0xf4) { \
state = 8; \
} else if (octet == 0xf1 || octet == 0xf2 || octet == 0xf3) { \
state = 7; \
} else if (octet == 0xf0) { \
state = 6; \
} else { \
state = 1; \
} \
} else if (state == 2) { \
if (octet >= 0x80 && octet <= 0xbf) { \
state = 0; \
} else { \
state = 1; \
} \
} else if (state == 3) { \
if (octet >= 0x80 && octet <= 0xbf) { \
state = 2; \
} else { \
state = 1; \
} \
} else if (state == 4) { \
if (octet >= 0xa0 && octet <= 0xbf) { \
state = 2; \
} else { \
state = 1; \
} \
} else if (state == 5) { \
if (octet >= 0x80 && octet <= 0x9f) { \
state = 2; \
} else { \
state = 1; \
} \
} else if (state == 6) { \
if (octet >= 0x90 && octet <= 0xbf) { \
state = 3; \
} else { \
state = 1; \
} \
} else if (state == 7) { \
if (octet >= 0x80 && octet <= 0xbf) { \
state = 3; \
} else { \
state = 1; \
} \
} else if (state == 8) { \
if (octet >= 0x80 && octet <= 0x8f) { \
state = 3; \
} else { \
state = 1; \
} \
} else if (state == 1) { \
/* refective state 1 */ \
} else { \
/* should not arrive here */ \
}
int _nvx_utf8vld_validate_unrolled (void* utf8vld, const uint8_t* data, size_t length) {
utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
int state = vld->state;
const uint8_t* tail_end = data + length;
while (data < tail_end && state != 1) {
// get tail octet
int octet = *data;
// do the DFA
DFA_TRANSITION(state, octet);
++data;
}
vld->state = state;
if (state == 0) {
// UTF8 is valid and ends on codepoint
return 0;
} else {
if (state == 1) {
// UTF8 is invalid
return -1;
} else {
// UTF8 is valid, but does not end on codepoint (needs more data)
return 1;
}
}
}
/*
__m128i _mm_load_si128 (__m128i const* mem_addr)
#include "emmintrin.h"
Instruction: movdqa
CPUID Feature Flag: SSE2
int _mm_movemask_epi8 (__m128i a)
#include "emmintrin.h"
Instruction: pmovmskb
CPUID Feature Flag: SSE2
__m128i _mm_srli_si128 (__m128i a, int imm)
#include "emmintrin.h"
Instruction: psrldq
CPUID Feature Flag: SSE2
int _mm_cvtsi128_si32 (__m128i a)
#include "emmintrin.h"
Instruction: movd
CPUID Feature Flag: SSE2
int _mm_extract_epi16 (__m128i a, int imm)
#include "emmintrin.h"
Instruction: pextrw
CPUID Feature Flag: SSE2
int _mm_extract_epi8 (__m128i a, const int imm)
#include "smmintrin.h"
Instruction: pextrb
CPUID Feature Flag: SSE4.1
*/
#ifdef __SSE2__
int _nvx_utf8vld_validate_sse2 (void* utf8vld, const uint8_t* data, size_t length) {
utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
int state = vld->state;
const uint8_t* tail_end = data + length;
// process unaligned head (sub 16 octets)
//
size_t head_len = ((size_t) data) % sizeof(__m128i);
if (head_len) {
const uint8_t* head_end = data + head_len;
while (data < head_end && state != UTF8_REJECT) {
// get head octet
int octet = *data;
// do the DFA
DFA_TRANSITION(state, octet);
++data;
}
}
// process aligned middle (16 octet chunks)
//
const __m128i* ptr = ((const __m128i*) data);
const __m128i* end = ((const __m128i*) data) + ((length - head_len) / sizeof(__m128i));
while (ptr < end && state != UTF8_REJECT) {
__builtin_prefetch(ptr + 1, 0, 3);
//__builtin_prefetch(ptr + 4, 0, 3); // 16*4=64: cache-line prefetch
__m128i xmm1 = _mm_load_si128(ptr);
if (__builtin_expect(state || _mm_movemask_epi8(xmm1), 0)) {
// copy to different reg - this allows the prefetching to
// do its job in the meantime (I guess ..)
// SSE2 variant
//
int octet;
// octet 0
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 1
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 2
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 3
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 4
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 5
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 6
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 7
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 8
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 9
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 10
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 11
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 12
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 13
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 14
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
// octet 15
xmm1 = _mm_srli_si128(xmm1, 1);
octet = 0xff & _mm_cvtsi128_si32(xmm1);
DFA_TRANSITION(state, octet);
}
++ptr;
}
// process unaligned tail (sub 16 octets)
//
const uint8_t* tail_ptr = (const uint8_t*) ptr;
while (tail_ptr < tail_end && state != UTF8_REJECT) {
// get tail octet
int octet = *tail_ptr;
// do the DFA
DFA_TRANSITION(state, octet);
++tail_ptr;
}
vld->state = state;
if (state == UTF8_ACCEPT) {
// UTF8 is valid and ends on codepoint
return 0;
} else {
if (state == UTF8_REJECT) {
// UTF8 is invalid
return -1;
} else {
// UTF8 is valid, but does not end on codepoint (needs more data)
return 1;
}
}
}
#endif
#ifdef __SSE4_1__
int _nvx_utf8vld_validate_sse4 (void* utf8vld, const uint8_t* data, size_t length) {
utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
int state = vld->state;
const uint8_t* tail_end = data + length;
// process unaligned head (sub 16 octets)
//
size_t head_len = ((size_t) data) % sizeof(__m128i);
if (head_len) {
const uint8_t* head_end = data + head_len;
while (data < head_end && state != UTF8_REJECT) {
// get head octet
int octet = *data;
// do the DFA
DFA_TRANSITION(state, octet);
++data;
}
}
// process aligned middle (16 octet chunks)
//
const __m128i* ptr = ((const __m128i*) data);
const __m128i* end = ((const __m128i*) data) + ((length - head_len) / sizeof(__m128i));
while (ptr < end && state != UTF8_REJECT) {
__builtin_prefetch(ptr + 1, 0, 3);
//__builtin_prefetch(ptr + 4, 0, 3); // 16*4=64: cache-line prefetch
__m128i xmm1 = _mm_load_si128(ptr);
if (__builtin_expect(state || _mm_movemask_epi8(xmm1), 0)) {
// copy to different reg - this allows the prefetching to
// do its job in the meantime (I guess ..)
// SSE4.1 variant
//
int octet;
// octet 0
octet = _mm_extract_epi8(xmm1, 0);
DFA_TRANSITION(state, octet);
// octet 1
octet = _mm_extract_epi8(xmm1, 1);
DFA_TRANSITION(state, octet);
// octet 2
octet = _mm_extract_epi8(xmm1, 2);
DFA_TRANSITION(state, octet);
// octet 3
octet = _mm_extract_epi8(xmm1, 3);
DFA_TRANSITION(state, octet);
// octet 4
octet = _mm_extract_epi8(xmm1, 4);
DFA_TRANSITION(state, octet);
// octet 5
octet = _mm_extract_epi8(xmm1, 5);
DFA_TRANSITION(state, octet);
// octet 6
octet = _mm_extract_epi8(xmm1, 6);
DFA_TRANSITION(state, octet);
// octet 7
octet = _mm_extract_epi8(xmm1, 7);
DFA_TRANSITION(state, octet);
// octet 8
octet = _mm_extract_epi8(xmm1, 8);
DFA_TRANSITION(state, octet);
// octet 9
octet = _mm_extract_epi8(xmm1, 9);
DFA_TRANSITION(state, octet);
// octet 10
octet = _mm_extract_epi8(xmm1, 10);
DFA_TRANSITION(state, octet);
// octet 11
octet = _mm_extract_epi8(xmm1, 11);
DFA_TRANSITION(state, octet);
// octet 12
octet = _mm_extract_epi8(xmm1, 12);
DFA_TRANSITION(state, octet);
// octet 13
octet = _mm_extract_epi8(xmm1, 13);
DFA_TRANSITION(state, octet);
// octet 14
octet = _mm_extract_epi8(xmm1, 14);
DFA_TRANSITION(state, octet);
// octet 15
octet = _mm_extract_epi8(xmm1, 15);
DFA_TRANSITION(state, octet);
}
++ptr;
}
// process unaligned tail (sub 16 octets)
//
const uint8_t* tail_ptr = (const uint8_t*) ptr;
while (tail_ptr < tail_end && state != UTF8_REJECT) {
// get tail octet
int octet = *tail_ptr;
// do the DFA
DFA_TRANSITION(state, octet);
++tail_ptr;
}
vld->state = state;
if (state == UTF8_ACCEPT) {
// UTF8 is valid and ends on codepoint
return 0;
} else {
if (state == UTF8_REJECT) {
// UTF8 is invalid
return -1;
} else {
// UTF8 is valid, but does not end on codepoint (needs more data)
return 1;
}
}
}
#endif
int nvx_utf8vld_validate (void* utf8vld, const uint8_t* data, size_t length) {
utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
switch (vld->impl) {
case UTF8_VALIDATOR_TABLE_DFA:
return _nvx_utf8vld_validate_table(utf8vld, data, length);
case UTF8_VALIDATOR_UNROLLED_DFA:
return _nvx_utf8vld_validate_unrolled(utf8vld, data, length);
#ifdef __SSE2__
case UTF8_VALIDATOR_SSE2_DFA:
return _nvx_utf8vld_validate_table(utf8vld, data, length);
#endif
#ifdef __SSE4_1__
case UTF8_VALIDATOR_SSE41_DFA:
return _nvx_utf8vld_validate_table(utf8vld, data, length);
#endif
default:
return _nvx_utf8vld_validate_table(utf8vld, data, length);
}
}