decancer 3.3.3
A library that removes common unicode confusables/homoglyphs from strings.
No such query exists :(
Functions
Type definitions
Macros
Loading...
Searching...
No Matches
/home/runner/work/decancer/decancer/bindings/native/decancer.h
Go to the documentation of this file.
1
10
11#ifndef __DECANCER_H__
12#define __DECANCER_H__
13
14#include <stdbool.h>
15#include <stddef.h>
16#include <stdint.h>
17
18#if defined(_WIN32) && !defined(DECANCER_STATIC)
19#define DECANCER_EXPORT __declspec(dllimport)
20#else
21#define DECANCER_EXPORT
22#endif
23
32#define DECANCER_VERSION 0x030303
33
42#define DECANCER_VERSION_MAJOR ((DECANCER_VERSION & 0xff0000) >> 16)
43
52#define DECANCER_VERSION_MINOR ((DECANCER_VERSION & 0xff00) >> 8)
53
62#define DECANCER_VERSION_PATCH (DECANCER_VERSION & 0xff)
63
68#define DECANCER_TRANSLATION_KIND_CHARACTER 0
69
74#define DECANCER_TRANSLATION_KIND_STRING 1
75
82#define DECANCER_TRANSLATION_KIND_NONE 2
83
90#define DECANCER_OPTION_DEFAULT 0
91
98#define DECANCER_OPTION_RETAIN_CAPITALIZATION (1 << 0)
99
111#define DECANCER_OPTION_DISABLE_BIDI (1 << 1)
112
119#define DECANCER_OPTION_RETAIN_DIACRITICS (1 << 2)
120
125#define DECANCER_OPTION_RETAIN_GREEK (1 << 3)
126
131#define DECANCER_OPTION_RETAIN_CYRILLIC (1 << 4)
132
137#define DECANCER_OPTION_RETAIN_HEBREW (1 << 5)
138
143#define DECANCER_OPTION_RETAIN_ARABIC (1 << 6)
144
149#define DECANCER_OPTION_RETAIN_DEVANAGARI (1 << 7)
150
155#define DECANCER_OPTION_RETAIN_BENGALI (1 << 8)
156
161#define DECANCER_OPTION_RETAIN_ARMENIAN (1 << 9)
162
167#define DECANCER_OPTION_RETAIN_GUJARATI (1 << 10)
168
173#define DECANCER_OPTION_RETAIN_TAMIL (1 << 11)
174
179#define DECANCER_OPTION_RETAIN_THAI (1 << 12)
180
185#define DECANCER_OPTION_RETAIN_LAO (1 << 13)
186
191#define DECANCER_OPTION_RETAIN_BURMESE (1 << 14)
192
197#define DECANCER_OPTION_RETAIN_KHMER (1 << 15)
198
203#define DECANCER_OPTION_RETAIN_MONGOLIAN (1 << 16)
204
209#define DECANCER_OPTION_RETAIN_CHINESE (1 << 17)
210
218#define DECANCER_OPTION_RETAIN_JAPANESE (1 << 18)
219
224#define DECANCER_OPTION_RETAIN_KOREAN (1 << 19)
225
230#define DECANCER_OPTION_RETAIN_BRAILLE (1 << 20)
231
236#define DECANCER_OPTION_RETAIN_EMOJIS (1 << 21)
237
245#define DECANCER_OPTION_RETAIN_TURKISH (1 << 22)
246
253#define DECANCER_OPTION_ASCII_ONLY (1 << 23)
254
261#define DECANCER_OPTION_ALPHANUMERIC_ONLY (1 << 24)
262
267#define DECANCER_OPTION_ALL 0x1ffffff
268
273#define DECANCER_OPTION_PURE_HOMOGLYPH 0x3ffffc
274
289typedef struct {
294 const char* message;
295
302
303#ifndef DECANCER_UTF16_ONLY
319typedef struct {
324 const uint8_t* string;
325
330 size_t size;
332#endif
333
334#ifndef DECANCER_UTF8_ONLY
350typedef struct {
355 const uint16_t* string;
356
361 size_t length;
363
374#endif
375
376#ifndef DECANCER_UTF16_ONLY
389typedef void* decancer_matcher_t;
390#endif
391
392#ifndef DECANCER_UTF8_ONLY
406#endif
407
420typedef void* decancer_matches_t;
421
448typedef struct {
456 uint8_t kind;
457
462 union {
467 uint32_t character;
468
473 struct {
478 const uint8_t* contents;
479
484 size_t size;
485
491 void* __heap;
492 } string;
493 } contents;
495
506typedef void* decancer_cured_t;
507
524typedef struct {
529 size_t start;
530
535 size_t end;
537
573typedef uint32_t decancer_options_t;
574
575#ifdef __cplusplus
576extern "C" {
577#endif
578
579#ifndef DECANCER_UTF16_ONLY
624 DECANCER_EXPORT decancer_cured_t decancer_cure(const uint8_t* input_str, const size_t input_size, const decancer_options_t options, decancer_error_t* error);
625#endif
626
627#ifndef DECANCER_UTF8_ONLY
678 DECANCER_EXPORT decancer_cured_t decancer_cure_utf16(const uint16_t* input_str, const size_t input_length, const decancer_options_t options, decancer_error_t* error);
679#endif
680
738 DECANCER_EXPORT void decancer_cure_char(const uint32_t input, const decancer_options_t options, decancer_translation_t* translation);
739
740#ifndef DECANCER_UTF16_ONLY
797 DECANCER_EXPORT const uint8_t* decancer_cured_raw(decancer_cured_t cured, const decancer_match_t* match, size_t* output_size);
798#endif
799
800#ifndef DECANCER_UTF8_ONLY
872 DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16(decancer_cured_t cured, const decancer_match_t* match, uint16_t** output_ptr, size_t* output_length);
873#endif
874
959 DECANCER_EXPORT const decancer_match_t* decancer_matches_raw(decancer_matches_t matches, size_t* output_size);
960
961#ifndef DECANCER_UTF16_ONLY
1040 DECANCER_EXPORT decancer_matcher_t decancer_find(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
1041#endif
1042
1043#ifndef DECANCER_UTF8_ONLY
1134 DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
1135#endif
1136
1137#ifndef DECANCER_UTF16_ONLY
1232 DECANCER_EXPORT decancer_matches_t decancer_find_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length);
1233#endif
1234
1235#ifndef DECANCER_UTF8_ONLY
1345 DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length);
1346#endif
1347
1348#ifndef DECANCER_UTF16_ONLY
1422 DECANCER_EXPORT bool decancer_matcher_next(decancer_matcher_t matcher, decancer_match_t* match);
1423#endif
1424
1425#ifndef DECANCER_UTF8_ONLY
1512#endif
1513
1514#ifndef DECANCER_UTF16_ONLY
1577 DECANCER_EXPORT bool decancer_censor(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size, const uint32_t replacement_char);
1578#endif
1579
1580#ifndef DECANCER_UTF8_ONLY
1660 DECANCER_EXPORT bool decancer_censor_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length, const uint32_t replacement_char);
1661#endif
1662
1663#ifndef DECANCER_UTF16_ONLY
1726 DECANCER_EXPORT bool decancer_replace(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size, const uint8_t* replacement_str, const size_t replacement_size);
1727#endif
1728
1729#ifndef DECANCER_UTF8_ONLY
1812 DECANCER_EXPORT bool decancer_replace_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length, const uint16_t* replacement_str, const size_t replacement_length);
1813#endif
1814
1815#ifndef DECANCER_UTF16_ONLY
1884 DECANCER_EXPORT bool decancer_censor_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length, const uint32_t replacement_char);
1885#endif
1886
1887#ifndef DECANCER_UTF8_ONLY
1976 DECANCER_EXPORT bool decancer_censor_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length, const uint32_t replacement_char);
1977#endif
1978
1979#ifndef DECANCER_UTF16_ONLY
2048 DECANCER_EXPORT bool decancer_replace_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length, const uint8_t* replacement_str, const size_t replacement_size);
2049#endif
2050
2051#ifndef DECANCER_UTF8_ONLY
2143 DECANCER_EXPORT bool decancer_replace_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length, const uint16_t* replacement_str, const size_t replacement_length);
2144#endif
2145
2146#ifndef DECANCER_UTF16_ONLY
2198 DECANCER_EXPORT bool decancer_contains(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2199#endif
2200
2201#ifndef DECANCER_UTF8_ONLY
2262 DECANCER_EXPORT bool decancer_contains_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2263#endif
2264
2265#ifndef DECANCER_UTF16_ONLY
2317 DECANCER_EXPORT bool decancer_starts_with(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2318#endif
2319
2320#ifndef DECANCER_UTF8_ONLY
2381 DECANCER_EXPORT bool decancer_starts_with_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2382#endif
2383
2384#ifndef DECANCER_UTF16_ONLY
2436 DECANCER_EXPORT bool decancer_ends_with(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2437#endif
2438
2439#ifndef DECANCER_UTF8_ONLY
2500 DECANCER_EXPORT bool decancer_ends_with_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2501#endif
2502
2503#ifndef DECANCER_UTF16_ONLY
2555 DECANCER_EXPORT bool decancer_equals(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2556#endif
2557
2558#ifndef DECANCER_UTF8_ONLY
2619 DECANCER_EXPORT bool decancer_equals_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2620
2632
2641 DECANCER_EXPORT void decancer_cured_raw_utf16_free(decancer_cured_raw_utf16_t raw_utf16_handle);
2642#endif
2643
2644#ifndef DECANCER_UTF16_ONLY
2718
2729 DECANCER_EXPORT void decancer_matcher_free(decancer_matcher_t matcher);
2730#endif
2731
2732#ifndef DECANCER_UTF8_ONLY
2815
2826#endif
2827
2843
2856 DECANCER_EXPORT void decancer_matches_free(decancer_matches_t matches);
2857
2868 DECANCER_EXPORT void decancer_translation_init(decancer_translation_t* translation);
2869
2881 DECANCER_EXPORT void decancer_translation_clone(const decancer_translation_t* translation_in, decancer_translation_t* translation_out);
2882
2892 DECANCER_EXPORT void decancer_translation_free(decancer_translation_t* translation);
2893
2906
2916 DECANCER_EXPORT void decancer_cured_free(decancer_cured_t cured);
2917
2918#undef DECANCER_EXPORT
2919
2920#ifdef __cplusplus
2921} // extern "C"
2922#endif
2923#endif
DECANCER_EXPORT bool decancer_replace_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
Replaces every similar-looking match of the specified UTF-16 encoded string with another UTF-16 encod...
DECANCER_EXPORT bool decancer_contains_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly contains the specified UTF-16 encoded string.
DECANCER_EXPORT const decancer_match_t * decancer_matches_raw(decancer_matches_t matches, size_t *output_size)
Returns the raw list of every similar-looking match from a decancer_matches_t object.
DECANCER_EXPORT void decancer_cured_raw_utf16_free(decancer_cured_raw_utf16_t raw_utf16_handle)
Frees the Rust object created by decancer_cured_raw_utf16.
DECANCER_EXPORT bool decancer_equals_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string is similar to the specified UTF-16 encoded string.
DECANCER_EXPORT decancer_cured_t decancer_cured_clone(decancer_cured_t cured)
Clones the cured string object created by decancer_cure and decancer_cure_utf16.
DECANCER_EXPORT decancer_matcher_t decancer_find(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Finds every similar-looking match of a UTF-8 encoded string in the cured string.
DECANCER_EXPORT bool decancer_replace_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint8_t *replacement_str, const size_t replacement_size)
Replaces every similar-looking match of the specified list of UTF-8 keywords with another UTF-8 encod...
DECANCER_EXPORT bool decancer_censor_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified list of UTF-16 keywords. Unlike decancer_censor_...
DECANCER_EXPORT void decancer_translation_init(decancer_translation_t *translation)
Initializes a newly created translation struct for use.
DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Finds every similar-looking match of a UTF-16 encoded string in the cured string.
DECANCER_EXPORT bool decancer_replace_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
Replaces every similar-looking match of the specified list of UTF-16 keywords with another UTF-16 enc...
DECANCER_EXPORT void decancer_cured_free(decancer_cured_t cured)
Frees the cured string object created by decancer_cure and decancer_cure_utf16.
uint32_t decancer_options_t
An unsigned 32-bit set of bitflags that lets you customize decancer's behavior in its curing functions.
Definition decancer.h:573
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16(decancer_cured_t cured, const decancer_match_t *match, uint16_t **output_ptr, size_t *output_length)
Retrieves the raw UTF-16 code units from a cured string object.
void * decancer_matcher_t
Represents a UTF-8 matcher iterator object returned from decancer_find.
Definition decancer.h:389
DECANCER_EXPORT decancer_matches_t decancer_matches_clone(decancer_matches_t matches)
Clones the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
DECANCER_EXPORT decancer_matches_t decancer_find_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length)
Finds every similar-looking match from a list of UTF-8 keywords in the cured string....
DECANCER_EXPORT bool decancer_starts_with_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly starts with the specified UTF-16 encoded string.
DECANCER_EXPORT bool decancer_censor_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified list of UTF-8 keywords. Unlike decancer_censor,...
void * decancer_cured_t
Represents a cured string returned from decancer_cure and decancer_cure_utf16.
Definition decancer.h:506
DECANCER_EXPORT bool decancer_censor(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint32_t replacement_char)
Censors every similar-looking match of the specified UTF-8 encoded string.
void * decancer_cured_raw_utf16_t
Represents a Rust object returned from decancer_cured_raw_utf16. This value has no use other than ret...
Definition decancer.h:373
DECANCER_EXPORT void decancer_matches_free(decancer_matches_t matches)
Frees the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length)
Finds every similar-looking match from a list of UTF-16 keywords in the cured string....
DECANCER_EXPORT void decancer_matcher_utf16_free(decancer_matcher_utf16_t matcher)
Frees the UTF-16 matcher iterator object created by decancer_find_utf16.
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16_clone(decancer_cured_raw_utf16_t raw_utf16_handle)
Clones the Rust object created by decancer_cured_raw_utf16.
DECANCER_EXPORT void decancer_translation_clone(const decancer_translation_t *translation_in, decancer_translation_t *translation_out)
Clones the translation struct used in decancer_cure_char.
DECANCER_EXPORT decancer_matches_t decancer_matcher_consume(decancer_matcher_t matcher)
Consumes the UTF-8 matcher iterator object created by decancer_find and returns a matches object.
DECANCER_EXPORT decancer_cured_t decancer_cure_utf16(const uint16_t *input_str, const size_t input_length, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-16 encoded string.
DECANCER_EXPORT bool decancer_matcher_utf16_next(decancer_matcher_utf16_t matcher, decancer_match_t *match)
Iterates to the next element of a UTF-16 matcher iterator.
DECANCER_EXPORT bool decancer_contains(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly contains the specified UTF-8 encoded string.
DECANCER_EXPORT bool decancer_replace(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint8_t *replacement_str, const size_t replacement_size)
Replaces every similar-looking match of the specified UTF-8 encoded string with another UTF-8 encoded...
DECANCER_EXPORT bool decancer_ends_with_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly ends with the specified UTF-16 encoded string.
DECANCER_EXPORT void decancer_translation_free(decancer_translation_t *translation)
Frees the translation struct used in decancer_cure_char.
DECANCER_EXPORT decancer_cured_t decancer_cure(const uint8_t *input_str, const size_t input_size, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-8 encoded string.
DECANCER_EXPORT void decancer_cure_char(const uint32_t input, const decancer_options_t options, decancer_translation_t *translation)
Cures a single unicode codepoint.
DECANCER_EXPORT bool decancer_equals(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string is similar to the specified UTF-8 encoded string.
DECANCER_EXPORT bool decancer_ends_with(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly ends with the specified UTF-8 encoded string.
DECANCER_EXPORT bool decancer_matcher_next(decancer_matcher_t matcher, decancer_match_t *match)
Iterates to the next element of a UTF-8 matcher iterator.
void * decancer_matcher_utf16_t
Represents a UTF-16 matcher iterator object returned from decancer_find_utf16.
Definition decancer.h:405
DECANCER_EXPORT const uint8_t * decancer_cured_raw(decancer_cured_t cured, const decancer_match_t *match, size_t *output_size)
Retrieves the raw UTF-8 bytes from a cured string object.
DECANCER_EXPORT void decancer_matcher_free(decancer_matcher_t matcher)
Frees the UTF-8 matcher iterator object created by decancer_find.
DECANCER_EXPORT decancer_matches_t decancer_matcher_utf16_consume(decancer_matcher_utf16_t matcher)
Consumes the UTF-16 matcher iterator object created by decancer_find_utf16 and returns a matches obje...
DECANCER_EXPORT bool decancer_starts_with(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly starts with the specified UTF-8 encoded string.
void * decancer_matches_t
Represents a matches object returned from decancer_find_multiple and decancer_find_multiple_...
Definition decancer.h:420
DECANCER_EXPORT bool decancer_censor_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified UTF-16 encoded string.
Represents an error caused by decancer not being able to cure a string.
Definition decancer.h:289
uint8_t message_length
The length of the error message.
Definition decancer.h:300
const char * message
Null-terminated ASCII encoded error message.
Definition decancer.h:294
Represents a UTF-8 encoded keyword. This struct is often used inside an array.
Definition decancer.h:319
size_t size
UTF-8 size of the string, in bytes.
Definition decancer.h:330
const uint8_t * string
UTF-8 encoded string.
Definition decancer.h:324
Represents a UTF-16 encoded keyword. This struct is often used inside an array.
Definition decancer.h:350
size_t length
Length of the UTF-16 encoded string in units of uint16_t – i.e. sizeof(array) / sizeof(uint16_t) when the string is stored in an array (sizeof applied to a pointer does not give the length).
Definition decancer.h:361
const uint16_t * string
UTF-16 encoded string.
Definition decancer.h:355
Represents a match in UTF-8 indices.
Definition decancer.h:524
size_t end
End of the match in UTF-8 indices (non-inclusive).
Definition decancer.h:535
size_t start
Start of the match in UTF-8 indices.
Definition decancer.h:529
Represents a translation of a unicode codepoint.
Definition decancer.h:448
uint32_t character
The translation, as a unicode character.
Definition decancer.h:467
uint8_t kind
The type of the translation result. This can be any of the following values:
Definition decancer.h:456
void * __heap
A pointer to a heap memory block, unused.
Definition decancer.h:491
const uint8_t * contents
Raw UTF-8 encoded string.
Definition decancer.h:478
size_t size
UTF-8 size of the string, in bytes.
Definition decancer.h:484