decancer 3.2.4
A library that removes common unicode confusables/homoglyphs from strings.
 
No such query exists :(
Functions
Type definitions
Macros
Loading...
Searching...
No Matches
/home/runner/work/decancer/decancer/bindings/native/decancer.h
Go to the documentation of this file.
1
11#ifndef __DECANCER_H__
12#define __DECANCER_H__
13
14#include <stdbool.h>
15#include <stddef.h>
16#include <stdint.h>
17
18#ifdef _WIN32
19#define DECANCER_EXPORT __declspec(dllimport)
20#else
21#define DECANCER_EXPORT
22#endif
23
32#define DECANCER_VERSION 0x030204
33
42#define DECANCER_VERSION_MAJOR ((DECANCER_VERSION & 0xff0000) >> 16)
43
52#define DECANCER_VERSION_MINOR ((DECANCER_VERSION & 0xff00) >> 8)
53
62#define DECANCER_VERSION_PATCH (DECANCER_VERSION & 0xff)
63
68#define DECANCER_TRANSLATION_KIND_CHARACTER 0
69
74#define DECANCER_TRANSLATION_KIND_STRING 1
75
82#define DECANCER_TRANSLATION_KIND_NONE 2
83
90#define DECANCER_OPTION_DEFAULT 0
91
98#define DECANCER_OPTION_RETAIN_CAPITALIZATION (1 << 0)
99
111#define DECANCER_OPTION_DISABLE_BIDI (1 << 1)
112
119#define DECANCER_OPTION_RETAIN_DIACRITICS (1 << 2)
120
125#define DECANCER_OPTION_RETAIN_GREEK (1 << 3)
126
131#define DECANCER_OPTION_RETAIN_CYRILLIC (1 << 4)
132
137#define DECANCER_OPTION_RETAIN_HEBREW (1 << 5)
138
143#define DECANCER_OPTION_RETAIN_ARABIC (1 << 6)
144
149#define DECANCER_OPTION_RETAIN_DEVANAGARI (1 << 7)
150
155#define DECANCER_OPTION_RETAIN_BENGALI (1 << 8)
156
161#define DECANCER_OPTION_RETAIN_ARMENIAN (1 << 9)
162
167#define DECANCER_OPTION_RETAIN_GUJARATI (1 << 10)
168
173#define DECANCER_OPTION_RETAIN_TAMIL (1 << 11)
174
179#define DECANCER_OPTION_RETAIN_THAI (1 << 12)
180
185#define DECANCER_OPTION_RETAIN_LAO (1 << 13)
186
191#define DECANCER_OPTION_RETAIN_BURMESE (1 << 14)
192
197#define DECANCER_OPTION_RETAIN_KHMER (1 << 15)
198
203#define DECANCER_OPTION_RETAIN_MONGOLIAN (1 << 16)
204
209#define DECANCER_OPTION_RETAIN_CHINESE (1 << 17)
210
218#define DECANCER_OPTION_RETAIN_JAPANESE (1 << 18)
219
224#define DECANCER_OPTION_RETAIN_KOREAN (1 << 19)
225
230#define DECANCER_OPTION_RETAIN_BRAILLE (1 << 20)
231
236#define DECANCER_OPTION_RETAIN_EMOJIS (1 << 21)
237
245#define DECANCER_OPTION_RETAIN_TURKISH (1 << 22)
246
253#define DECANCER_OPTION_ASCII_ONLY (1 << 23)
254
261#define DECANCER_OPTION_ALPHANUMERIC_ONLY (1 << 24)
262
267#define DECANCER_OPTION_ALL 0x1ffffff
268
273#define DECANCER_OPTION_PURE_HOMOGLYPH 0x3ffffc
274
289typedef struct {
294 const char* message;
295
302
318typedef struct {
323 const uint8_t* string;
324
329 size_t size;
331
347typedef struct {
352 const uint16_t* string;
353
358 size_t length;
360
371
384typedef void* decancer_matcher_t;
385
399
412typedef void* decancer_matches_t;
413
440typedef struct {
448 uint8_t kind;
449
454 union {
459 uint32_t character;
460
465 struct {
470 const uint8_t* contents;
471
476 size_t size;
477
483 void* __heap;
484 } string;
485 } contents;
487
498typedef void* decancer_cured_t;
499
516typedef struct {
521 size_t start;
522
527 size_t end;
529
565typedef uint32_t decancer_options_t;
566
567#ifdef __cplusplus
568extern "C" {
569#endif
570
615 DECANCER_EXPORT decancer_cured_t decancer_cure(const uint8_t* input_str, const size_t input_size, const decancer_options_t options, decancer_error_t* error);
616
667 DECANCER_EXPORT decancer_cured_t decancer_cure_utf16(const uint16_t* input_str, const size_t input_length, const decancer_options_t options, decancer_error_t* error);
668
726 DECANCER_EXPORT void decancer_cure_char(const uint32_t input, const decancer_options_t options, decancer_translation_t* translation);
727
784 DECANCER_EXPORT const uint8_t* decancer_cured_raw(decancer_cured_t cured, const decancer_match_t* match, size_t* output_size);
785
857 DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16(decancer_cured_t cured, const decancer_match_t* match, uint16_t** output_ptr, size_t* output_length);
858
943 DECANCER_EXPORT const decancer_match_t* decancer_matches_raw(decancer_matches_t matches, size_t* output_size);
944
1023 DECANCER_EXPORT decancer_matcher_t decancer_find(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
1024
1115 DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
1116
1211 DECANCER_EXPORT decancer_matches_t decancer_find_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length);
1212
1322 DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length);
1323
1397 DECANCER_EXPORT bool decancer_matcher_next(decancer_matcher_t matcher, decancer_match_t* match);
1398
1485
1548 DECANCER_EXPORT bool decancer_censor(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size, const uint32_t replacement_char);
1549
1629 DECANCER_EXPORT bool decancer_censor_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length, const uint32_t replacement_char);
1630
1693 DECANCER_EXPORT bool decancer_replace(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size, const uint8_t* replacement_str, const size_t replacement_size);
1694
1777 DECANCER_EXPORT bool decancer_replace_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length, const uint16_t* replacement_str, const size_t replacement_length);
1778
1847 DECANCER_EXPORT bool decancer_censor_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length, const uint32_t replacement_char);
1848
1937 DECANCER_EXPORT bool decancer_censor_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length, const uint32_t replacement_char);
1938
2007 DECANCER_EXPORT bool decancer_replace_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length, const uint8_t* replacement_str, const size_t replacement_size);
2008
2100 DECANCER_EXPORT bool decancer_replace_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length, const uint16_t* replacement_str, const size_t replacement_length);
2101
2153 DECANCER_EXPORT bool decancer_contains(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2154
2215 DECANCER_EXPORT bool decancer_contains_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2216
2268 DECANCER_EXPORT bool decancer_starts_with(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2269
2330 DECANCER_EXPORT bool decancer_starts_with_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2331
2383 DECANCER_EXPORT bool decancer_ends_with(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2384
2445 DECANCER_EXPORT bool decancer_ends_with_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2446
2498 DECANCER_EXPORT bool decancer_equals(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2499
2560 DECANCER_EXPORT bool decancer_equals_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2561
2573
2582 DECANCER_EXPORT void decancer_cured_raw_utf16_free(decancer_cured_raw_utf16_t raw_utf16_handle);
2583
2657
2668 DECANCER_EXPORT void decancer_matcher_free(decancer_matcher_t matcher);
2669
2752
2763
2779
2792 DECANCER_EXPORT void decancer_matches_free(decancer_matches_t matches);
2793
2804 DECANCER_EXPORT void decancer_translation_init(decancer_translation_t* translation);
2805
2817 DECANCER_EXPORT void decancer_translation_clone(const decancer_translation_t* translation_in, decancer_translation_t* translation_out);
2818
2828 DECANCER_EXPORT void decancer_translation_free(decancer_translation_t* translation);
2829
2842
2852 DECANCER_EXPORT void decancer_cured_free(decancer_cured_t cured);
2853
2854#undef DECANCER_EXPORT
2855
2856#ifdef __cplusplus
2857} // extern "C"
2858#endif
2859#endif
DECANCER_EXPORT bool decancer_replace_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
Replaces every similar-looking match of the specified UTF-16 encoded string with another UTF-16 encoded string.
DECANCER_EXPORT bool decancer_contains_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly contains the specified UTF-16 encoded string.
DECANCER_EXPORT const decancer_match_t * decancer_matches_raw(decancer_matches_t matches, size_t *output_size)
Returns the raw list of every similar-looking match from a decancer_matches_t object.
DECANCER_EXPORT void decancer_cured_raw_utf16_free(decancer_cured_raw_utf16_t raw_utf16_handle)
Frees the rust object created by decancer_cured_raw_utf16.
DECANCER_EXPORT bool decancer_equals_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string is similar to the specified UTF-16 encoded string.
DECANCER_EXPORT decancer_cured_t decancer_cured_clone(decancer_cured_t cured)
Clones the cured string object created by decancer_cure and decancer_cure_utf16.
DECANCER_EXPORT decancer_matcher_t decancer_find(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Finds every similar-looking match of a UTF-8 encoded string in the cured string.
DECANCER_EXPORT bool decancer_replace_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint8_t *replacement_str, const size_t replacement_size)
Replaces every similar-looking match of the specified list of UTF-8 keywords with another UTF-8 encoded string.
DECANCER_EXPORT bool decancer_censor_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified list of UTF-16 keywords. Unlike decancer_censor_...
DECANCER_EXPORT void decancer_translation_init(decancer_translation_t *translation)
Initializes a newly created translation struct for use.
DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Finds every similar-looking match of a UTF-16 encoded string in the cured string.
DECANCER_EXPORT bool decancer_replace_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
Replaces every similar-looking match of the specified list of UTF-16 keywords with another UTF-16 encoded string.
DECANCER_EXPORT void decancer_cured_free(decancer_cured_t cured)
Frees the cured string object created by decancer_cure and decancer_cure_utf16.
uint32_t decancer_options_t
An unsigned 32-bit set of bit flags that lets you customize decancer's behavior in its curing functions.
Definition decancer.h:565
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16(decancer_cured_t cured, const decancer_match_t *match, uint16_t **output_ptr, size_t *output_length)
Retrieves the raw UTF-16 bytes from a cured string object.
void * decancer_matcher_t
Represents a UTF-8 matcher iterator object returned from decancer_find.
Definition decancer.h:384
DECANCER_EXPORT decancer_matches_t decancer_matches_clone(decancer_matches_t matches)
Clones the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
DECANCER_EXPORT decancer_matches_t decancer_find_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length)
Finds every similar-looking match from a list of UTF-8 keywords in the cured string....
DECANCER_EXPORT bool decancer_starts_with_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly starts with the specified UTF-16 encoded string.
DECANCER_EXPORT bool decancer_censor_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified list of UTF-8 keywords. Unlike decancer_censor,...
void * decancer_cured_t
Represents a cured string returned from decancer_cure and decancer_cure_utf16.
Definition decancer.h:498
DECANCER_EXPORT bool decancer_censor(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint32_t replacement_char)
Censors every similar-looking match of the specified UTF-8 encoded string.
void * decancer_cured_raw_utf16_t
Represents a rust object returned from decancer_cured_raw_utf16. This value has no use other than ret...
Definition decancer.h:370
DECANCER_EXPORT void decancer_matches_free(decancer_matches_t matches)
Frees the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length)
Finds every similar-looking match from a list of UTF-16 keywords in the cured string....
DECANCER_EXPORT void decancer_matcher_utf16_free(decancer_matcher_utf16_t matcher)
Frees the UTF-16 matcher iterator object created by decancer_find_utf16.
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16_clone(decancer_cured_raw_utf16_t raw_utf16_handle)
Clones the rust object created by decancer_cured_raw_utf16.
DECANCER_EXPORT void decancer_translation_clone(const decancer_translation_t *translation_in, decancer_translation_t *translation_out)
Clones the translation struct used in decancer_cure_char.
DECANCER_EXPORT decancer_matches_t decancer_matcher_consume(decancer_matcher_t matcher)
Consumes the UTF-8 matcher iterator object created by decancer_find and returns a matches object.
DECANCER_EXPORT decancer_cured_t decancer_cure_utf16(const uint16_t *input_str, const size_t input_length, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-16 encoded string.
DECANCER_EXPORT bool decancer_matcher_utf16_next(decancer_matcher_utf16_t matcher, decancer_match_t *match)
Iterates to the next element of a UTF-16 matcher iterator.
DECANCER_EXPORT bool decancer_contains(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly contains the specified UTF-8 encoded string.
DECANCER_EXPORT bool decancer_replace(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint8_t *replacement_str, const size_t replacement_size)
Replaces every similar-looking match of the specified UTF-8 encoded string with another UTF-8 encoded string.
DECANCER_EXPORT bool decancer_ends_with_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly ends with the specified UTF-16 encoded string.
DECANCER_EXPORT void decancer_translation_free(decancer_translation_t *translation)
Frees the translation struct used in decancer_cure_char.
DECANCER_EXPORT decancer_cured_t decancer_cure(const uint8_t *input_str, const size_t input_size, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-8 encoded string.
DECANCER_EXPORT void decancer_cure_char(const uint32_t input, const decancer_options_t options, decancer_translation_t *translation)
Cures a single unicode codepoint.
DECANCER_EXPORT bool decancer_equals(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string is similar to the specified UTF-8 encoded string.
DECANCER_EXPORT bool decancer_ends_with(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly ends with the specified UTF-8 encoded string.
DECANCER_EXPORT bool decancer_matcher_next(decancer_matcher_t matcher, decancer_match_t *match)
Iterates to the next element of a UTF-8 matcher iterator.
void * decancer_matcher_utf16_t
Represents a UTF-16 matcher iterator object returned from decancer_find_utf16.
Definition decancer.h:398
DECANCER_EXPORT const uint8_t * decancer_cured_raw(decancer_cured_t cured, const decancer_match_t *match, size_t *output_size)
Retrieves the raw UTF-8 bytes from a cured string object.
DECANCER_EXPORT void decancer_matcher_free(decancer_matcher_t matcher)
Frees the UTF-8 matcher iterator object created by decancer_find.
DECANCER_EXPORT decancer_matches_t decancer_matcher_utf16_consume(decancer_matcher_utf16_t matcher)
Consumes the UTF-16 matcher iterator object created by decancer_find_utf16 and returns a matches object.
DECANCER_EXPORT bool decancer_starts_with(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly starts with the specified UTF-8 encoded string.
void * decancer_matches_t
Represents a matcher iterator object returned from decancer_find_multiple and decancer_find_multiple_utf16.
Definition decancer.h:412
DECANCER_EXPORT bool decancer_censor_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified UTF-16 encoded string.
Represents an error caused by decancer not being able to cure a string.
Definition decancer.h:289
uint8_t message_length
The length of the error message.
Definition decancer.h:300
const char * message
Null-terminated ASCII encoded error message.
Definition decancer.h:294
Represents a UTF-8 encoded keyword. This struct is often used inside an array.
Definition decancer.h:318
size_t size
UTF-8 size of the string, in bytes.
Definition decancer.h:329
const uint8_t * string
UTF-8 encoded string.
Definition decancer.h:323
Represents a UTF-16 encoded keyword. This struct is often used inside an array.
Definition decancer.h:347
size_t length
Length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
Definition decancer.h:358
const uint16_t * string
UTF-16 encoded string.
Definition decancer.h:352
Represents a match in UTF-8 indices.
Definition decancer.h:516
size_t end
End of the match in UTF-8 indices (non-inclusive).
Definition decancer.h:527
size_t start
Start of the match in UTF-8 indices.
Definition decancer.h:521
Represents a translation of a unicode codepoint.
Definition decancer.h:440
uint32_t character
The translation, as a unicode character.
Definition decancer.h:459
uint8_t kind
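The type of the translation result. This can be any of the following values: DECANCER_TRANSLATION_KIND_CHARACTER, DECANCER_TRANSLATION_KIND_STRING, or DECANCER_TRANSLATION_KIND_NONE.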
Definition decancer.h:448
void * __heap
A pointer to a heap memory block, unused.
Definition decancer.h:483
const uint8_t * contents
Raw UTF-8 encoded string.
Definition decancer.h:470
size_t size
UTF-8 size of the string, in bytes.
Definition decancer.h:476