decancer 3.3.3
A library that removes common unicode confusables/homoglyphs from strings.
No such query exists :(
Functions
Type definitions
Macros
Loading...
Searching...
No Matches
/home/runner/work/decancer/decancer/bindings/native/decancer.h
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// SPDX-FileCopyrightText: 2021-2026 null8626
3
13
14#ifndef __DECANCER_H__
15#define __DECANCER_H__
16
17#include <stdbool.h>
18#include <stddef.h>
19#include <stdint.h>
20
21#if defined(_WIN32) && !defined(DECANCER_STATIC)
22#define DECANCER_EXPORT __declspec(dllimport)
23#else
24#define DECANCER_EXPORT
25#endif
26
35#define DECANCER_VERSION 0x030303
36
45#define DECANCER_VERSION_MAJOR ((DECANCER_VERSION & 0xff0000) >> 16)
46
55#define DECANCER_VERSION_MINOR ((DECANCER_VERSION & 0xff00) >> 8)
56
65#define DECANCER_VERSION_PATCH (DECANCER_VERSION & 0xff)
66
71#define DECANCER_TRANSLATION_KIND_CHARACTER 0
72
77#define DECANCER_TRANSLATION_KIND_STRING 1
78
85#define DECANCER_TRANSLATION_KIND_NONE 2
86
93#define DECANCER_OPTION_DEFAULT 0
94
101#define DECANCER_OPTION_RETAIN_CAPITALIZATION (1 << 0)
102
114#define DECANCER_OPTION_DISABLE_BIDI (1 << 1)
115
124#define DECANCER_OPTION_DISABLE_LEETSPEAK (1 << 2)
125
134#define DECANCER_OPTION_DISABLE_ALPHABETICAL_LEETSPEAK (1 << 3)
135
142#define DECANCER_OPTION_RETAIN_DIACRITICS (1 << 4)
143
148#define DECANCER_OPTION_RETAIN_GREEK (1 << 5)
149
154#define DECANCER_OPTION_RETAIN_CYRILLIC (1 << 6)
155
160#define DECANCER_OPTION_RETAIN_HEBREW (1 << 7)
161
166#define DECANCER_OPTION_RETAIN_ARABIC (1 << 8)
167
172#define DECANCER_OPTION_RETAIN_DEVANAGARI (1 << 9)
173
178#define DECANCER_OPTION_RETAIN_BENGALI (1 << 10)
179
184#define DECANCER_OPTION_RETAIN_ARMENIAN (1 << 11)
185
190#define DECANCER_OPTION_RETAIN_GUJARATI (1 << 12)
191
196#define DECANCER_OPTION_RETAIN_TAMIL (1 << 13)
197
202#define DECANCER_OPTION_RETAIN_THAI (1 << 14)
203
208#define DECANCER_OPTION_RETAIN_LAO (1 << 15)
209
214#define DECANCER_OPTION_RETAIN_BURMESE (1 << 16)
215
220#define DECANCER_OPTION_RETAIN_KHMER (1 << 17)
221
226#define DECANCER_OPTION_RETAIN_MONGOLIAN (1 << 18)
227
232#define DECANCER_OPTION_RETAIN_CHINESE (1 << 19)
233
241#define DECANCER_OPTION_RETAIN_JAPANESE (1 << 20)
242
247#define DECANCER_OPTION_RETAIN_KOREAN (1 << 21)
248
253#define DECANCER_OPTION_RETAIN_BRAILLE (1 << 22)
254
259#define DECANCER_OPTION_RETAIN_EMOJIS (1 << 23)
260
268#define DECANCER_OPTION_RETAIN_TURKISH (1 << 24)
269
276#define DECANCER_OPTION_ASCII_ONLY (1 << 25)
277
284#define DECANCER_OPTION_ALPHANUMERIC_ONLY (1 << 26)
285
290#define DECANCER_OPTION_ALL 0x7ffffff
291
296#define DECANCER_OPTION_PURE_HOMOGLYPH 0xfffff0
297
312typedef struct {
317 const char* message;
318
325
326#ifndef DECANCER_UTF16_ONLY
342typedef struct {
347 const uint8_t* string;
348
353 size_t size;
355#endif
356
357#ifndef DECANCER_UTF8_ONLY
373typedef struct {
378 const uint16_t* string;
379
384 size_t length;
386
397#endif
398
399#ifndef DECANCER_UTF16_ONLY
412typedef void* decancer_matcher_t;
413#endif
414
415#ifndef DECANCER_UTF8_ONLY
429#endif
430
443typedef void* decancer_matches_t;
444
471typedef struct {
479 uint8_t kind;
480
485 union {
490 uint32_t character;
491
496 struct {
501 const uint8_t* contents;
502
507 size_t size;
508
514 void* __heap;
515 } string;
516 } contents;
518
529typedef void* decancer_cured_t;
530
547typedef struct {
552 size_t start;
553
558 size_t end;
560
598typedef uint32_t decancer_options_t;
599
600#ifdef __cplusplus
601extern "C" {
602#endif
603
604#ifndef DECANCER_UTF16_ONLY
649 DECANCER_EXPORT decancer_cured_t decancer_cure(const uint8_t* input_str, const size_t input_size, const decancer_options_t options, decancer_error_t* error);
650#endif
651
652#ifndef DECANCER_UTF8_ONLY
703 DECANCER_EXPORT decancer_cured_t decancer_cure_utf16(const uint16_t* input_str, const size_t input_length, const decancer_options_t options, decancer_error_t* error);
704#endif
705
763 DECANCER_EXPORT void decancer_cure_char(const uint32_t input, const decancer_options_t options, decancer_translation_t* translation);
764
795 DECANCER_EXPORT void decancer_disable_leetspeak(decancer_cured_t cured, const bool switch_);
796
827 DECANCER_EXPORT void decancer_disable_alphabetical_leetspeak(decancer_cured_t cured, const bool switch_);
828
829#ifndef DECANCER_UTF16_ONLY
886 DECANCER_EXPORT const uint8_t* decancer_cured_raw(decancer_cured_t cured, const decancer_match_t* match, size_t* output_size);
887#endif
888
889#ifndef DECANCER_UTF8_ONLY
961 DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16(decancer_cured_t cured, const decancer_match_t* match, uint16_t** output_ptr, size_t* output_length);
962#endif
963
1048 DECANCER_EXPORT const decancer_match_t* decancer_matches_raw(decancer_matches_t matches, size_t* output_size);
1049
1050#ifndef DECANCER_UTF16_ONLY
1131 DECANCER_EXPORT decancer_matcher_t decancer_find(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
1132#endif
1133
1134#ifndef DECANCER_UTF8_ONLY
1227 DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
1228#endif
1229
1230#ifndef DECANCER_UTF16_ONLY
1327 DECANCER_EXPORT decancer_matches_t decancer_find_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length);
1328#endif
1329
1330#ifndef DECANCER_UTF8_ONLY
1442 DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length);
1443#endif
1444
1445#ifndef DECANCER_UTF16_ONLY
1519 DECANCER_EXPORT bool decancer_matcher_next(decancer_matcher_t matcher, decancer_match_t* match);
1520#endif
1521
1522#ifndef DECANCER_UTF8_ONLY
1609#endif
1610
1611#ifndef DECANCER_UTF16_ONLY
1676 DECANCER_EXPORT bool decancer_censor(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size, const uint32_t replacement_char);
1677#endif
1678
1679#ifndef DECANCER_UTF8_ONLY
1761 DECANCER_EXPORT bool decancer_censor_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length, const uint32_t replacement_char);
1762#endif
1763
1764#ifndef DECANCER_UTF16_ONLY
1829 DECANCER_EXPORT bool decancer_replace(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size, const uint8_t* replacement_str, const size_t replacement_size);
1830#endif
1831
1832#ifndef DECANCER_UTF8_ONLY
1917 DECANCER_EXPORT bool decancer_replace_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length, const uint16_t* replacement_str, const size_t replacement_length);
1918#endif
1919
1920#ifndef DECANCER_UTF16_ONLY
1991 DECANCER_EXPORT bool decancer_censor_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length, const uint32_t replacement_char);
1992#endif
1993
1994#ifndef DECANCER_UTF8_ONLY
2085 DECANCER_EXPORT bool decancer_censor_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length, const uint32_t replacement_char);
2086#endif
2087
2088#ifndef DECANCER_UTF16_ONLY
2159 DECANCER_EXPORT bool decancer_replace_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length, const uint8_t* replacement_str, const size_t replacement_size);
2160#endif
2161
2162#ifndef DECANCER_UTF8_ONLY
2256 DECANCER_EXPORT bool decancer_replace_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length, const uint16_t* replacement_str, const size_t replacement_length);
2257#endif
2258
2259#ifndef DECANCER_UTF16_ONLY
2313 DECANCER_EXPORT bool decancer_contains(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2314#endif
2315
2316#ifndef DECANCER_UTF8_ONLY
2379 DECANCER_EXPORT bool decancer_contains_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2380#endif
2381
2382#ifndef DECANCER_UTF16_ONLY
2436 DECANCER_EXPORT bool decancer_starts_with(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2437#endif
2438
2439#ifndef DECANCER_UTF8_ONLY
2502 DECANCER_EXPORT bool decancer_starts_with_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2503#endif
2504
2505#ifndef DECANCER_UTF16_ONLY
2559 DECANCER_EXPORT bool decancer_ends_with(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2560#endif
2561
2562#ifndef DECANCER_UTF8_ONLY
2625 DECANCER_EXPORT bool decancer_ends_with_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2626#endif
2627
2628#ifndef DECANCER_UTF16_ONLY
2682 DECANCER_EXPORT bool decancer_equals(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2683#endif
2684
2685#ifndef DECANCER_UTF8_ONLY
2748 DECANCER_EXPORT bool decancer_equals_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2749
2761
2770 DECANCER_EXPORT void decancer_cured_raw_utf16_free(decancer_cured_raw_utf16_t raw_utf16_handle);
2771#endif
2772
2773#ifndef DECANCER_UTF16_ONLY
2847
2858 DECANCER_EXPORT void decancer_matcher_free(decancer_matcher_t matcher);
2859#endif
2860
2861#ifndef DECANCER_UTF8_ONLY
2944
2955#endif
2956
2972
2985 DECANCER_EXPORT void decancer_matches_free(decancer_matches_t matches);
2986
2997 DECANCER_EXPORT void decancer_translation_init(decancer_translation_t* translation);
2998
3010 DECANCER_EXPORT void decancer_translation_clone(const decancer_translation_t* translation_in, decancer_translation_t* translation_out);
3011
3021 DECANCER_EXPORT void decancer_translation_free(decancer_translation_t* translation);
3022
3035
3045 DECANCER_EXPORT void decancer_cured_free(decancer_cured_t cured);
3046
3047#undef DECANCER_EXPORT
3048
3049#ifdef __cplusplus
3050} // extern "C"
3051#endif
3052#endif
DECANCER_EXPORT void decancer_disable_alphabetical_leetspeak(decancer_cured_t cured, const bool switch_)
Prevents decancer from applying alphabetical leetspeak comparisons in comparison methods.
DECANCER_EXPORT bool decancer_replace_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
Replaces every similar-looking match of the specified UTF-16 encoded string with another UTF-16 encod...
DECANCER_EXPORT bool decancer_contains_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly contains the specified UTF-16 encoded string.
DECANCER_EXPORT const decancer_match_t * decancer_matches_raw(decancer_matches_t matches, size_t *output_size)
Returns the raw list of every similar-looking match from a decancer_matches_t object.
DECANCER_EXPORT void decancer_cured_raw_utf16_free(decancer_cured_raw_utf16_t raw_utf16_handle)
Frees the Rust object created by decancer_cured_raw_utf16.
DECANCER_EXPORT bool decancer_equals_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string is similar to the specified UTF-16 encoded string.
DECANCER_EXPORT decancer_cured_t decancer_cured_clone(decancer_cured_t cured)
Clones the cured string object created by decancer_cure and decancer_cure_utf16.
DECANCER_EXPORT decancer_matcher_t decancer_find(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Finds every similar-looking match of a UTF-8 encoded string in the cured string.
DECANCER_EXPORT bool decancer_replace_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint8_t *replacement_str, const size_t replacement_size)
Replaces every similar-looking match of the specified list of UTF-8 keywords with another UTF-8 encod...
DECANCER_EXPORT bool decancer_censor_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified list of UTF-16 keywords. Unlike decancer_censor_...
DECANCER_EXPORT void decancer_translation_init(decancer_translation_t *translation)
Initializes a newly created translation struct for use.
DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Finds every similar-looking match of a UTF-16 encoded string in the cured string.
DECANCER_EXPORT bool decancer_replace_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
Replaces every similar-looking match of the specified list of UTF-16 keywords with another UTF-16 enc...
DECANCER_EXPORT void decancer_cured_free(decancer_cured_t cured)
Frees the cured string object created by decancer_cure and decancer_cure_utf16.
uint32_t decancer_options_t
A set of unsigned 32-bit bitflags that lets you customize decancer's behavior in its curing functions.
Definition decancer.h:598
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16(decancer_cured_t cured, const decancer_match_t *match, uint16_t **output_ptr, size_t *output_length)
Retrieves the raw UTF-16 bytes from a cured string object.
void * decancer_matcher_t
Represents a UTF-8 matcher iterator object returned from decancer_find.
Definition decancer.h:412
DECANCER_EXPORT decancer_matches_t decancer_matches_clone(decancer_matches_t matches)
Clones the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
DECANCER_EXPORT decancer_matches_t decancer_find_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length)
Finds every similar-looking match from a list of UTF-8 keywords in the cured string....
DECANCER_EXPORT bool decancer_starts_with_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly starts with the specified UTF-16 encoded string.
DECANCER_EXPORT bool decancer_censor_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified list of UTF-8 keywords. Unlike decancer_censor,...
void * decancer_cured_t
Represents a cured string returned from decancer_cure and decancer_cure_utf16.
Definition decancer.h:529
DECANCER_EXPORT bool decancer_censor(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint32_t replacement_char)
Censors every similar-looking match of the specified UTF-8 encoded string.
void * decancer_cured_raw_utf16_t
Represents a Rust object returned from decancer_cured_raw_utf16. This value has no use other than ret...
Definition decancer.h:396
DECANCER_EXPORT void decancer_matches_free(decancer_matches_t matches)
Frees the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length)
Finds every similar-looking match from a list of UTF-16 keywords in the cured string....
DECANCER_EXPORT void decancer_matcher_utf16_free(decancer_matcher_utf16_t matcher)
Frees the UTF-16 matcher iterator object created by decancer_find_utf16.
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16_clone(decancer_cured_raw_utf16_t raw_utf16_handle)
Clones the Rust object created by decancer_cured_raw_utf16.
DECANCER_EXPORT void decancer_translation_clone(const decancer_translation_t *translation_in, decancer_translation_t *translation_out)
Clones the translation struct used in decancer_cure_char.
DECANCER_EXPORT decancer_matches_t decancer_matcher_consume(decancer_matcher_t matcher)
Consumes the UTF-8 matcher iterator object created by decancer_find and returns a matches object.
DECANCER_EXPORT decancer_cured_t decancer_cure_utf16(const uint16_t *input_str, const size_t input_length, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-16 encoded string.
DECANCER_EXPORT bool decancer_matcher_utf16_next(decancer_matcher_utf16_t matcher, decancer_match_t *match)
Iterates to the next element of a UTF-16 matcher iterator.
DECANCER_EXPORT bool decancer_contains(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly contains the specified UTF-8 encoded string.
DECANCER_EXPORT void decancer_disable_leetspeak(decancer_cured_t cured, const bool switch_)
Prevents decancer from applying leetspeak comparisons in comparison methods.
DECANCER_EXPORT bool decancer_replace(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint8_t *replacement_str, const size_t replacement_size)
Replaces every similar-looking match of the specified UTF-8 encoded string with another UTF-8 encoded...
DECANCER_EXPORT bool decancer_ends_with_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly ends with the specified UTF-16 encoded string.
DECANCER_EXPORT void decancer_translation_free(decancer_translation_t *translation)
Frees the translation struct used in decancer_cure_char.
DECANCER_EXPORT decancer_cured_t decancer_cure(const uint8_t *input_str, const size_t input_size, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-8 encoded string.
DECANCER_EXPORT void decancer_cure_char(const uint32_t input, const decancer_options_t options, decancer_translation_t *translation)
Cures a single unicode codepoint.
DECANCER_EXPORT bool decancer_equals(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string is similar to the specified UTF-8 encoded string.
DECANCER_EXPORT bool decancer_ends_with(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly ends with the specified UTF-8 encoded string.
DECANCER_EXPORT bool decancer_matcher_next(decancer_matcher_t matcher, decancer_match_t *match)
Iterates to the next element of a UTF-8 matcher iterator.
void * decancer_matcher_utf16_t
Represents a UTF-16 matcher iterator object returned from decancer_find_utf16.
Definition decancer.h:428
DECANCER_EXPORT const uint8_t * decancer_cured_raw(decancer_cured_t cured, const decancer_match_t *match, size_t *output_size)
Retrieves the raw UTF-8 bytes from a cured string object.
DECANCER_EXPORT void decancer_matcher_free(decancer_matcher_t matcher)
Frees the UTF-8 matcher iterator object created by decancer_find.
DECANCER_EXPORT decancer_matches_t decancer_matcher_utf16_consume(decancer_matcher_utf16_t matcher)
Consumes the UTF-16 matcher iterator object created by decancer_find_utf16 and returns a matches obje...
DECANCER_EXPORT bool decancer_starts_with(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly starts with the specified UTF-8 encoded string.
void * decancer_matches_t
Represents a matches object returned from decancer_find_multiple and decancer_find_multiple_utf16.
Definition decancer.h:443
DECANCER_EXPORT bool decancer_censor_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified UTF-16 encoded string.
Represents an error caused by decancer not being able to cure a string.
Definition decancer.h:312
uint8_t message_length
The length of the error message.
Definition decancer.h:323
const char * message
Null-terminated ASCII encoded error message.
Definition decancer.h:317
Represents a UTF-8 encoded keyword. This struct is often used inside an array.
Definition decancer.h:342
size_t size
UTF-8 size of the string, in bytes.
Definition decancer.h:353
const uint8_t * string
UTF-8 encoded string.
Definition decancer.h:347
Represents a UTF-16 encoded keyword. This struct is often used inside an array.
Definition decancer.h:373
size_t length
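Length of the UTF-16 encoded string in units of uint16_t (code units, not bytes) – e.g. sizeof(array) / sizeof(uint16_t) when the string is stored in an array. Note that applying sizeof to the string pointer itself yields the pointer size, not the string length.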
Definition decancer.h:384
const uint16_t * string
UTF-16 encoded string.
Definition decancer.h:378
Represents a match in UTF-8 indices.
Definition decancer.h:547
size_t end
End of the match in UTF-8 indices (non-inclusive).
Definition decancer.h:558
size_t start
Start of the match in UTF-8 indices.
Definition decancer.h:552
Represents a translation of a unicode codepoint.
Definition decancer.h:471
uint32_t character
The translation, as a unicode character.
Definition decancer.h:490
uint8_t kind
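The type of the translation result. This can be any of the following values: DECANCER_TRANSLATION_KIND_CHARACTER, DECANCER_TRANSLATION_KIND_STRING, or DECANCER_TRANSLATION_KIND_NONE.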
Definition decancer.h:479
void * __heap
A pointer to a heap memory block, unused.
Definition decancer.h:514
const uint8_t * contents
Raw UTF-8 encoded string.
Definition decancer.h:501
size_t size
UTF-8 size of the string, in bytes.
Definition decancer.h:507