decancer 3.3.3
A library that removes common unicode confusables/homoglyphs from strings.
No such query exists :(
Functions
Type definitions
Macros
Loading...
Searching...
No Matches
/home/runner/work/decancer/decancer/bindings/native/decancer.h
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// SPDX-FileCopyrightText: 2021-2026 null8626
3
13
14#ifndef __DECANCER_H__
15#define __DECANCER_H__
16
17#include <stdbool.h>
18#include <stddef.h>
19#include <stdint.h>
20
21#if defined(_WIN32) && !defined(DECANCER_STATIC)
22#define DECANCER_EXPORT __declspec(dllimport)
23#else
24#define DECANCER_EXPORT
25#endif
26
35#define DECANCER_VERSION 0x030303
36
45#define DECANCER_VERSION_MAJOR ((DECANCER_VERSION & 0xff0000) >> 16)
46
55#define DECANCER_VERSION_MINOR ((DECANCER_VERSION & 0xff00) >> 8)
56
65#define DECANCER_VERSION_PATCH (DECANCER_VERSION & 0xff)
66
71#define DECANCER_TRANSLATION_KIND_CHARACTER 0
72
77#define DECANCER_TRANSLATION_KIND_STRING 1
78
85#define DECANCER_TRANSLATION_KIND_NONE 2
86
93#define DECANCER_OPTION_DEFAULT 0
94
101#define DECANCER_OPTION_RETAIN_CAPITALIZATION (1 << 0)
102
114#define DECANCER_OPTION_DISABLE_BIDI (1 << 1)
115
122#define DECANCER_OPTION_DISABLE_LEETSPEAK (1 << 2)
123
130#define DECANCER_OPTION_RETAIN_DIACRITICS (1 << 3)
131
136#define DECANCER_OPTION_RETAIN_GREEK (1 << 4)
137
142#define DECANCER_OPTION_RETAIN_CYRILLIC (1 << 5)
143
148#define DECANCER_OPTION_RETAIN_HEBREW (1 << 6)
149
154#define DECANCER_OPTION_RETAIN_ARABIC (1 << 7)
155
160#define DECANCER_OPTION_RETAIN_DEVANAGARI (1 << 8)
161
166#define DECANCER_OPTION_RETAIN_BENGALI (1 << 9)
167
172#define DECANCER_OPTION_RETAIN_ARMENIAN (1 << 10)
173
178#define DECANCER_OPTION_RETAIN_GUJARATI (1 << 11)
179
184#define DECANCER_OPTION_RETAIN_TAMIL (1 << 12)
185
190#define DECANCER_OPTION_RETAIN_THAI (1 << 13)
191
196#define DECANCER_OPTION_RETAIN_LAO (1 << 14)
197
202#define DECANCER_OPTION_RETAIN_BURMESE (1 << 15)
203
208#define DECANCER_OPTION_RETAIN_KHMER (1 << 16)
209
214#define DECANCER_OPTION_RETAIN_MONGOLIAN (1 << 17)
215
220#define DECANCER_OPTION_RETAIN_CHINESE (1 << 18)
221
229#define DECANCER_OPTION_RETAIN_JAPANESE (1 << 19)
230
235#define DECANCER_OPTION_RETAIN_KOREAN (1 << 20)
236
241#define DECANCER_OPTION_RETAIN_BRAILLE (1 << 21)
242
247#define DECANCER_OPTION_RETAIN_EMOJIS (1 << 22)
248
256#define DECANCER_OPTION_RETAIN_TURKISH (1 << 23)
257
264#define DECANCER_OPTION_ASCII_ONLY (1 << 24)
265
272#define DECANCER_OPTION_ALPHANUMERIC_ONLY (1 << 25)
273
278#define DECANCER_OPTION_ALL 0x3ffffff
279
284#define DECANCER_OPTION_PURE_HOMOGLYPH 0x7ffff8
285
300typedef struct {
305 const char* message;
306
313
314#ifndef DECANCER_UTF16_ONLY
330typedef struct {
335 const uint8_t* string;
336
341 size_t size;
343#endif
344
345#ifndef DECANCER_UTF8_ONLY
361typedef struct {
366 const uint16_t* string;
367
372 size_t length;
374
385#endif
386
387#ifndef DECANCER_UTF16_ONLY
400typedef void* decancer_matcher_t;
401#endif
402
403#ifndef DECANCER_UTF8_ONLY
417#endif
418
431typedef void* decancer_matches_t;
432
459typedef struct {
467 uint8_t kind;
468
473 union {
478 uint32_t character;
479
484 struct {
489 const uint8_t* contents;
490
495 size_t size;
496
502 void* __heap;
503 } string;
504 } contents;
506
517typedef void* decancer_cured_t;
518
535typedef struct {
540 size_t start;
541
546 size_t end;
548
584typedef uint32_t decancer_options_t;
585
586#ifdef __cplusplus
587extern "C" {
588#endif
589
590#ifndef DECANCER_UTF16_ONLY
635 DECANCER_EXPORT decancer_cured_t decancer_cure(const uint8_t* input_str, const size_t input_size, const decancer_options_t options, decancer_error_t* error);
636#endif
637
638#ifndef DECANCER_UTF8_ONLY
689 DECANCER_EXPORT decancer_cured_t decancer_cure_utf16(const uint16_t* input_str, const size_t input_length, const decancer_options_t options, decancer_error_t* error);
690#endif
691
749 DECANCER_EXPORT void decancer_cure_char(const uint32_t input, const decancer_options_t options, decancer_translation_t* translation);
750
779 DECANCER_EXPORT void decancer_disable_leetspeak(decancer_cured_t cured, const bool switch_);
780
781#ifndef DECANCER_UTF16_ONLY
838 DECANCER_EXPORT const uint8_t* decancer_cured_raw(decancer_cured_t cured, const decancer_match_t* match, size_t* output_size);
839#endif
840
841#ifndef DECANCER_UTF8_ONLY
913 DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16(decancer_cured_t cured, const decancer_match_t* match, uint16_t** output_ptr, size_t* output_length);
914#endif
915
1000 DECANCER_EXPORT const decancer_match_t* decancer_matches_raw(decancer_matches_t matches, size_t* output_size);
1001
1002#ifndef DECANCER_UTF16_ONLY
1082 DECANCER_EXPORT decancer_matcher_t decancer_find(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
1083#endif
1084
1085#ifndef DECANCER_UTF8_ONLY
1177 DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
1178#endif
1179
1180#ifndef DECANCER_UTF16_ONLY
1276 DECANCER_EXPORT decancer_matches_t decancer_find_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length);
1277#endif
1278
1279#ifndef DECANCER_UTF8_ONLY
1390 DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length);
1391#endif
1392
1393#ifndef DECANCER_UTF16_ONLY
1467 DECANCER_EXPORT bool decancer_matcher_next(decancer_matcher_t matcher, decancer_match_t* match);
1468#endif
1469
1470#ifndef DECANCER_UTF8_ONLY
1557#endif
1558
1559#ifndef DECANCER_UTF16_ONLY
1623 DECANCER_EXPORT bool decancer_censor(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size, const uint32_t replacement_char);
1624#endif
1625
1626#ifndef DECANCER_UTF8_ONLY
1707 DECANCER_EXPORT bool decancer_censor_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length, const uint32_t replacement_char);
1708#endif
1709
1710#ifndef DECANCER_UTF16_ONLY
1774 DECANCER_EXPORT bool decancer_replace(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size, const uint8_t* replacement_str, const size_t replacement_size);
1775#endif
1776
1777#ifndef DECANCER_UTF8_ONLY
1861 DECANCER_EXPORT bool decancer_replace_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length, const uint16_t* replacement_str, const size_t replacement_length);
1862#endif
1863
1864#ifndef DECANCER_UTF16_ONLY
1934 DECANCER_EXPORT bool decancer_censor_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length, const uint32_t replacement_char);
1935#endif
1936
1937#ifndef DECANCER_UTF8_ONLY
2027 DECANCER_EXPORT bool decancer_censor_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length, const uint32_t replacement_char);
2028#endif
2029
2030#ifndef DECANCER_UTF16_ONLY
2100 DECANCER_EXPORT bool decancer_replace_multiple(decancer_cured_t cured, const decancer_keyword_t* other, const size_t other_length, const uint8_t* replacement_str, const size_t replacement_size);
2101#endif
2102
2103#ifndef DECANCER_UTF8_ONLY
2196 DECANCER_EXPORT bool decancer_replace_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t* other, const size_t other_length, const uint16_t* replacement_str, const size_t replacement_length);
2197#endif
2198
2199#ifndef DECANCER_UTF16_ONLY
2252 DECANCER_EXPORT bool decancer_contains(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2253#endif
2254
2255#ifndef DECANCER_UTF8_ONLY
2317 DECANCER_EXPORT bool decancer_contains_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2318#endif
2319
2320#ifndef DECANCER_UTF16_ONLY
2373 DECANCER_EXPORT bool decancer_starts_with(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2374#endif
2375
2376#ifndef DECANCER_UTF8_ONLY
2438 DECANCER_EXPORT bool decancer_starts_with_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2439#endif
2440
2441#ifndef DECANCER_UTF16_ONLY
2494 DECANCER_EXPORT bool decancer_ends_with(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2495#endif
2496
2497#ifndef DECANCER_UTF8_ONLY
2559 DECANCER_EXPORT bool decancer_ends_with_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2560#endif
2561
2562#ifndef DECANCER_UTF16_ONLY
2615 DECANCER_EXPORT bool decancer_equals(decancer_cured_t cured, const uint8_t* other_str, const size_t other_size);
2616#endif
2617
2618#ifndef DECANCER_UTF8_ONLY
2680 DECANCER_EXPORT bool decancer_equals_utf16(decancer_cured_t cured, const uint16_t* other_str, const size_t other_length);
2681
2693
2702 DECANCER_EXPORT void decancer_cured_raw_utf16_free(decancer_cured_raw_utf16_t raw_utf16_handle);
2703#endif
2704
2705#ifndef DECANCER_UTF16_ONLY
2779
2790 DECANCER_EXPORT void decancer_matcher_free(decancer_matcher_t matcher);
2791#endif
2792
2793#ifndef DECANCER_UTF8_ONLY
2876
2887#endif
2888
2904
2917 DECANCER_EXPORT void decancer_matches_free(decancer_matches_t matches);
2918
2929 DECANCER_EXPORT void decancer_translation_init(decancer_translation_t* translation);
2930
2942 DECANCER_EXPORT void decancer_translation_clone(const decancer_translation_t* translation_in, decancer_translation_t* translation_out);
2943
2953 DECANCER_EXPORT void decancer_translation_free(decancer_translation_t* translation);
2954
2967
2977 DECANCER_EXPORT void decancer_cured_free(decancer_cured_t cured);
2978
2979#undef DECANCER_EXPORT
2980
2981#ifdef __cplusplus
2982} // extern "C"
2983#endif
2984#endif
DECANCER_EXPORT bool decancer_replace_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
Replaces every similar-looking match of the specified UTF-16 encoded string with another UTF-16 encod...
DECANCER_EXPORT bool decancer_contains_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly contains the specified UTF-16 encoded string.
DECANCER_EXPORT const decancer_match_t * decancer_matches_raw(decancer_matches_t matches, size_t *output_size)
Returns the raw list of every similar-looking match from a decancer_matches_t object.
DECANCER_EXPORT void decancer_cured_raw_utf16_free(decancer_cured_raw_utf16_t raw_utf16_handle)
Frees the Rust object created by decancer_cured_raw_utf16.
DECANCER_EXPORT bool decancer_equals_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string is similar to the specified UTF-16 encoded string.
DECANCER_EXPORT decancer_cured_t decancer_cured_clone(decancer_cured_t cured)
Clones the cured string object created by decancer_cure and decancer_cure_utf16.
DECANCER_EXPORT decancer_matcher_t decancer_find(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Finds every similar-looking match of a UTF-8 encoded string in the cured string.
DECANCER_EXPORT bool decancer_replace_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint8_t *replacement_str, const size_t replacement_size)
Replaces every similar-looking match of the specified list of UTF-8 keywords with another UTF-8 encod...
DECANCER_EXPORT bool decancer_censor_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified list of UTF-16 keywords. Unlike decancer_censor_...
DECANCER_EXPORT void decancer_translation_init(decancer_translation_t *translation)
Initializes a newly created translation struct for use.
DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Finds every similar-looking match of a UTF-16 encoded string in the cured string.
DECANCER_EXPORT bool decancer_replace_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
Replaces every similar-looking match of the specified list of UTF-16 keywords with another UTF-16 enc...
DECANCER_EXPORT void decancer_cured_free(decancer_cured_t cured)
Frees the cured string object created by decancer_cure and decancer_cure_utf16.
uint32_t decancer_options_t
A set of unsigned 32-bit bitflags that lets you customize decancer's behavior in its curing functions.
Definition decancer.h:584
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16(decancer_cured_t cured, const decancer_match_t *match, uint16_t **output_ptr, size_t *output_length)
Retrieves the raw UTF-16 bytes from a cured string object.
void * decancer_matcher_t
Represents a UTF-8 matcher iterator object returned from decancer_find.
Definition decancer.h:400
DECANCER_EXPORT decancer_matches_t decancer_matches_clone(decancer_matches_t matches)
Clones the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
DECANCER_EXPORT decancer_matches_t decancer_find_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length)
Finds every similar-looking match from a list of UTF-8 keywords in the cured string....
DECANCER_EXPORT bool decancer_starts_with_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly starts with the specified UTF-16 encoded string.
DECANCER_EXPORT bool decancer_censor_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified list of UTF-8 keywords. Unlike decancer_censor,...
void * decancer_cured_t
Represents a cured string returned from decancer_cure and decancer_cure_utf16.
Definition decancer.h:517
DECANCER_EXPORT bool decancer_censor(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint32_t replacement_char)
Censors every similar-looking match of the specified UTF-8 encoded string.
void * decancer_cured_raw_utf16_t
Represents a Rust object returned from decancer_cured_raw_utf16. This value has no use other than ret...
Definition decancer.h:384
DECANCER_EXPORT void decancer_matches_free(decancer_matches_t matches)
Frees the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length)
Finds every similar-looking match from a list of UTF-16 keywords in the cured string....
DECANCER_EXPORT void decancer_matcher_utf16_free(decancer_matcher_utf16_t matcher)
Frees the UTF-16 matcher iterator object created by decancer_find_utf16.
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16_clone(decancer_cured_raw_utf16_t raw_utf16_handle)
Clones the Rust object created by decancer_cured_raw_utf16.
DECANCER_EXPORT void decancer_translation_clone(const decancer_translation_t *translation_in, decancer_translation_t *translation_out)
Clones the translation struct used in decancer_cure_char.
DECANCER_EXPORT decancer_matches_t decancer_matcher_consume(decancer_matcher_t matcher)
Consumes the UTF-8 matcher iterator object created by decancer_find and returns a matches object.
DECANCER_EXPORT decancer_cured_t decancer_cure_utf16(const uint16_t *input_str, const size_t input_length, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-16 encoded string.
DECANCER_EXPORT bool decancer_matcher_utf16_next(decancer_matcher_utf16_t matcher, decancer_match_t *match)
Iterates to the next element of a UTF-16 matcher iterator.
DECANCER_EXPORT bool decancer_contains(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly contains the specified UTF-8 encoded string.
DECANCER_EXPORT void decancer_disable_leetspeak(decancer_cured_t cured, const bool switch_)
Prevents decancer from applying leetspeak comparisons in comparison methods.
DECANCER_EXPORT bool decancer_replace(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint8_t *replacement_str, const size_t replacement_size)
Replaces every similar-looking match of the specified UTF-8 encoded string with another UTF-8 encoded...
DECANCER_EXPORT bool decancer_ends_with_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly ends with the specified UTF-16 encoded string.
DECANCER_EXPORT void decancer_translation_free(decancer_translation_t *translation)
Frees the translation struct used in decancer_cure_char.
DECANCER_EXPORT decancer_cured_t decancer_cure(const uint8_t *input_str, const size_t input_size, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-8 encoded string.
DECANCER_EXPORT void decancer_cure_char(const uint32_t input, const decancer_options_t options, decancer_translation_t *translation)
Cures a single unicode codepoint.
DECANCER_EXPORT bool decancer_equals(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string is similar to the specified UTF-8 encoded string.
DECANCER_EXPORT bool decancer_ends_with(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly ends with the specified UTF-8 encoded string.
DECANCER_EXPORT bool decancer_matcher_next(decancer_matcher_t matcher, decancer_match_t *match)
Iterates to the next element of a UTF-8 matcher iterator.
void * decancer_matcher_utf16_t
Represents a UTF-16 matcher iterator object returned from decancer_find_utf16.
Definition decancer.h:416
DECANCER_EXPORT const uint8_t * decancer_cured_raw(decancer_cured_t cured, const decancer_match_t *match, size_t *output_size)
Retrieves the raw UTF-8 bytes from a cured string object.
DECANCER_EXPORT void decancer_matcher_free(decancer_matcher_t matcher)
Frees the UTF-8 matcher iterator object created by decancer_find.
DECANCER_EXPORT decancer_matches_t decancer_matcher_utf16_consume(decancer_matcher_utf16_t matcher)
Consumes the UTF-16 matcher iterator object created by decancer_find_utf16 and returns a matches obje...
DECANCER_EXPORT bool decancer_starts_with(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly starts with the specified UTF-8 encoded string.
void * decancer_matches_t
Represents a matches object returned from decancer_find_multiple and decancer_find_multiple_utf16.
Definition decancer.h:431
DECANCER_EXPORT bool decancer_censor_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified UTF-16 encoded string.
Represents an error caused by decancer not being able to cure a string.
Definition decancer.h:300
uint8_t message_length
The length of the error message.
Definition decancer.h:311
const char * message
Null-terminated ASCII encoded error message.
Definition decancer.h:305
Represents a UTF-8 encoded keyword. This struct is often used inside an array.
Definition decancer.h:330
size_t size
UTF-8 size of the string, in bytes.
Definition decancer.h:341
const uint8_t * string
UTF-8 encoded string.
Definition decancer.h:335
Represents a UTF-16 encoded keyword. This struct is often used inside an array.
Definition decancer.h:361
size_t length
Length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
Definition decancer.h:372
const uint16_t * string
UTF-16 encoded string.
Definition decancer.h:366
Represents a match in UTF-8 indices.
Definition decancer.h:535
size_t end
End of the match in UTF-8 indices (non-inclusive).
Definition decancer.h:546
size_t start
Start of the match in UTF-8 indices.
Definition decancer.h:540
Represents a translation of a unicode codepoint.
Definition decancer.h:459
uint32_t character
The translation, as a unicode character.
Definition decancer.h:478
uint8_t kind
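The type of the translation result. This can be any of the following values: DECANCER_TRANSLATION_KIND_CHARACTER, DECANCER_TRANSLATION_KIND_STRING, or DECANCER_TRANSLATION_KIND_NONE.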
Definition decancer.h:467
void * __heap
A pointer to a heap memory block, unused.
Definition decancer.h:502
const uint8_t * contents
Raw UTF-8 encoded string.
Definition decancer.h:489
size_t size
UTF-8 size of the string, in bytes.
Definition decancer.h:495