A library that removes common unicode confusables/homoglyphs from strings. More...
Go to the source code of this file.
Data Structures | |
struct | decancer_error_t |
Represents an error caused by decancer not being able to cure a string. More... | |
struct | decancer_keyword_t |
Represents a UTF-8 encoded keyword. This struct is often used inside an array. More... | |
struct | decancer_keyword_utf16_t |
Represents a UTF-16 encoded keyword. This struct is often used inside an array. More... | |
struct | decancer_translation_t |
Represents a translation of a unicode codepoint. More... | |
struct | decancer_match_t |
Represents a match in UTF-8 indices. More... | |
Macros | |
#define | DECANCER_VERSION 0x030208 |
The current version used by this library. | |
#define | DECANCER_VERSION_MAJOR ((DECANCER_VERSION & 0xff0000) >> 16) |
The current major version used by this library. | |
#define | DECANCER_VERSION_MINOR ((DECANCER_VERSION & 0xff00) >> 8) |
The current minor version used by this library. | |
#define | DECANCER_VERSION_PATCH (DECANCER_VERSION & 0xff) |
The current patch version used by this library. | |
#define | DECANCER_TRANSLATION_KIND_CHARACTER 0 |
The translation is a single unicode character. | |
#define | DECANCER_TRANSLATION_KIND_STRING 1 |
The translation is a string. | |
#define | DECANCER_TRANSLATION_KIND_NONE 2 |
The translation is an empty string. | |
#define | DECANCER_OPTION_DEFAULT 0 |
Uses decancer's default options – AKA to be AS AGGRESSIVE AS POSSIBLE. | |
#define | DECANCER_OPTION_RETAIN_CAPITALIZATION (1 << 0) |
Prevents decancer from changing all characters to lowercase. Therefore, if the input character is in uppercase, the output character will be in uppercase as well. | |
#define | DECANCER_OPTION_DISABLE_BIDI (1 << 1) |
Prevents decancer from applying the Unicode Bidirectional Algorithm. | |
#define | DECANCER_OPTION_RETAIN_DIACRITICS (1 << 2) |
Prevents decancer from curing characters WITH diacritics or accents. | |
#define | DECANCER_OPTION_RETAIN_GREEK (1 << 3) |
Prevents decancer from curing all greek characters. | |
#define | DECANCER_OPTION_RETAIN_CYRILLIC (1 << 4) |
Prevents decancer from curing all cyrillic characters. | |
#define | DECANCER_OPTION_RETAIN_HEBREW (1 << 5) |
Prevents decancer from curing all hebrew characters. | |
#define | DECANCER_OPTION_RETAIN_ARABIC (1 << 6) |
Prevents decancer from curing all arabic characters. | |
#define | DECANCER_OPTION_RETAIN_DEVANAGARI (1 << 7) |
Prevents decancer from curing all devanagari characters. | |
#define | DECANCER_OPTION_RETAIN_BENGALI (1 << 8) |
Prevents decancer from curing all bengali characters. | |
#define | DECANCER_OPTION_RETAIN_ARMENIAN (1 << 9) |
Prevents decancer from curing all armenian characters. | |
#define | DECANCER_OPTION_RETAIN_GUJARATI (1 << 10) |
Prevents decancer from curing all gujarati characters. | |
#define | DECANCER_OPTION_RETAIN_TAMIL (1 << 11) |
Prevents decancer from curing all tamil characters. | |
#define | DECANCER_OPTION_RETAIN_THAI (1 << 12) |
Prevents decancer from curing all thai characters. | |
#define | DECANCER_OPTION_RETAIN_LAO (1 << 13) |
Prevents decancer from curing all lao characters. | |
#define | DECANCER_OPTION_RETAIN_BURMESE (1 << 14) |
Prevents decancer from curing all burmese characters. | |
#define | DECANCER_OPTION_RETAIN_KHMER (1 << 15) |
Prevents decancer from curing all khmer characters. | |
#define | DECANCER_OPTION_RETAIN_MONGOLIAN (1 << 16) |
Prevents decancer from curing all mongolian characters. | |
#define | DECANCER_OPTION_RETAIN_CHINESE (1 << 17) |
Prevents decancer from curing all chinese characters. | |
#define | DECANCER_OPTION_RETAIN_JAPANESE (1 << 18) |
Prevents decancer from curing all katakana and hiragana characters. | |
#define | DECANCER_OPTION_RETAIN_KOREAN (1 << 19) |
Prevents decancer from curing all korean characters. | |
#define | DECANCER_OPTION_RETAIN_BRAILLE (1 << 20) |
Prevents decancer from curing all braille characters. | |
#define | DECANCER_OPTION_RETAIN_EMOJIS (1 << 21) |
Prevents decancer from curing all emojis. | |
#define | DECANCER_OPTION_RETAIN_TURKISH (1 << 22) |
Prevents decancer from curing all turkish characters. | |
#define | DECANCER_OPTION_ASCII_ONLY (1 << 23) |
Removes all non-ASCII characters from the result. | |
#define | DECANCER_OPTION_ALPHANUMERIC_ONLY (1 << 24) |
Removes all non-alphanumeric characters from the result. | |
#define | DECANCER_OPTION_ALL 0x1ffffff |
A configuration where every option is enabled. | |
#define | DECANCER_OPTION_PURE_HOMOGLYPH 0x3ffffc |
Prevents decancer from curing characters from major foreign writing systems, including diacritics. | |
Typedefs | |
typedef void * | decancer_cured_raw_utf16_t |
Represents a rust object returned from decancer_cured_raw_utf16. This value has no use other than retaining the lifetime of the returned UTF-16 pointer. | |
typedef void * | decancer_matcher_t |
Represents a UTF-8 matcher iterator object returned from decancer_find. | |
typedef void * | decancer_matcher_utf16_t |
Represents a UTF-16 matcher iterator object returned from decancer_find_utf16. | |
typedef void * | decancer_matches_t |
Represents a matches object returned from decancer_find_multiple and decancer_find_multiple_utf16. | |
typedef void * | decancer_cured_t |
Represents a cured string returned from decancer_cure and decancer_cure_utf16. | |
typedef uint32_t | decancer_options_t |
A set of unsigned 32-bit bitflags that lets you customize decancer's behavior in its curing functions. | |
Functions | |
DECANCER_EXPORT decancer_cured_t | decancer_cure (const uint8_t *input_str, const size_t input_size, const decancer_options_t options, decancer_error_t *error) |
Cures a UTF-8 encoded string. | |
DECANCER_EXPORT decancer_cured_t | decancer_cure_utf16 (const uint16_t *input_str, const size_t input_length, const decancer_options_t options, decancer_error_t *error) |
Cures a UTF-16 encoded string. | |
DECANCER_EXPORT void | decancer_cure_char (const uint32_t input, const decancer_options_t options, decancer_translation_t *translation) |
Cures a single unicode codepoint. | |
DECANCER_EXPORT const uint8_t * | decancer_cured_raw (decancer_cured_t cured, const decancer_match_t *match, size_t *output_size) |
Retrieves the raw UTF-8 bytes from a cured string object. | |
DECANCER_EXPORT decancer_cured_raw_utf16_t | decancer_cured_raw_utf16 (decancer_cured_t cured, const decancer_match_t *match, uint16_t **output_ptr, size_t *output_length) |
Retrieves the raw UTF-16 code units from a cured string object. | |
DECANCER_EXPORT const decancer_match_t * | decancer_matches_raw (decancer_matches_t matches, size_t *output_size) |
Returns the raw list of every similar-looking match from a decancer_matches_t object. | |
DECANCER_EXPORT decancer_matcher_t | decancer_find (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size) |
Finds every similar-looking match of a UTF-8 encoded string in the cured string. | |
DECANCER_EXPORT decancer_matcher_utf16_t | decancer_find_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length) |
Finds every similar-looking match of a UTF-16 encoded string in the cured string. | |
DECANCER_EXPORT decancer_matches_t | decancer_find_multiple (decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length) |
Finds every similar-looking match from a list of UTF-8 keywords in the cured string. Unlike decancer_find, this function also takes note of overlapping matches and merges them together. | |
DECANCER_EXPORT decancer_matches_t | decancer_find_multiple_utf16 (decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length) |
Finds every similar-looking match from a list of UTF-16 keywords in the cured string. Unlike decancer_find_utf16, this function also takes note of overlapping matches and merges them together. | |
DECANCER_EXPORT bool | decancer_matcher_next (decancer_matcher_t matcher, decancer_match_t *match) |
Iterates to the next element of a UTF-8 matcher iterator. | |
DECANCER_EXPORT bool | decancer_matcher_utf16_next (decancer_matcher_utf16_t matcher, decancer_match_t *match) |
Iterates to the next element of a UTF-16 matcher iterator. | |
DECANCER_EXPORT bool | decancer_censor (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint32_t replacement_char) |
Censors every similar-looking match of the specified UTF-8 encoded string. | |
DECANCER_EXPORT bool | decancer_censor_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint32_t replacement_char) |
Censors every similar-looking match of the specified UTF-16 encoded string. | |
DECANCER_EXPORT bool | decancer_replace (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint8_t *replacement_str, const size_t replacement_size) |
Replaces every similar-looking match of the specified UTF-8 encoded string with another UTF-8 encoded string. | |
DECANCER_EXPORT bool | decancer_replace_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length) |
Replaces every similar-looking match of the specified UTF-16 encoded string with another UTF-16 encoded string. | |
DECANCER_EXPORT bool | decancer_censor_multiple (decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint32_t replacement_char) |
Censors every similar-looking match of the specified list of UTF-8 keywords. Unlike decancer_censor, this function also takes note of overlapping matches. | |
DECANCER_EXPORT bool | decancer_censor_multiple_utf16 (decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint32_t replacement_char) |
Censors every similar-looking match of the specified list of UTF-16 keywords. Unlike decancer_censor_utf16, this function also takes note of overlapping matches. | |
DECANCER_EXPORT bool | decancer_replace_multiple (decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint8_t *replacement_str, const size_t replacement_size) |
Replaces every similar-looking match of the specified list of UTF-8 keywords with another UTF-8 encoded string. Unlike decancer_replace, this function also takes note of overlapping matches. | |
DECANCER_EXPORT bool | decancer_replace_multiple_utf16 (decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length) |
Replaces every similar-looking match of the specified list of UTF-16 keywords with another UTF-16 encoded string. Unlike decancer_replace_utf16, this function also takes note of overlapping matches. | |
DECANCER_EXPORT bool | decancer_contains (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size) |
Checks if the cured string similarly contains the specified UTF-8 encoded string. | |
DECANCER_EXPORT bool | decancer_contains_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length) |
Checks if the cured string similarly contains the specified UTF-16 encoded string. | |
DECANCER_EXPORT bool | decancer_starts_with (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size) |
Checks if the cured string similarly starts with the specified UTF-8 encoded string. | |
DECANCER_EXPORT bool | decancer_starts_with_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length) |
Checks if the cured string similarly starts with the specified UTF-16 encoded string. | |
DECANCER_EXPORT bool | decancer_ends_with (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size) |
Checks if the cured string similarly ends with the specified UTF-8 encoded string. | |
DECANCER_EXPORT bool | decancer_ends_with_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length) |
Checks if the cured string similarly ends with the specified UTF-16 encoded string. | |
DECANCER_EXPORT bool | decancer_equals (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size) |
Checks if the cured string is similar to the specified UTF-8 encoded string. | |
DECANCER_EXPORT bool | decancer_equals_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length) |
Checks if the cured string is similar to the specified UTF-16 encoded string. | |
DECANCER_EXPORT decancer_cured_raw_utf16_t | decancer_cured_raw_utf16_clone (decancer_cured_raw_utf16_t raw_utf16_handle) |
Clones the rust object created by decancer_cured_raw_utf16. | |
DECANCER_EXPORT void | decancer_cured_raw_utf16_free (decancer_cured_raw_utf16_t raw_utf16_handle) |
Frees the rust object created by decancer_cured_raw_utf16. | |
DECANCER_EXPORT decancer_matches_t | decancer_matcher_consume (decancer_matcher_t matcher) |
Consumes the UTF-8 matcher iterator object created by decancer_find and returns a matches object. | |
DECANCER_EXPORT void | decancer_matcher_free (decancer_matcher_t matcher) |
Frees the UTF-8 matcher iterator object created by decancer_find. | |
DECANCER_EXPORT decancer_matches_t | decancer_matcher_utf16_consume (decancer_matcher_utf16_t matcher) |
Consumes the UTF-16 matcher iterator object created by decancer_find_utf16 and returns a matches object. | |
DECANCER_EXPORT void | decancer_matcher_utf16_free (decancer_matcher_utf16_t matcher) |
Frees the UTF-16 matcher iterator object created by decancer_find_utf16. | |
DECANCER_EXPORT decancer_matches_t | decancer_matches_clone (decancer_matches_t matches) |
Clones the matches object created by decancer_find_multiple and decancer_find_multiple_utf16. | |
DECANCER_EXPORT void | decancer_matches_free (decancer_matches_t matches) |
Frees the matches object created by decancer_find_multiple and decancer_find_multiple_utf16. | |
DECANCER_EXPORT void | decancer_translation_init (decancer_translation_t *translation) |
Initializes a newly created translation struct for use. | |
DECANCER_EXPORT void | decancer_translation_clone (const decancer_translation_t *translation_in, decancer_translation_t *translation_out) |
Clones the translation struct used in decancer_cure_char. | |
DECANCER_EXPORT void | decancer_translation_free (decancer_translation_t *translation) |
Frees the translation struct used in decancer_cure_char. | |
DECANCER_EXPORT decancer_cured_t | decancer_cured_clone (decancer_cured_t cured) |
Clones the cured string object created by decancer_cure and decancer_cure_utf16. | |
DECANCER_EXPORT void | decancer_cured_free (decancer_cured_t cured) |
Frees the cured string object created by decancer_cure and decancer_cure_utf16. | |
A library that removes common unicode confusables/homoglyphs from strings.
#define DECANCER_OPTION_ALL 0x1ffffff |
A configuration where every option is enabled.
#define DECANCER_OPTION_ALPHANUMERIC_ONLY (1 << 24) |
Removes all non-alphanumeric characters from the result.
#define DECANCER_OPTION_ASCII_ONLY (1 << 23) |
Removes all non-ASCII characters from the result.
#define DECANCER_OPTION_DEFAULT 0 |
Uses decancer's default options – AKA to be AS AGGRESSIVE AS POSSIBLE.
This makes decancer cure as many characters as possible and turn all the output characters to lowercase.
#define DECANCER_OPTION_DISABLE_BIDI (1 << 1) |
Prevents decancer from applying the Unicode Bidirectional Algorithm.
Use this ONLY when you don't expect any right-to-left characters. Enabling this option has no effect if it's passed to decancer_cure_char.
#define DECANCER_OPTION_PURE_HOMOGLYPH 0x3ffffc |
Prevents decancer from curing characters from major foreign writing systems, including diacritics.
#define DECANCER_OPTION_RETAIN_ARABIC (1 << 6) |
Prevents decancer from curing all arabic characters.
#define DECANCER_OPTION_RETAIN_ARMENIAN (1 << 9) |
Prevents decancer from curing all armenian characters.
#define DECANCER_OPTION_RETAIN_BENGALI (1 << 8) |
Prevents decancer from curing all bengali characters.
#define DECANCER_OPTION_RETAIN_BRAILLE (1 << 20) |
Prevents decancer from curing all braille characters.
#define DECANCER_OPTION_RETAIN_BURMESE (1 << 14) |
Prevents decancer from curing all burmese characters.
#define DECANCER_OPTION_RETAIN_CAPITALIZATION (1 << 0) |
Prevents decancer from changing all characters to lowercase. Therefore, if the input character is in uppercase, the output character will be in uppercase as well.
#define DECANCER_OPTION_RETAIN_CHINESE (1 << 17) |
Prevents decancer from curing all chinese characters.
#define DECANCER_OPTION_RETAIN_CYRILLIC (1 << 4) |
Prevents decancer from curing all cyrillic characters.
#define DECANCER_OPTION_RETAIN_DEVANAGARI (1 << 7) |
Prevents decancer from curing all devanagari characters.
#define DECANCER_OPTION_RETAIN_DIACRITICS (1 << 2) |
Prevents decancer from curing characters WITH diacritics or accents.
#define DECANCER_OPTION_RETAIN_EMOJIS (1 << 21) |
Prevents decancer from curing all emojis.
#define DECANCER_OPTION_RETAIN_GREEK (1 << 3) |
Prevents decancer from curing all greek characters.
#define DECANCER_OPTION_RETAIN_GUJARATI (1 << 10) |
Prevents decancer from curing all gujarati characters.
#define DECANCER_OPTION_RETAIN_HEBREW (1 << 5) |
Prevents decancer from curing all hebrew characters.
#define DECANCER_OPTION_RETAIN_JAPANESE (1 << 18) |
Prevents decancer from curing all katakana and hiragana characters.
#define DECANCER_OPTION_RETAIN_KHMER (1 << 15) |
Prevents decancer from curing all khmer characters.
#define DECANCER_OPTION_RETAIN_KOREAN (1 << 19) |
Prevents decancer from curing all korean characters.
#define DECANCER_OPTION_RETAIN_LAO (1 << 13) |
Prevents decancer from curing all lao characters.
#define DECANCER_OPTION_RETAIN_MONGOLIAN (1 << 16) |
Prevents decancer from curing all mongolian characters.
#define DECANCER_OPTION_RETAIN_TAMIL (1 << 11) |
Prevents decancer from curing all tamil characters.
#define DECANCER_OPTION_RETAIN_THAI (1 << 12) |
Prevents decancer from curing all thai characters.
#define DECANCER_OPTION_RETAIN_TURKISH (1 << 22) |
Prevents decancer from curing all turkish characters.
#define DECANCER_TRANSLATION_KIND_CHARACTER 0 |
The translation is a single unicode character.
#define DECANCER_TRANSLATION_KIND_NONE 2 |
The translation is an empty string.
You can get this when the input character is a control character, surrogate, combining character (e.g. diacritics), private use character, byte order character, or any invalid unicode value.
#define DECANCER_TRANSLATION_KIND_STRING 1 |
The translation is a string.
#define DECANCER_VERSION 0x030208 |
The current version used by this library.
#define DECANCER_VERSION_MAJOR ((DECANCER_VERSION & 0xff0000) >> 16) |
The current major version used by this library.
#define DECANCER_VERSION_MINOR ((DECANCER_VERSION & 0xff00) >> 8) |
The current minor version used by this library.
#define DECANCER_VERSION_PATCH (DECANCER_VERSION & 0xff) |
The current patch version used by this library.
typedef void* decancer_cured_raw_utf16_t |
Represents a rust object returned from decancer_cured_raw_utf16. This value has no use other than retaining the lifetime of the returned UTF-16 pointer.
typedef void* decancer_cured_t |
Represents a cured string returned from decancer_cure and decancer_cure_utf16.
typedef void* decancer_matcher_t |
Represents a UTF-8 matcher iterator object returned from decancer_find.
typedef void* decancer_matcher_utf16_t |
Represents a UTF-16 matcher iterator object returned from decancer_find_utf16.
typedef void* decancer_matches_t |
Represents a matches object returned from decancer_find_multiple and decancer_find_multiple_utf16.
typedef uint32_t decancer_options_t |
A set of unsigned 32-bit bitflags that lets you customize decancer's behavior in its curing functions.
DECANCER_EXPORT bool decancer_censor | ( | decancer_cured_t | cured, |
const uint8_t * | other_str, | ||
const size_t | other_size, | ||
const uint32_t | replacement_char ) |
Censors every similar-looking match of the specified UTF-8 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-8 encoded string to match with. |
other_size | UTF-8 size of the other string, in bytes. |
replacement_char | The censor unicode codepoint. Ideally '*' (0x2a) or '-' (0x2d). |
DECANCER_EXPORT bool decancer_censor_multiple | ( | decancer_cured_t | cured, |
const decancer_keyword_t * | other, | ||
const size_t | other_length, | ||
const uint32_t | replacement_char ) |
Censors every similar-looking match of the specified list of UTF-8 keywords. Unlike decancer_censor, this function also takes note of overlapping matches.
Example:
cured | The cured string object. |
other | A list of UTF-8 keywords to match with. |
other_length | Length of the keywords array in units of decancer_keyword_t – or sizeof(array) / sizeof(decancer_keyword_t). |
replacement_char | The censor unicode codepoint. Ideally '*' (0x2a) or '-' (0x2d). |
DECANCER_EXPORT bool decancer_censor_multiple_utf16 | ( | decancer_cured_t | cured, |
const decancer_keyword_utf16_t * | other, | ||
const size_t | other_length, | ||
const uint32_t | replacement_char ) |
Censors every similar-looking match of the specified list of UTF-16 keywords. Unlike decancer_censor_utf16, this function also takes note of overlapping matches.
Example:
cured | The cured string object. |
other | A list of UTF-16 keywords to match with. |
other_length | Length of the keywords array in units of decancer_keyword_utf16_t – or sizeof(array) / sizeof(decancer_keyword_utf16_t). |
replacement_char | The censor unicode codepoint. Ideally '*' (0x2a) or '-' (0x2d). |
DECANCER_EXPORT bool decancer_censor_utf16 | ( | decancer_cured_t | cured, |
const uint16_t * | other_str, | ||
const size_t | other_length, | ||
const uint32_t | replacement_char ) |
Censors every similar-looking match of the specified UTF-16 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-16 encoded string to match with. |
other_length | Length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
replacement_char | The censor unicode codepoint. Ideally '*' (0x2a) or '-' (0x2d). |
DECANCER_EXPORT bool decancer_contains | ( | decancer_cured_t | cured, |
const uint8_t * | other_str, | ||
const size_t | other_size ) |
Checks if the cured string similarly contains the specified UTF-8 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-8 encoded string to match with. |
other_size | UTF-8 size of the other string, in bytes. |
DECANCER_EXPORT bool decancer_contains_utf16 | ( | decancer_cured_t | cured, |
const uint16_t * | other_str, | ||
const size_t | other_length ) |
Checks if the cured string similarly contains the specified UTF-16 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-16 encoded string to match with. |
other_length | Length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
DECANCER_EXPORT decancer_cured_t decancer_cure | ( | const uint8_t * | input_str, |
const size_t | input_size, | ||
const decancer_options_t | options, | ||
decancer_error_t * | error ) |
Cures a UTF-8 encoded string.
Example:
input_str | The UTF-8 encoded string. |
input_size | UTF-8 size of the input string, in bytes. |
options | Options to customize decancer's curing behavior. To use decancer's default behavior, pass in DECANCER_OPTION_DEFAULT. |
error | A pointer to a decancer_error_t struct. This pointer can be NULL if you want to ignore errors. |
DECANCER_EXPORT void decancer_cure_char | ( | const uint32_t | input, |
const decancer_options_t | options, | ||
decancer_translation_t * | translation ) |
Cures a single unicode codepoint.
Example:
input | The unicode codepoint. |
options | Options to customize decancer's curing behavior. To use decancer's default behavior, pass in DECANCER_OPTION_DEFAULT. |
translation | A pointer to the output translation struct. |
DECANCER_EXPORT decancer_cured_t decancer_cure_utf16 | ( | const uint16_t * | input_str, |
const size_t | input_length, | ||
const decancer_options_t | options, | ||
decancer_error_t * | error ) |
Cures a UTF-16 encoded string.
Example:
input_str | The UTF-16 encoded string. |
input_length | Length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
options | Options to customize decancer's curing behavior. To use decancer's default behavior, pass in DECANCER_OPTION_DEFAULT. |
error | A pointer to a decancer_error_t struct. This pointer can be NULL if you want to ignore errors. |
DECANCER_EXPORT decancer_cured_t decancer_cured_clone | ( | decancer_cured_t | cured | ) |
Clones the cured string object created by decancer_cure and decancer_cure_utf16.
cured | The cured string object to clone. |
DECANCER_EXPORT void decancer_cured_free | ( | decancer_cured_t | cured | ) |
Frees the cured string object created by decancer_cure and decancer_cure_utf16.
cured | The cured string object to free. |
DECANCER_EXPORT const uint8_t * decancer_cured_raw | ( | decancer_cured_t | cured, |
const decancer_match_t * | match, | ||
size_t * | output_size ) |
Retrieves the raw UTF-8 bytes from a cured string object.
Example:
cured | The cured string object. |
match | A pointer to a match object if you just want a slice, otherwise NULL if you want the entire string. |
output_size | A pointer to the output's UTF-8 size, in bytes. |
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16 | ( | decancer_cured_t | cured, |
const decancer_match_t * | match, | ||
uint16_t ** | output_ptr, | ||
size_t * | output_length ) |
Retrieves the raw UTF-16 code units from a cured string object.
Example:
cured | The cured string object. |
match | A pointer to a match object if you just want a slice, otherwise NULL if you want the entire string. |
output_ptr | A pointer to the output's UTF-16 encoded string. |
output_length | A pointer to the length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16_clone | ( | decancer_cured_raw_utf16_t | raw_utf16_handle | ) |
Clones the rust object created by decancer_cured_raw_utf16.
raw_utf16_handle | The rust object to clone. |
DECANCER_EXPORT void decancer_cured_raw_utf16_free | ( | decancer_cured_raw_utf16_t | raw_utf16_handle | ) |
Frees the rust object created by decancer_cured_raw_utf16.
raw_utf16_handle | The rust object to free. |
DECANCER_EXPORT bool decancer_ends_with | ( | decancer_cured_t | cured, |
const uint8_t * | other_str, | ||
const size_t | other_size ) |
Checks if the cured string similarly ends with the specified UTF-8 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-8 encoded string to match with. |
other_size | UTF-8 size of the other string, in bytes. |
DECANCER_EXPORT bool decancer_ends_with_utf16 | ( | decancer_cured_t | cured, |
const uint16_t * | other_str, | ||
const size_t | other_length ) |
Checks if the cured string similarly ends with the specified UTF-16 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-16 encoded string to match with. |
other_length | Length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
DECANCER_EXPORT bool decancer_equals | ( | decancer_cured_t | cured, |
const uint8_t * | other_str, | ||
const size_t | other_size ) |
Checks if the cured string is similar to the specified UTF-8 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-8 encoded string to match with. |
other_size | UTF-8 size of the other string, in bytes. |
DECANCER_EXPORT bool decancer_equals_utf16 | ( | decancer_cured_t | cured, |
const uint16_t * | other_str, | ||
const size_t | other_length ) |
Checks if the cured string is similar to the specified UTF-16 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-16 encoded string to match with. |
other_length | Length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
DECANCER_EXPORT decancer_matcher_t decancer_find | ( | decancer_cured_t | cured, |
const uint8_t * | other_str, | ||
const size_t | other_size ) |
Finds every similar-looking match of a UTF-8 encoded string in the cured string.
Example:
cured | The cured string object. |
other_str | The UTF-8 encoded string to match with. |
other_size | UTF-8 size of the other string, in bytes. |
DECANCER_EXPORT decancer_matches_t decancer_find_multiple | ( | decancer_cured_t | cured, |
const decancer_keyword_t * | other, | ||
const size_t | other_length ) |
Finds every similar-looking match from a list of UTF-8 keywords in the cured string. Unlike decancer_find, this function also takes note of overlapping matches and merges them together.
Example:
cured | The cured string object. |
other | A list of UTF-8 keywords to match with. |
other_length | Length of the keywords array in units of decancer_keyword_t – or sizeof(array) / sizeof(decancer_keyword_t). |
DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16 | ( | decancer_cured_t | cured, |
const decancer_keyword_utf16_t * | other, | ||
const size_t | other_length ) |
Finds every similar-looking match from a list of UTF-16 keywords in the cured string. Unlike decancer_find_utf16, this function also takes note of overlapping matches and merges them together.
Example:
cured | The cured string object. |
other | A list of UTF-16 keywords to match with. |
other_length | Length of the keywords array in units of decancer_keyword_utf16_t – or sizeof(array) / sizeof(decancer_keyword_utf16_t). |
DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16 | ( | decancer_cured_t | cured, |
const uint16_t * | other_str, | ||
const size_t | other_length ) |
Finds every similar-looking match of a UTF-16 encoded string in the cured string.
Example:
cured | The cured string object. |
other_str | The UTF-16 encoded string to match with. |
other_length | Length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
DECANCER_EXPORT decancer_matches_t decancer_matcher_consume | ( | decancer_matcher_t | matcher | ) |
Consumes the UTF-8 matcher iterator object created by decancer_find and returns a matches object.
Example:
matcher | The UTF-8 matcher iterator object to consume. |
DECANCER_EXPORT void decancer_matcher_free | ( | decancer_matcher_t | matcher | ) |
Frees the UTF-8 matcher iterator object created by decancer_find.
matcher | The UTF-8 matcher iterator object to free. |
DECANCER_EXPORT bool decancer_matcher_next | ( | decancer_matcher_t | matcher, |
decancer_match_t * | match ) |
Iterates to the next element of a UTF-8 matcher iterator.
Example:
matcher | The UTF-8 matcher iterator object. |
match | A pointer to a decancer_match_t struct. |
DECANCER_EXPORT decancer_matches_t decancer_matcher_utf16_consume | ( | decancer_matcher_utf16_t | matcher | ) |
Consumes the UTF-16 matcher iterator object created by decancer_find_utf16 and returns a matches object.
Example:
matcher | The UTF-16 matcher iterator object to consume. |
DECANCER_EXPORT void decancer_matcher_utf16_free | ( | decancer_matcher_utf16_t | matcher | ) |
Frees the UTF-16 matcher iterator object created by decancer_find_utf16.
matcher | The UTF-16 matcher iterator object to free. |
DECANCER_EXPORT bool decancer_matcher_utf16_next | ( | decancer_matcher_utf16_t | matcher, |
decancer_match_t * | match ) |
Iterates to the next element of a UTF-16 matcher iterator.
Example:
matcher | The UTF-16 matcher iterator object. |
match | A pointer to a decancer_match_t struct. |
DECANCER_EXPORT decancer_matches_t decancer_matches_clone | ( | decancer_matches_t | matches | ) |
Clones the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
matches | The matches object to clone. |
DECANCER_EXPORT void decancer_matches_free | ( | decancer_matches_t | matches | ) |
Frees the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
matches | The matches object to free. |
DECANCER_EXPORT const decancer_match_t * decancer_matches_raw | ( | decancer_matches_t | matches, |
size_t * | output_size ) |
Returns the raw list of every similar-looking match from a decancer_matches_t object.
Example:
matches | The matches object. |
output_size | A pointer to the output's array size. |
DECANCER_EXPORT bool decancer_replace | ( | decancer_cured_t | cured, |
const uint8_t * | other_str, | ||
const size_t | other_size, | ||
const uint8_t * | replacement_str, | ||
const size_t | replacement_size ) |
Replaces every similar-looking match of the specified UTF-8 encoded string with another UTF-8 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-8 encoded string to match with. |
other_size | UTF-8 size of the other string, in bytes. |
replacement_str | The UTF-8 encoded string to replace with. |
replacement_size | UTF-8 size of the replacement string, in bytes. |
DECANCER_EXPORT bool decancer_replace_multiple | ( | decancer_cured_t | cured, |
const decancer_keyword_t * | other, | ||
const size_t | other_length, | ||
const uint8_t * | replacement_str, | ||
const size_t | replacement_size ) |
Replaces every similar-looking match of the specified list of UTF-8 keywords with another UTF-8 encoded string. Unlike decancer_replace, this function also takes note of overlapping matches.
Example:
cured | The cured string object. |
other | A list of UTF-8 keywords to match with. |
other_length | Length of the keywords array in units of decancer_keyword_t – or sizeof(array) / sizeof(decancer_keyword_t). |
replacement_str | The UTF-8 encoded string to replace with. |
replacement_size | UTF-8 size of the replacement string, in bytes. |
DECANCER_EXPORT bool decancer_replace_multiple_utf16 | ( | decancer_cured_t | cured, |
const decancer_keyword_utf16_t * | other, | ||
const size_t | other_length, | ||
const uint16_t * | replacement_str, | ||
const size_t | replacement_length ) |
Replaces every similar-looking match of the specified list of UTF-16 keywords with another UTF-16 encoded string. Unlike decancer_replace_utf16, this function also takes note of overlapping matches.
Example:
cured | The cured string object. |
other | A list of UTF-16 keywords to match with. |
other_length | Length of the keywords array in units of decancer_keyword_utf16_t – or sizeof(array) / sizeof(decancer_keyword_utf16_t). |
replacement_str | The UTF-16 encoded string to replace with. |
replacement_length | Length of the replacement string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
DECANCER_EXPORT bool decancer_replace_utf16 | ( | decancer_cured_t | cured, |
const uint16_t * | other_str, | ||
const size_t | other_length, | ||
const uint16_t * | replacement_str, | ||
const size_t | replacement_length ) |
Replaces every similar-looking match of the specified UTF-16 encoded string with another UTF-16 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-16 encoded string to match with. |
other_length | Length of the search string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
replacement_str | The UTF-16 encoded string to replace with. |
replacement_length | Length of the replacement string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
DECANCER_EXPORT bool decancer_starts_with | ( | decancer_cured_t | cured, |
const uint8_t * | other_str, | ||
const size_t | other_size ) |
Checks if the cured string similarly starts with the specified UTF-8 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-8 encoded string to match with. |
other_size | UTF-8 size of the other string, in bytes. |
DECANCER_EXPORT bool decancer_starts_with_utf16 | ( | decancer_cured_t | cured, |
const uint16_t * | other_str, | ||
const size_t | other_length ) |
Checks if the cured string similarly starts with the specified UTF-16 encoded string.
Example:
cured | The cured string object. |
other_str | The UTF-16 encoded string to match with. |
other_length | Length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t). |
DECANCER_EXPORT void decancer_translation_clone | ( | const decancer_translation_t * | translation_in, |
decancer_translation_t * | translation_out ) |
Clones the translation struct used in decancer_cure_char.
translation_in | A pointer to a translation struct to clone. |
translation_out | A pointer to the newly cloned translation struct. This struct doesn't have to be initialized with decancer_translation_init first. |
DECANCER_EXPORT void decancer_translation_free | ( | decancer_translation_t * | translation | ) |
Frees the translation struct used in decancer_cure_char.
translation | A pointer to a translation struct. |
DECANCER_EXPORT void decancer_translation_init | ( | decancer_translation_t * | translation | ) |
Initializes a newly created translation struct for use.
translation | A pointer to a translation struct bound for decancer_cure_char. |