decancer 3.2.8
A library that removes common unicode confusables/homoglyphs from strings.
 
No such query exists :(
Functions
Type definitions
Macros
Loading...
Searching...
No Matches
/home/runner/work/decancer/decancer/bindings/native/decancer.h File Reference

A library that removes common unicode confusables/homoglyphs from strings. More...

Go to the source code of this file.

Data Structures

struct  decancer_error_t
 Represents an error caused by decancer not being able to cure a string. More...
 
struct  decancer_keyword_t
 Represents a UTF-8 encoded keyword. This struct is often used inside an array. More...
 
struct  decancer_keyword_utf16_t
 Represents a UTF-16 encoded keyword. This struct is often used inside an array. More...
 
struct  decancer_translation_t
 Represents a translation of a unicode codepoint. More...
 
struct  decancer_match_t
 Represents a match in UTF-8 indices. More...
 

Macros

#define DECANCER_VERSION   0x030208
 The current version used by this library.
 
#define DECANCER_VERSION_MAJOR   ((DECANCER_VERSION & 0xff0000) >> 16)
 The current major version used by this library.
 
#define DECANCER_VERSION_MINOR   ((DECANCER_VERSION & 0xff00) >> 8)
 The current minor version used by this library.
 
#define DECANCER_VERSION_PATCH   (DECANCER_VERSION & 0xff)
 The current patch version used by this library.
 
#define DECANCER_TRANSLATION_KIND_CHARACTER   0
 The translation is a single unicode character.
 
#define DECANCER_TRANSLATION_KIND_STRING   1
 The translation is a string.
 
#define DECANCER_TRANSLATION_KIND_NONE   2
 The translation is an empty string.
 
#define DECANCER_OPTION_DEFAULT   0
 Uses decancer's default options – AKA to be AS AGGRESSIVE AS POSSIBLE.
 
#define DECANCER_OPTION_RETAIN_CAPITALIZATION   (1 << 0)
 Prevents decancer from changing all characters to lowercase. Therefore, if the input character is in uppercase, the output character will be in uppercase as well.
 
#define DECANCER_OPTION_DISABLE_BIDI   (1 << 1)
 Prevents decancer from applying the Unicode Bidirectional Algorithm.
 
#define DECANCER_OPTION_RETAIN_DIACRITICS   (1 << 2)
 Prevents decancer from curing characters WITH diacritics or accents.
 
#define DECANCER_OPTION_RETAIN_GREEK   (1 << 3)
 Prevents decancer from curing all greek characters.
 
#define DECANCER_OPTION_RETAIN_CYRILLIC   (1 << 4)
 Prevents decancer from curing all cyrillic characters.
 
#define DECANCER_OPTION_RETAIN_HEBREW   (1 << 5)
 Prevents decancer from curing all hebrew characters.
 
#define DECANCER_OPTION_RETAIN_ARABIC   (1 << 6)
 Prevents decancer from curing all arabic characters.
 
#define DECANCER_OPTION_RETAIN_DEVANAGARI   (1 << 7)
 Prevents decancer from curing all devanagari characters.
 
#define DECANCER_OPTION_RETAIN_BENGALI   (1 << 8)
 Prevents decancer from curing all bengali characters.
 
#define DECANCER_OPTION_RETAIN_ARMENIAN   (1 << 9)
 Prevents decancer from curing all armenian characters.
 
#define DECANCER_OPTION_RETAIN_GUJARATI   (1 << 10)
 Prevents decancer from curing all gujarati characters.
 
#define DECANCER_OPTION_RETAIN_TAMIL   (1 << 11)
 Prevents decancer from curing all tamil characters.
 
#define DECANCER_OPTION_RETAIN_THAI   (1 << 12)
 Prevents decancer from curing all thai characters.
 
#define DECANCER_OPTION_RETAIN_LAO   (1 << 13)
 Prevents decancer from curing all lao characters.
 
#define DECANCER_OPTION_RETAIN_BURMESE   (1 << 14)
 Prevents decancer from curing all burmese characters.
 
#define DECANCER_OPTION_RETAIN_KHMER   (1 << 15)
 Prevents decancer from curing all khmer characters.
 
#define DECANCER_OPTION_RETAIN_MONGOLIAN   (1 << 16)
 Prevents decancer from curing all mongolian characters.
 
#define DECANCER_OPTION_RETAIN_CHINESE   (1 << 17)
 Prevents decancer from curing all chinese characters.
 
#define DECANCER_OPTION_RETAIN_JAPANESE   (1 << 18)
 Prevents decancer from curing all katakana and hiragana characters.
 
#define DECANCER_OPTION_RETAIN_KOREAN   (1 << 19)
 Prevents decancer from curing all korean characters.
 
#define DECANCER_OPTION_RETAIN_BRAILLE   (1 << 20)
 Prevents decancer from curing all braille characters.
 
#define DECANCER_OPTION_RETAIN_EMOJIS   (1 << 21)
 Prevents decancer from curing all emojis.
 
#define DECANCER_OPTION_RETAIN_TURKISH   (1 << 22)
 Prevents decancer from curing all turkish characters.
 
#define DECANCER_OPTION_ASCII_ONLY   (1 << 23)
 Removes all non-ASCII characters from the result.
 
#define DECANCER_OPTION_ALPHANUMERIC_ONLY   (1 << 24)
 Removes all non-alphanumeric characters from the result.
 
#define DECANCER_OPTION_ALL   0x1ffffff
 A configuration where every option is enabled.
 
#define DECANCER_OPTION_PURE_HOMOGLYPH   0x3ffffc
 Prevents decancer from curing characters from major foreign writing systems, including diacritics.
 

Typedefs

typedef void * decancer_cured_raw_utf16_t
 Represents a rust object returned from decancer_cured_raw_utf16. This value has no use other than retaining the lifetime of the returned UTF-16 pointer.
 
typedef void * decancer_matcher_t
 Represents a UTF-8 matcher iterator object returned from decancer_find.
 
typedef void * decancer_matcher_utf16_t
 Represents a UTF-16 matcher iterator object returned from decancer_find_utf16.
 
typedef void * decancer_matches_t
 Represents a matcher iterator object returned from decancer_find_multiple and decancer_find_multiple_utf16.
 
typedef void * decancer_cured_t
 Represents a cured string returned from decancer_cure and decancer_cure_utf16.
 
typedef uint32_t decancer_options_t
 An unsigned 32-bit bitflags that lets you customize decancer's behavior in its curing functions.
 

Functions

DECANCER_EXPORT decancer_cured_t decancer_cure (const uint8_t *input_str, const size_t input_size, const decancer_options_t options, decancer_error_t *error)
 Cures a UTF-8 encoded string.
 
DECANCER_EXPORT decancer_cured_t decancer_cure_utf16 (const uint16_t *input_str, const size_t input_length, const decancer_options_t options, decancer_error_t *error)
 Cures a UTF-16 encoded string.
 
DECANCER_EXPORT void decancer_cure_char (const uint32_t input, const decancer_options_t options, decancer_translation_t *translation)
 Cures a single unicode codepoint.
 
DECANCER_EXPORT const uint8_t * decancer_cured_raw (decancer_cured_t cured, const decancer_match_t *match, size_t *output_size)
 Retrieves the raw UTF-8 bytes from a cured string object.
 
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16 (decancer_cured_t cured, const decancer_match_t *match, uint16_t **output_ptr, size_t *output_length)
 Retrieves the raw UTF-16 bytes from a cured string object.
 
DECANCER_EXPORT const decancer_match_t * decancer_matches_raw (decancer_matches_t matches, size_t *output_size)
 Returns the raw list of every similar-looking match from a decancer_matches_t object.
 
DECANCER_EXPORT decancer_matcher_t decancer_find (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
 Finds every similar-looking match of a UTF-8 encoded string in the cured string.
 
DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
 Finds every similar-looking match of a UTF-16 encoded string in the cured string.
 
DECANCER_EXPORT decancer_matches_t decancer_find_multiple (decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length)
 Finds every similar-looking match from a list of UTF-8 keywords in the cured string. Unlike decancer_find, this function also takes note of overlapping matches and merges them together.
 
DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16 (decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length)
 Finds every similar-looking match from a list of UTF-16 keywords in the cured string. Unlike decancer_find_utf16, this function also takes note of overlapping matches and merges them together.
 
DECANCER_EXPORT bool decancer_matcher_next (decancer_matcher_t matcher, decancer_match_t *match)
 Iterates to the next element of a UTF-8 matcher iterator.
 
DECANCER_EXPORT bool decancer_matcher_utf16_next (decancer_matcher_utf16_t matcher, decancer_match_t *match)
 Iterates to the next element of a UTF-16 matcher iterator.
 
DECANCER_EXPORT bool decancer_censor (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint32_t replacement_char)
 Censors every similar-looking match of the specified UTF-8 encoded string.
 
DECANCER_EXPORT bool decancer_censor_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint32_t replacement_char)
 Censors every similar-looking match of the specified UTF-16 encoded string.
 
DECANCER_EXPORT bool decancer_replace (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint8_t *replacement_str, const size_t replacement_size)
 Replaces every similar-looking match of the specified UTF-8 encoded string with another UTF-8 encoded string.
 
DECANCER_EXPORT bool decancer_replace_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
 Replaces every similar-looking match of the specified UTF-16 encoded string with another UTF-16 encoded string.
 
DECANCER_EXPORT bool decancer_censor_multiple (decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint32_t replacement_char)
 Censors every similar-looking match of the specified list of UTF-8 keywords. Unlike decancer_censor, this function also takes note of overlapping matches.
 
DECANCER_EXPORT bool decancer_censor_multiple_utf16 (decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint32_t replacement_char)
 Censors every similar-looking match of the specified list of UTF-16 keywords. Unlike decancer_censor_utf16, this function also takes note of overlapping matches.
 
DECANCER_EXPORT bool decancer_replace_multiple (decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint8_t *replacement_str, const size_t replacement_size)
 Replaces every similar-looking match of the specified list of UTF-8 keywords with another UTF-8 encoded string. Unlike decancer_replace, this function also takes note of overlapping matches.
 
DECANCER_EXPORT bool decancer_replace_multiple_utf16 (decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
 Replaces every similar-looking match of the specified list of UTF-16 keywords with another UTF-16 encoded string. Unlike decancer_replace_utf16, this function also takes note of overlapping matches.
 
DECANCER_EXPORT bool decancer_contains (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
 Checks if the cured string similarly contains the specified UTF-8 encoded string.
 
DECANCER_EXPORT bool decancer_contains_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
 Checks if the cured string similarly contains the specified UTF-16 encoded string.
 
DECANCER_EXPORT bool decancer_starts_with (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
 Checks if the cured string similarly starts with the specified UTF-8 encoded string.
 
DECANCER_EXPORT bool decancer_starts_with_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
 Checks if the cured string similarly starts with the specified UTF-16 encoded string.
 
DECANCER_EXPORT bool decancer_ends_with (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
 Checks if the cured string similarly ends with the specified UTF-8 encoded string.
 
DECANCER_EXPORT bool decancer_ends_with_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
 Checks if the cured string similarly ends with the specified UTF-16 encoded string.
 
DECANCER_EXPORT bool decancer_equals (decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
 Checks if the cured string is similar to the specified UTF-8 encoded string.
 
DECANCER_EXPORT bool decancer_equals_utf16 (decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
 Checks if the cured string is similar to the specified UTF-16 encoded string.
 
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16_clone (decancer_cured_raw_utf16_t raw_utf16_handle)
 Clones the rust object created by decancer_cured_raw_utf16.
 
DECANCER_EXPORT void decancer_cured_raw_utf16_free (decancer_cured_raw_utf16_t raw_utf16_handle)
 Frees the rust object created by decancer_cured_raw_utf16.
 
DECANCER_EXPORT decancer_matches_t decancer_matcher_consume (decancer_matcher_t matcher)
 Consumes the UTF-8 matcher iterator object created by decancer_find and returns a matches object.
 
DECANCER_EXPORT void decancer_matcher_free (decancer_matcher_t matcher)
 Frees the UTF-8 matcher iterator object created by decancer_find.
 
DECANCER_EXPORT decancer_matches_t decancer_matcher_utf16_consume (decancer_matcher_utf16_t matcher)
 Consumes the UTF-16 matcher iterator object created by decancer_find_utf16 and returns a matches object.
 
DECANCER_EXPORT void decancer_matcher_utf16_free (decancer_matcher_utf16_t matcher)
 Frees the UTF-16 matcher iterator object created by decancer_find_utf16.
 
DECANCER_EXPORT decancer_matches_t decancer_matches_clone (decancer_matches_t matches)
 Clones the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
 
DECANCER_EXPORT void decancer_matches_free (decancer_matches_t matches)
 Frees the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
 
DECANCER_EXPORT void decancer_translation_init (decancer_translation_t *translation)
 Initiates a newly created translation struct for use.
 
DECANCER_EXPORT void decancer_translation_clone (const decancer_translation_t *translation_in, decancer_translation_t *translation_out)
 Clones the translation struct used in decancer_cure_char.
 
DECANCER_EXPORT void decancer_translation_free (decancer_translation_t *translation)
 Frees the translation struct used in decancer_cure_char.
 
DECANCER_EXPORT decancer_cured_t decancer_cured_clone (decancer_cured_t cured)
 Clones the cured string object created by decancer_cure and decancer_cure_utf16.
 
DECANCER_EXPORT void decancer_cured_free (decancer_cured_t cured)
 Frees the cured string object created by decancer_cure and decancer_cure_utf16.
 

Detailed Description

A library that removes common unicode confusables/homoglyphs from strings.

Author
null8626
Date
2024-10-29
Version
3.2.8

Macro Definition Documentation

◆ DECANCER_OPTION_ALL

#define DECANCER_OPTION_ALL   0x1ffffff

A configuration where every option is enabled.

Since
3.2.0

◆ DECANCER_OPTION_ALPHANUMERIC_ONLY

#define DECANCER_OPTION_ALPHANUMERIC_ONLY   (1 << 24)

Removes all non-alphanumeric characters from the result.

See also
DECANCER_OPTION_ASCII_ONLY
Since
3.2.0

◆ DECANCER_OPTION_ASCII_ONLY

#define DECANCER_OPTION_ASCII_ONLY   (1 << 23)

Removes all non-ASCII characters from the result.

See also
DECANCER_OPTION_ALPHANUMERIC_ONLY
Since
3.2.0

◆ DECANCER_OPTION_DEFAULT

#define DECANCER_OPTION_DEFAULT   0

Uses decancer's default options – AKA to be AS AGGRESSIVE AS POSSIBLE.

This makes decancer cure as many characters as possible and turn all the output characters to lowercase.

Since
3.0.0

◆ DECANCER_OPTION_DISABLE_BIDI

#define DECANCER_OPTION_DISABLE_BIDI   (1 << 1)

Prevents decancer from applying the Unicode Bidirectional Algorithm.

Use this ONLY when you don't expect any right-to-left characters. Enabling this option has no effect if it's called on decancer_cure_char.

See also
DECANCER_OPTION_RETAIN_ARABIC
DECANCER_OPTION_RETAIN_HEBREW
Warning
This speeds up the function call, but CAN BREAK right-to-left characters. It's highly recommended to also use DECANCER_OPTION_RETAIN_ARABIC and DECANCER_OPTION_RETAIN_HEBREW.
Since
3.0.0

◆ DECANCER_OPTION_PURE_HOMOGLYPH

#define DECANCER_OPTION_PURE_HOMOGLYPH   0x3ffffc

Prevents decancer from curing characters from major foreign writing systems, including diacritics.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_ARABIC

#define DECANCER_OPTION_RETAIN_ARABIC   (1 << 6)

Prevents decancer from curing all arabic characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_ARMENIAN

#define DECANCER_OPTION_RETAIN_ARMENIAN   (1 << 9)

Prevents decancer from curing all armenian characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_BENGALI

#define DECANCER_OPTION_RETAIN_BENGALI   (1 << 8)

Prevents decancer from curing all bengali characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_BRAILLE

#define DECANCER_OPTION_RETAIN_BRAILLE   (1 << 20)

Prevents decancer from curing all braille characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_BURMESE

#define DECANCER_OPTION_RETAIN_BURMESE   (1 << 14)

Prevents decancer from curing all burmese characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_CAPITALIZATION

#define DECANCER_OPTION_RETAIN_CAPITALIZATION   (1 << 0)

Prevents decancer from changing all characters to lowercase. Therefore, if the input character is in uppercase, the output character will be in uppercase as well.

Note
Many confusables are neither an uppercase nor a lowercase character. Therefore, decancer defaults to displaying the translation in LOWERCASE.
Since
3.0.0

◆ DECANCER_OPTION_RETAIN_CHINESE

#define DECANCER_OPTION_RETAIN_CHINESE   (1 << 17)

Prevents decancer from curing all chinese characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_CYRILLIC

#define DECANCER_OPTION_RETAIN_CYRILLIC   (1 << 4)

Prevents decancer from curing all cyrillic characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_DEVANAGARI

#define DECANCER_OPTION_RETAIN_DEVANAGARI   (1 << 7)

Prevents decancer from curing all devanagari characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_DIACRITICS

#define DECANCER_OPTION_RETAIN_DIACRITICS   (1 << 2)

Prevents decancer from curing characters WITH diacritics or accents.

Note
Decancer can still cure standalone diacritic characters, which is used in Zalgo texts.
Since
3.0.0

◆ DECANCER_OPTION_RETAIN_EMOJIS

#define DECANCER_OPTION_RETAIN_EMOJIS   (1 << 21)

Prevents decancer from curing all emojis.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_GREEK

#define DECANCER_OPTION_RETAIN_GREEK   (1 << 3)

Prevents decancer from curing all greek characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_GUJARATI

#define DECANCER_OPTION_RETAIN_GUJARATI   (1 << 10)

Prevents decancer from curing all gujarati characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_HEBREW

#define DECANCER_OPTION_RETAIN_HEBREW   (1 << 5)

Prevents decancer from curing all hebrew characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_JAPANESE

#define DECANCER_OPTION_RETAIN_JAPANESE   (1 << 18)

Prevents decancer from curing all katakana and hiragana characters.

See also
DECANCER_OPTION_RETAIN_CHINESE
Note
To also prevent decancer from curing kanji characters, use DECANCER_OPTION_RETAIN_CHINESE.
Since
3.0.0

◆ DECANCER_OPTION_RETAIN_KHMER

#define DECANCER_OPTION_RETAIN_KHMER   (1 << 15)

Prevents decancer from curing all khmer characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_KOREAN

#define DECANCER_OPTION_RETAIN_KOREAN   (1 << 19)

Prevents decancer from curing all korean characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_LAO

#define DECANCER_OPTION_RETAIN_LAO   (1 << 13)

Prevents decancer from curing all lao characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_MONGOLIAN

#define DECANCER_OPTION_RETAIN_MONGOLIAN   (1 << 16)

Prevents decancer from curing all mongolian characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_TAMIL

#define DECANCER_OPTION_RETAIN_TAMIL   (1 << 11)

Prevents decancer from curing all tamil characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_THAI

#define DECANCER_OPTION_RETAIN_THAI   (1 << 12)

Prevents decancer from curing all thai characters.

Since
3.0.0

◆ DECANCER_OPTION_RETAIN_TURKISH

#define DECANCER_OPTION_RETAIN_TURKISH   (1 << 22)

Prevents decancer from curing all turkish characters.

See also
DECANCER_OPTION_RETAIN_CAPITALIZATION
Note
To also prevent decancer from curing the uppercase dotted i character (İ), use DECANCER_OPTION_RETAIN_CAPITALIZATION.
Since
3.2.4

◆ DECANCER_TRANSLATION_KIND_CHARACTER

#define DECANCER_TRANSLATION_KIND_CHARACTER   0

The translation is a single unicode character.

Since
1.5.4

◆ DECANCER_TRANSLATION_KIND_NONE

#define DECANCER_TRANSLATION_KIND_NONE   2

The translation is an empty string.

You can get this when the input character is a control character, surrogate, combining character (e.g diacritics), private use character, byte order character, or any invalid unicode value.

Since
1.5.4

◆ DECANCER_TRANSLATION_KIND_STRING

#define DECANCER_TRANSLATION_KIND_STRING   1

The translation is a string.

Since
1.5.4

◆ DECANCER_VERSION

#define DECANCER_VERSION   0x030208

The current version used by this library.

See also
DECANCER_VERSION_MAJOR
DECANCER_VERSION_MINOR
DECANCER_VERSION_PATCH
Since
3.2.2

◆ DECANCER_VERSION_MAJOR

#define DECANCER_VERSION_MAJOR   ((DECANCER_VERSION & 0xff0000) >> 16)

The current major version used by this library.

See also
DECANCER_VERSION
DECANCER_VERSION_MINOR
DECANCER_VERSION_PATCH
Since
3.2.2

◆ DECANCER_VERSION_MINOR

#define DECANCER_VERSION_MINOR   ((DECANCER_VERSION & 0xff00) >> 8)

The current minor version used by this library.

See also
DECANCER_VERSION
DECANCER_VERSION_MAJOR
DECANCER_VERSION_PATCH
Since
3.2.2

◆ DECANCER_VERSION_PATCH

#define DECANCER_VERSION_PATCH   (DECANCER_VERSION & 0xff)

The current patch version used by this library.

See also
DECANCER_VERSION
DECANCER_VERSION_MAJOR
DECANCER_VERSION_MINOR
Since
3.2.2

Typedef Documentation

◆ decancer_cured_raw_utf16_t

Represents a rust object returned from decancer_cured_raw_utf16. This value has no use other than retaining the lifetime of the returned UTF-16 pointer.

See also
decancer_cured_raw_utf16
decancer_cured_raw_utf16_clone
decancer_cured_raw_utf16_free
Note
You are responsible for freeing this object later by calling decancer_cured_raw_utf16_free.
Since
3.2.2

◆ decancer_cured_t

typedef void* decancer_cured_t

Represents a cured string returned from decancer_cure and decancer_cure_utf16.

See also
decancer_cure
decancer_cure_utf16
decancer_cured_clone
decancer_cured_free
Note
You are responsible for freeing this object later by calling decancer_cured_free.
Since
1.5.3

◆ decancer_matcher_t

typedef void* decancer_matcher_t

Represents a UTF-8 matcher iterator object returned from decancer_find.

See also
decancer_matcher_utf16_t
decancer_find
decancer_matcher_next
decancer_matcher_consume
decancer_matcher_free
Note
For its UTF-16 counterpart, see decancer_matcher_utf16_t.
You are responsible for freeing this object later by calling decancer_matcher_free.
Since
3.1.0

◆ decancer_matcher_utf16_t

typedef void* decancer_matcher_utf16_t

Represents a UTF-16 matcher iterator object returned from decancer_find_utf16.

See also
decancer_matcher_t
decancer_find_utf16
decancer_matcher_utf16_consume
decancer_matcher_utf16_next
decancer_matcher_utf16_free
Note
For its UTF-8 counterpart, see decancer_matcher_t.
You are responsible for freeing this object later by calling decancer_matcher_utf16_free.
Since
3.2.2

◆ decancer_matches_t

typedef void* decancer_matches_t

Represents a matcher iterator object returned from decancer_find_multiple and decancer_find_multiple_utf16.

See also
decancer_find_multiple
decancer_find_multiple_utf16
decancer_matcher_consume
decancer_matcher_utf16_consume
decancer_matches_clone
decancer_matches_free
Note
You are responsible for freeing this object later by calling decancer_matches_free.
Since
3.1.1

◆ decancer_options_t

Function Documentation

◆ decancer_censor()

DECANCER_EXPORT bool decancer_censor ( decancer_cured_t cured,
const uint8_t * other_str,
const size_t other_size,
const uint32_t replacement_char )

Censors every similar-looking match of the specified UTF-8 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Reports a failed check on stderr, marks the program as failed and jumps to
 * the END cleanup label. Wrapped in do/while(0) so that it expands to exactly
 * one statement. Expects an `int ret` variable and an `END:` label to be
 * present in the enclosing function.
 */
#define decancer_assert(expr, notes)                       \
  do {                                                     \
    if (!(expr)) {                                         \
      fprintf(stderr, "assertion failure at " notes "\n"); \
      ret = 1;                                             \
      goto END;                                            \
    }                                                      \
  } while (0)

/*
 * Example: cures a UTF-8 string, censors every match of "funny" with '*' and
 * verifies the resulting bytes.
 *
 * Returns 0 when every check passes, 1 otherwise.
 */
int main(void) {
  /* Expected bytes after curing and censoring (terminator not counted). */
  static const char expected[] = "very ***** text";
  int ret = 0;

  // UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
  uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
                     0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
                     0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};

  decancer_error_t error;
  decancer_cured_t cured;
  size_t raw_contents_size;
  const uint8_t *raw_contents;

  cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
  if (cured == NULL) {
    /* Nothing was allocated, so there is nothing to release on this path. */
    fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
    return 1;
  }

  // 0x2a is the ASCII representation of '*'
  decancer_assert(decancer_censor(cured, (const uint8_t *)"funny", 5, 0x2a), "decancer_censor");

  raw_contents = decancer_cured_raw(cured, NULL, &raw_contents_size);
  /* Check the length first so memcmp can neither over-read nor pass vacuously. */
  decancer_assert(raw_contents_size == sizeof(expected) - 1, "length of censor result");
  decancer_assert(!memcmp(raw_contents, expected, raw_contents_size), "censor result");

END:
  /* The cured object is owned by the caller and must be released. */
  decancer_cured_free(cured);
  return ret;
}
A library that removes common unicode confusables/homoglyphs from strings.
DECANCER_EXPORT void decancer_cured_free(decancer_cured_t cured)
Frees the cured string object created by decancer_cure and decancer_cure_utf16.
#define DECANCER_OPTION_DEFAULT
Uses decancer's default options – AKA to be AS AGGRESSIVE AS POSSIBLE.
Definition decancer.h:90
void * decancer_cured_t
Represents a cured string returned from decancer_cure and decancer_cure_utf16.
Definition decancer.h:498
DECANCER_EXPORT bool decancer_censor(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint32_t replacement_char)
Censors every similar-looking match of the specified UTF-8 encoded string.
DECANCER_EXPORT decancer_cured_t decancer_cure(const uint8_t *input_str, const size_t input_size, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-8 encoded string.
DECANCER_EXPORT const uint8_t * decancer_cured_raw(decancer_cured_t cured, const decancer_match_t *match, size_t *output_size)
Retrieves the raw UTF-8 bytes from a cured string object.
Represents an error caused by decancer not being able to cure a string.
Definition decancer.h:289
uint8_t message_length
The length of the error message.
Definition decancer.h:300
const char * message
Null-terminated ASCII encoded error message.
Definition decancer.h:294
Parameters
cured: The cured string object.
other_str: The UTF-8 encoded string to match with.
other_size: UTF-8 size of the other string, in bytes.
replacement_char: The censor unicode codepoint. Ideally '*' (0x2a) or '-' (0x2d).
See also
decancer_censor_utf16
decancer_censor_multiple
decancer_censor_multiple_utf16
Returns
bool true on success, or false on failure due to invalid encoding.
Note
For its UTF-16 counterpart, see decancer_censor_utf16.
Since
3.1.1

◆ decancer_censor_multiple()

DECANCER_EXPORT bool decancer_censor_multiple ( decancer_cured_t cured,
const decancer_keyword_t * other,
const size_t other_length,
const uint32_t replacement_char )

Censors every similar-looking match of the specified list of UTF-8 keywords. Unlike decancer_censor, this function also takes note of overlapping matches.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Reports a failed check on stderr, marks the program as failed and jumps to
 * the END cleanup label. Wrapped in do/while(0) so that it expands to exactly
 * one statement. Expects an `int ret` variable and an `END:` label to be
 * present in the enclosing function.
 */
#define decancer_assert(expr, notes)                       \
  do {                                                     \
    if (!(expr)) {                                         \
      fprintf(stderr, "assertion failure at " notes "\n"); \
      ret = 1;                                             \
      goto END;                                            \
    }                                                      \
  } while (0)

/*
 * Example: cures a UTF-8 string, censors every match of the keywords "very"
 * and "funny" with '*' and verifies the resulting bytes.
 *
 * Returns 0 when every check passes, 1 otherwise.
 */
int main(void) {
  /* Expected bytes after curing and censoring (terminator not counted). */
  static const char expected[] = "**** ***** text";
  int ret = 0;

  // UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
  uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
                     0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
                     0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};

  /* Each entry pairs the keyword bytes with their length in bytes. */
  decancer_keyword_t keywords[] = {
    {(const uint8_t *)"very", 4},
    {(const uint8_t *)"funny", 5}
  };

  decancer_error_t error;
  decancer_cured_t cured;
  size_t raw_contents_size;
  const uint8_t *raw_contents;

  cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
  if (cured == NULL) {
    /* Nothing was allocated, so there is nothing to release on this path. */
    fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
    return 1;
  }

  // 0x2a is the ASCII representation of '*'
  decancer_assert(decancer_censor_multiple(cured, keywords, sizeof(keywords) / sizeof(decancer_keyword_t), 0x2a), "decancer_censor_multiple");

  raw_contents = decancer_cured_raw(cured, NULL, &raw_contents_size);
  /* Check the length first so memcmp can neither over-read nor pass vacuously. */
  decancer_assert(raw_contents_size == sizeof(expected) - 1, "length of censor multiple result");
  decancer_assert(!memcmp(raw_contents, expected, raw_contents_size), "censor multiple result");

END:
  /* The cured object is owned by the caller and must be released. */
  decancer_cured_free(cured);
  return ret;
}
DECANCER_EXPORT bool decancer_censor_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified list of UTF-8 keywords. Unlike decancer_censor,...
Represents a UTF-8 encoded keyword. This struct is often used inside an array.
Definition decancer.h:318
Parameters
cured: The cured string object.
other: A list of UTF-8 keywords to match with.
other_length: Length of the keywords array in units of decancer_keyword_t – or sizeof(array) / sizeof(decancer_keyword_t).
replacement_char: The censor unicode codepoint. Ideally '*' (0x2a) or '-' (0x2d).
See also
decancer_censor
decancer_censor_utf16
decancer_censor_multiple_utf16
Returns
bool true on success, or false on failure due to invalid encoding.
Note
For its UTF-16 counterpart, see decancer_censor_multiple_utf16.
Since
3.1.1

◆ decancer_censor_multiple_utf16()

DECANCER_EXPORT bool decancer_censor_multiple_utf16 ( decancer_cured_t cured,
const decancer_keyword_utf16_t * other,
const size_t other_length,
const uint32_t replacement_char )

Censors every similar-looking match of the specified list of UTF-16 keywords. Unlike decancer_censor_utf16, this function also takes note of overlapping matches.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Reports a failed check on stderr, marks the program as failed and jumps to
 * the given cleanup label. Wrapped in do/while(0) so that it expands to
 * exactly one statement. Expects an `int ret` variable in the enclosing
 * function; `label` selects how much cleanup has to run.
 */
#define decancer_assert(expr, notes, label)                \
  do {                                                     \
    if (!(expr)) {                                         \
      fprintf(stderr, "assertion failure at " notes "\n"); \
      ret = 1;                                             \
      goto label;                                          \
    }                                                      \
  } while (0)

/*
 * Example: cures a UTF-16 string, censors every match of the UTF-16 keywords
 * "very" and "funny" with '*' and verifies the resulting code units.
 *
 * Returns 0 when every check passes, 1 otherwise.
 */
int main(void) {
  int ret = 0;

  // UTF-16 code units for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
  uint16_t input[] = {
    0x0076, 0xff25, 0x24e1,
    0xd835, 0xdd02, 0x0020,
    0xd835, 0xdd3d, 0xd835,
    0xdd4c, 0x0147, 0x2115,
    0xff59, 0x0020, 0x0163,
    0x4e47, 0xd835, 0xdd4f,
    0xd835, 0xdce3
  };

  // UTF-16 code units for "very"
  uint16_t very[] = { 0x76, 0x65, 0x72, 0x79 };

  // UTF-16 code units for "funny"
  uint16_t funny[] = { 0x66, 0x75, 0x6e, 0x6e, 0x79 };

  // UTF-16 code units for "**** ***** text"
  uint16_t expected_contents[] = { 0x2a, 0x2a, 0x2a, 0x2a, 0x20, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x20, 0x74, 0x65, 0x78, 0x74 };

  /* Each entry pairs the keyword with its length in UTF-16 code units. */
  decancer_keyword_utf16_t keywords[] = {
    {very, sizeof(very) / sizeof(uint16_t)},
    {funny, sizeof(funny) / sizeof(uint16_t)}
  };

  decancer_error_t error;
  decancer_cured_t cured;
  size_t raw_contents_length;
  uint16_t *raw_contents;
  decancer_cured_raw_utf16_t raw_contents_handle;

  cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
  if (cured == NULL) {
    /* Nothing was allocated, so there is nothing to release on this path. */
    fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
    return 1;
  }

  // 0x2a is the ASCII representation of '*'
  decancer_assert(decancer_censor_multiple_utf16(cured, keywords, sizeof(keywords) / sizeof(decancer_keyword_utf16_t), 0x2a), "decancer_censor_multiple_utf16", CURED_END);

  /* The handle keeps the returned UTF-16 buffer alive until it is freed. */
  raw_contents_handle = decancer_cured_raw_utf16(cured, NULL, &raw_contents, &raw_contents_length);
  decancer_assert(raw_contents_length == (sizeof(expected_contents) / sizeof(uint16_t)), "length of censor multiple result", RAW_CONTENTS_END);
  decancer_assert(!memcmp(raw_contents, expected_contents, sizeof(expected_contents)), "censor multiple result", RAW_CONTENTS_END);

RAW_CONTENTS_END:
  decancer_cured_raw_utf16_free(raw_contents_handle);
CURED_END:
  /* The cured object is owned by the caller and must be released. */
  decancer_cured_free(cured);
  return ret;
}
DECANCER_EXPORT void decancer_cured_raw_utf16_free(decancer_cured_raw_utf16_t raw_utf16_handle)
Frees the rust object created by decancer_cured_raw_utf16.
DECANCER_EXPORT bool decancer_censor_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified list of UTF-16 keywords. Unlike decancer_censor_...
DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16(decancer_cured_t cured, const decancer_match_t *match, uint16_t **output_ptr, size_t *output_length)
Retrieves the raw UTF-16 bytes from a cured string object.
void * decancer_cured_raw_utf16_t
Represents a rust object returned from decancer_cured_raw_utf16. This value has no use other than ret...
Definition decancer.h:370
DECANCER_EXPORT decancer_cured_t decancer_cure_utf16(const uint16_t *input_str, const size_t input_length, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-16 encoded string.
Represents a UTF-16 encoded keyword. This struct is often used inside an array.
Definition decancer.h:347
Parameters
curedThe cured string object.
otherA list of UTF-16 keywords to match with.
other_lengthLength of the keywords array in units of decancer_keyword_utf16_t – or sizeof(array) / sizeof(decancer_keyword_utf16_t).
replacement_charThe censor unicode codepoint. Ideally '*' (0x2a) or '-' (0x2d).
See also
decancer_censor
decancer_censor_utf16
decancer_censor_multiple
Returns
bool true on success, or false on failure due to invalid encoding.
Note
For its UTF-8 counterpart, see decancer_censor_multiple.
Since
3.2.2

◆ decancer_censor_utf16()

DECANCER_EXPORT bool decancer_censor_utf16 ( decancer_cured_t cured,
const uint16_t * other_str,
const size_t other_length,
const uint32_t replacement_char )

Censors every similar-looking match of the specified UTF-16 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "funny"
uint16_t funny[] = { 0x66, 0x75, 0x6e, 0x6e, 0x79 };
// UTF-16 bytes for "very ***** text"
uint16_t expected_contents[] = { 0x76, 0x65, 0x72, 0x79, 0x20, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x20, 0x74, 0x65, 0x78, 0x74 };
size_t raw_contents_length;
uint16_t* raw_contents;
decancer_cured_raw_utf16_t raw_contents_handle;
cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
// 0x2a is the ASCII representation of '*'
decancer_assert(decancer_censor_utf16(cured, funny, sizeof(funny) / sizeof(uint16_t), 0x2a), "decancer_censor_utf16", CURED_END);
raw_contents_handle = decancer_cured_raw_utf16(cured, NULL, &raw_contents, &raw_contents_length);
decancer_assert(raw_contents_length == (sizeof(expected_contents) / sizeof(uint16_t)), "length of censor result", RAW_CONTENTS_END);
decancer_assert(!memcmp(raw_contents, expected_contents, sizeof(expected_contents)), "censor result", RAW_CONTENTS_END);
RAW_CONTENTS_END:
decancer_cured_raw_utf16_free(raw_contents_handle);
CURED_END:
return ret;
}
DECANCER_EXPORT bool decancer_censor_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint32_t replacement_char)
Censors every similar-looking match of the specified UTF-16 encoded string.
Parameters
curedThe cured string object.
other_strThe UTF-16 encoded string to match with.
other_lengthLength of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
replacement_charThe censor unicode codepoint. Ideally '*' (0x2a) or '-' (0x2d).
See also
decancer_censor
decancer_censor_multiple
decancer_censor_multiple_utf16
Returns
bool true on success, or false on failure due to invalid encoding.
Note
For its UTF-8 counterpart, see decancer_censor.
Since
3.2.2

◆ decancer_contains()

DECANCER_EXPORT bool decancer_contains ( decancer_cured_t cured,
const uint8_t * other_str,
const size_t other_size )

Checks if the cured string similarly contains the specified UTF-8 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
decancer_cured_t cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_contains(cured, "funny", 5), "decancer_contains");
END:
return ret;
}
DECANCER_EXPORT bool decancer_contains(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly contains the specified UTF-8 encoded string.
Parameters
curedThe cured string object.
other_strThe UTF-8 encoded string to match with.
other_sizeUTF-8 size of the other string, in bytes.
See also
decancer_contains_utf16
Returns
bool true if the cured string similarly contains the specified string, false otherwise.
Note
For its UTF-16 counterpart, see decancer_contains_utf16.
Since
1.5.3

◆ decancer_contains_utf16()

DECANCER_EXPORT bool decancer_contains_utf16 ( decancer_cured_t cured,
const uint16_t * other_str,
const size_t other_length )

Checks if the cured string similarly contains the specified UTF-16 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "funny"
uint16_t funny[] = { 0x66, 0x75, 0x6e, 0x6e, 0x79 };
decancer_cured_t cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_contains_utf16(cured, funny, sizeof(funny) / sizeof(uint16_t)), "decancer_contains_utf16");
END:
return ret;
}
DECANCER_EXPORT bool decancer_contains_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly contains the specified UTF-16 encoded string.
Parameters
curedThe cured string object.
other_strThe UTF-16 encoded string to match with.
other_lengthLength of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
See also
decancer_contains
Returns
bool true if the cured string similarly contains the specified string, false otherwise.
Note
For its UTF-8 counterpart, see decancer_contains.
Since
3.2.2

◆ decancer_cure()

DECANCER_EXPORT decancer_cured_t decancer_cure ( const uint8_t * input_str,
const size_t input_size,
const decancer_options_t options,
decancer_error_t * error )

Cures a UTF-8 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
int main(void) {
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
decancer_cured_t cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
return 0;
}
Parameters
input_strThe UTF-8 encoded string.
input_sizeUTF-8 size of the input string, in bytes.
optionsOptions to customize decancer's curing behavior. To use decancer's default behavior, pass in DECANCER_OPTION_DEFAULT.
errorA pointer to a decancer_error_t struct. This pointer can be NULL if you want to ignore errors.
See also
decancer_cure_utf16
decancer_cure_char
decancer_cured_clone
decancer_cured_free
Returns
decancer_cured_t The cured string object or NULL on failure – see the modified error struct for more details.
Note
For its UTF-16 counterpart, see decancer_cure_utf16.
You are responsible for freeing the returned object later by calling decancer_cured_free.
Since
1.5.3

◆ decancer_cure_char()

DECANCER_EXPORT void decancer_cure_char ( const uint32_t input,
const decancer_options_t options,
decancer_translation_t * translation )

Cures a single unicode codepoint.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
decancer_assert(translation.kind == DECANCER_TRANSLATION_KIND_CHARACTER, "translation not a character");
decancer_assert(translation.contents.character == 0x65, "character translation contents");
decancer_assert(translation.kind == DECANCER_TRANSLATION_KIND_STRING, "translation not a string");
decancer_assert(translation.contents.string.size == 2, "string translation size");
decancer_assert(translation.contents.string.contents[0] == 'a' && translation.contents.string.contents[1] == 'e', "string translation contents");
decancer_assert(translation.kind == DECANCER_TRANSLATION_KIND_NONE, "translation not an empty string");
END:
return ret;
}
#define DECANCER_TRANSLATION_KIND_STRING
The translation is a string.
Definition decancer.h:74
DECANCER_EXPORT void decancer_translation_init(decancer_translation_t *translation)
Initiates a newly created translation struct for use.
#define DECANCER_TRANSLATION_KIND_CHARACTER
The translation is a single unicode character.
Definition decancer.h:68
#define DECANCER_TRANSLATION_KIND_NONE
The translation is an empty string.
Definition decancer.h:82
DECANCER_EXPORT void decancer_translation_free(decancer_translation_t *translation)
Frees the translation struct used in decancer_cure_char.
DECANCER_EXPORT void decancer_cure_char(const uint32_t input, const decancer_options_t options, decancer_translation_t *translation)
Cures a single unicode codepoint.
Represents a translation of a unicode codepoint.
Definition decancer.h:440
uint8_t kind
The type of the translation result. This can be any of the following values:
Definition decancer.h:448
const uint8_t * contents
Raw UTF-8 encoded string.
Definition decancer.h:470
Parameters
inputThe unicode codepoint.
optionsOptions to customize decancer's curing behavior. To use decancer's default behavior, pass in DECANCER_OPTION_DEFAULT.
translationA pointer to the output translation struct.
See also
decancer_cure
decancer_cure_utf16
decancer_translation_init
decancer_translation_clone
decancer_translation_free
Note
You are responsible for freeing the translation struct later by passing it as a pointer to decancer_translation_free.
Warning
You MUST pass the translation struct to decancer_translation_init first before using this function. Not doing so could result in undefined behavior.
Since
1.5.4

◆ decancer_cure_utf16()

DECANCER_EXPORT decancer_cured_t decancer_cure_utf16 ( const uint16_t * input_str,
const size_t input_length,
const decancer_options_t options,
decancer_error_t * error )

Cures a UTF-16 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
int main(void) {
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
decancer_cured_t cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
return 0;
}
Parameters
input_strThe UTF-16 encoded string.
input_lengthLength of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
optionsOptions to customize decancer's curing behavior. To use decancer's default behavior, pass in DECANCER_OPTION_DEFAULT.
errorA pointer to a decancer_error_t struct. This pointer can be NULL if you want to ignore errors.
See also
decancer_cure
decancer_cure_char
decancer_cured_clone
decancer_cured_free
Returns
decancer_cured_t The cured string object or NULL on failure – see the modified error struct for more details.
Note
For its UTF-8 counterpart, see decancer_cure.
You are responsible for freeing the returned object later by calling decancer_cured_free.
Since
3.2.2

◆ decancer_cured_clone()

DECANCER_EXPORT decancer_cured_t decancer_cured_clone ( decancer_cured_t cured)

Clones the cured string object created by decancer_cure and decancer_cure_utf16.

Parameters
curedThe cured string object to clone.
Returns
decancer_cured_t The newly cloned cured string object.
See also
decancer_cure
decancer_cure_utf16
decancer_cured_free
Note
You are responsible for freeing the returned object later by calling decancer_cured_free.
Since
3.2.2

◆ decancer_cured_free()

DECANCER_EXPORT void decancer_cured_free ( decancer_cured_t cured)

Frees the cured string object created by decancer_cure and decancer_cure_utf16.

Parameters
curedThe cured string object to free.
See also
decancer_cure
decancer_cure_utf16
decancer_cured_clone
Since
3.1.0

◆ decancer_cured_raw()

DECANCER_EXPORT const uint8_t * decancer_cured_raw ( decancer_cured_t cured,
const decancer_match_t * match,
size_t * output_size )

Retrieves the raw UTF-8 bytes from a cured string object.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
decancer_cured_t cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
size_t raw_contents_size;
const uint8_t* raw_contents = decancer_cured_raw(cured, NULL, &raw_contents_size);
decancer_assert(raw_contents_size == 15, "size of very funny text");
decancer_assert(!memcmp(raw_contents, "very funny text", raw_contents_size), "contents of very funny text");
END:
return ret;
}
Parameters
curedThe cured string object.
matchA pointer to a match object if you just want a slice, otherwise NULL if you want the entire string.
output_sizeA pointer to the output's UTF-8 size, in bytes.
Returns
const uint8_t* An immutable UTF-8 pointer representing raw contents of the cured string object.
See also
decancer_cured_raw_utf16
Note
For its UTF-16 counterpart, see decancer_cured_raw_utf16.
The returned pointer remains valid until cured gets passed onto decancer_cured_free.
Since
3.1.1

◆ decancer_cured_raw_utf16()

DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16 ( decancer_cured_t cured,
const decancer_match_t * match,
uint16_t ** output_ptr,
size_t * output_length )

Retrieves the raw UTF-16 bytes from a cured string object.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "very funny text"
uint16_t expected_contents[] = { 0x76, 0x65, 0x72, 0x79, 0x20, 0x66, 0x75, 0x6e, 0x6e, 0x79, 0x20, 0x74, 0x65, 0x78, 0x74 };
decancer_cured_t cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
size_t raw_contents_length;
uint16_t* raw_contents;
decancer_cured_raw_utf16_t raw_contents_handle = decancer_cured_raw_utf16(cured, NULL, &raw_contents, &raw_contents_length);
decancer_assert(raw_contents_length == (sizeof(expected_contents) / sizeof(uint16_t)), "length of very funny text");
decancer_assert(!memcmp(raw_contents, expected_contents, sizeof(expected_contents)), "contents of very funny text");
END:
decancer_cured_raw_utf16_free(raw_contents_handle);
return ret;
}
Parameters
curedThe cured string object.
matchA pointer to a match object if you just want a slice, otherwise NULL if you want the entire string.
output_ptrA pointer to the output's UTF-16 encoded string.
output_lengthA pointer to the length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
See also
decancer_cured_raw
decancer_cured_raw_utf16_clone
decancer_cured_raw_utf16_free
Returns
decancer_cured_raw_utf16_t A rust object. This value has no use other than retaining the lifetime of the returned UTF-16 pointer.
Note
For its UTF-8 counterpart, see decancer_cured_raw.
You are responsible for freeing the returned object later by calling decancer_cured_raw_utf16_free.
The UTF-16 encoded string remains valid until the returned object gets passed to decancer_cured_raw_utf16_free.
Since
3.2.2

◆ decancer_cured_raw_utf16_clone()

DECANCER_EXPORT decancer_cured_raw_utf16_t decancer_cured_raw_utf16_clone ( decancer_cured_raw_utf16_t raw_utf16_handle)

Clones the rust object created by decancer_cured_raw_utf16.

Parameters
raw_utf16_handleThe rust object to clone.
Returns
decancer_cured_raw_utf16_t The newly cloned rust object.
See also
decancer_cured_raw_utf16
decancer_cured_raw_utf16_free
Note
You are responsible for freeing the returned object later by calling decancer_cured_raw_utf16_free.
Since
3.2.2

◆ decancer_cured_raw_utf16_free()

DECANCER_EXPORT void decancer_cured_raw_utf16_free ( decancer_cured_raw_utf16_t raw_utf16_handle)

Frees the rust object created by decancer_cured_raw_utf16.

Parameters
raw_utf16_handleThe rust object to free.
See also
decancer_cured_raw_utf16
decancer_cured_raw_utf16_clone
Since
3.2.2

◆ decancer_ends_with()

DECANCER_EXPORT bool decancer_ends_with ( decancer_cured_t cured,
const uint8_t * other_str,
const size_t other_size )

Checks if the cured string similarly ends with the specified UTF-8 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
decancer_cured_t cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_ends_with(cured, "text", 4), "decancer_ends_with");
END:
return ret;
}
DECANCER_EXPORT bool decancer_ends_with(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly ends with the specified UTF-8 encoded string.
Parameters
curedThe cured string object.
other_strThe UTF-8 encoded string to match with.
other_sizeUTF-8 size of the other string, in bytes.
See also
decancer_ends_with_utf16
Returns
bool true if the cured string similarly ends with the specified string, false otherwise.
Note
For its UTF-16 counterpart, see decancer_ends_with_utf16.
Since
1.5.3

◆ decancer_ends_with_utf16()

DECANCER_EXPORT bool decancer_ends_with_utf16 ( decancer_cured_t cured,
const uint16_t * other_str,
const size_t other_length )

Checks if the cured string similarly ends with the specified UTF-16 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "text"
uint16_t text[] = { 0x74, 0x65, 0x78, 0x74 };
decancer_cured_t cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_ends_with_utf16(cured, text, sizeof(text) / sizeof(uint16_t)), "decancer_ends_with_utf16");
END:
return ret;
}
DECANCER_EXPORT bool decancer_ends_with_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly ends with the specified UTF-16 encoded string.
Parameters
curedThe cured string object.
other_strThe UTF-16 encoded string to match with.
other_lengthLength of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
See also
decancer_ends_with
Returns
bool true if the cured string similarly ends with the specified string, false otherwise.
Note
For its UTF-8 counterpart, see decancer_ends_with.
Since
3.2.2

◆ decancer_equals()

DECANCER_EXPORT bool decancer_equals ( decancer_cured_t cured,
const uint8_t * other_str,
const size_t other_size )

Checks if the cured string is similar to the specified UTF-8 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
decancer_cured_t cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_equals(cured, "very funny text", 15), "decancer_equals");
END:
return ret;
}
DECANCER_EXPORT bool decancer_equals(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string is similar with the specified UTF-8 encoded string.
Parameters
curedThe cured string object.
other_strThe UTF-8 encoded string to match with.
other_sizeUTF-8 size of the other string, in bytes.
See also
decancer_equals_utf16
Returns
bool true if the cured string is similar to the specified string, false otherwise.
Note
For its UTF-16 counterpart, see decancer_equals_utf16.
Since
1.5.3

◆ decancer_equals_utf16()

DECANCER_EXPORT bool decancer_equals_utf16 ( decancer_cured_t cured,
const uint16_t * other_str,
const size_t other_length )

Checks if the cured string is similar to the specified UTF-16 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "very funny text"
uint16_t expected_contents[] = { 0x76, 0x65, 0x72, 0x79, 0x20, 0x66, 0x75, 0x6e, 0x6e, 0x79, 0x20, 0x74, 0x65, 0x78, 0x74 };
decancer_cured_t cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_equals_utf16(cured, expected_contents, sizeof(expected_contents) / sizeof(uint16_t)), "decancer_equals_utf16");
END:
return ret;
}
DECANCER_EXPORT bool decancer_equals_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string is similar with the specified UTF-16 encoded string.
Parameters
curedThe cured string object.
other_strThe UTF-16 encoded string to match with.
other_lengthLength of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
See also
decancer_equals
Returns
bool true if the cured string is similar to the specified string, false otherwise.
Note
For its UTF-8 counterpart, see decancer_equals.
Since
3.2.2

◆ decancer_find()

DECANCER_EXPORT decancer_matcher_t decancer_find ( decancer_cured_t cured,
const uint8_t * other_str,
const size_t other_size )

Finds every similar-looking match of a UTF-8 encoded string in the cured string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
size_t raw_contents_size;
const uint8_t* raw_contents;
cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
matcher = decancer_find(cured, "funny", 5);
decancer_assert(matcher != NULL, "decancer_find", CURED_END);
decancer_assert(decancer_matcher_next(matcher, &match), "first iteration of decancer_matcher_next", MATCHER_END);
decancer_assert(match.start == 5, "start of funny", MATCHER_END);
decancer_assert(match.end == 10, "end of funny", MATCHER_END);
raw_contents = decancer_cured_raw(cured, &match, &raw_contents_size);
decancer_assert(raw_contents_size == 5, "size of funny", MATCHER_END);
decancer_assert(!memcmp(raw_contents, "funny", raw_contents_size), "contents of funny", MATCHER_END);
decancer_assert(!decancer_matcher_next(matcher, &match), "no more matches", MATCHER_END);
MATCHER_END:
CURED_END:
return ret;
}
DECANCER_EXPORT decancer_matcher_t decancer_find(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Finds every similar-looking match of a UTF-8 encoded string in the cured string.
void * decancer_matcher_t
Represents a UTF-8 matcher iterator object returned from decancer_find.
Definition decancer.h:384
DECANCER_EXPORT bool decancer_matcher_next(decancer_matcher_t matcher, decancer_match_t *match)
Iterates to the next element of a UTF-8 matcher iterator.
DECANCER_EXPORT void decancer_matcher_free(decancer_matcher_t matcher)
Frees the UTF-8 matcher iterator object created by decancer_find.
Represents a match in UTF-8 indices.
Definition decancer.h:516
size_t end
End of the match in UTF-8 indices (non-inclusive).
Definition decancer.h:527
size_t start
Start of the match in UTF-8 indices.
Definition decancer.h:521
Parameters
curedThe cured string object.
other_strThe UTF-8 encoded string to match with.
other_sizeUTF-8 size of the other string, in bytes.
See also
decancer_find_utf16
decancer_find_multiple
decancer_find_multiple_utf16
decancer_matcher_consume
decancer_matcher_next
decancer_matcher_free
Returns
decancer_matcher_t A UTF-8 matcher iterator object or NULL if the other string is not properly UTF-8 encoded.
Note
For its UTF-16 counterpart, see decancer_find_utf16.
You are responsible for freeing the returned object later by calling decancer_matcher_free.
Since
3.1.0

◆ decancer_find_multiple()

DECANCER_EXPORT decancer_matches_t decancer_find_multiple ( decancer_cured_t cured,
const decancer_keyword_t * other,
const size_t other_length )

Finds every similar-looking match from a list of UTF-8 keywords in the cured string. Unlike decancer_find, this function also takes note of overlapping matches and merges them together.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
decancer_keyword_t keywords[] = {
{"very", 4},
{"funny", 5}
};
size_t raw_matches_length;
const decancer_match_t* raw_matches;
const uint8_t* raw_contents;
size_t raw_contents_size;
cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
matches = decancer_find_multiple(cured, keywords, sizeof(keywords) / sizeof(decancer_keyword_t));
decancer_assert(matches != NULL, "decancer_find_multiple", CURED_END);
raw_matches = decancer_matches_raw(matches, &raw_matches_length);
decancer_assert(raw_matches_length == 2, "raw_matches_length", MATCHES_END);
decancer_assert(raw_matches[0].start == 0, "start of very", MATCHES_END);
decancer_assert(raw_matches[0].end == 4, "end of very", MATCHES_END);
raw_contents = decancer_cured_raw(cured, &raw_matches[0], &raw_contents_size);
decancer_assert(raw_contents_size == 4, "size of very", MATCHES_END);
decancer_assert(!memcmp(raw_contents, "very", raw_contents_size), "contents of very", MATCHES_END);
decancer_assert(raw_matches[1].start == 5, "start of funny", MATCHES_END);
decancer_assert(raw_matches[1].end == 10, "end of funny", MATCHES_END);
raw_contents = decancer_cured_raw(cured, &raw_matches[1], &raw_contents_size);
decancer_assert(raw_contents_size == 5, "size of funny", MATCHES_END);
decancer_assert(!memcmp(raw_contents, "funny", raw_contents_size), "contents of funny", MATCHES_END);
MATCHES_END:
CURED_END:
return ret;
}
DECANCER_EXPORT const decancer_match_t * decancer_matches_raw(decancer_matches_t matches, size_t *output_size)
Returns the raw list of every similar-looking match from a decancer_matches_t object.
DECANCER_EXPORT decancer_matches_t decancer_find_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length)
Finds every similar-looking match from a list of UTF-8 keywords in the cured string....
DECANCER_EXPORT void decancer_matches_free(decancer_matches_t matches)
Frees the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.
void * decancer_matches_t
Represents a matcher iterator object returned from decancer_find_multiple and decancer_find_multiple_...
Definition decancer.h:412
Parameters
curedThe cured string object.
otherA list of UTF-8 keywords to match with.
other_lengthLength of the keywords array in units of decancer_keyword_t – or sizeof(array) / sizeof(decancer_keyword_t).
See also
decancer_find
decancer_find_utf16
decancer_find_multiple_utf16
decancer_matcher_consume
decancer_matcher_utf16_consume
decancer_matches_clone
decancer_matches_free
Returns
decancer_matches_t A matches object or NULL if the keywords are not properly UTF-8 encoded.
Note
For its UTF-16 counterpart, see decancer_find_multiple_utf16.
You are responsible for freeing the returned object later by calling decancer_matches_free.
Since
3.1.1

◆ decancer_find_multiple_utf16()

DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16 ( decancer_cured_t cured,
const decancer_keyword_utf16_t * other,
const size_t other_length )

Finds every similar-looking match from a list of UTF-16 keywords in the cured string. Unlike decancer_find_utf16, this function also takes note of overlapping matches and merges them together.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "very"
uint16_t very[] = { 0x76, 0x65, 0x72, 0x79 };
// UTF-16 bytes for "funny"
uint16_t funny[] = { 0x66, 0x75, 0x6e, 0x6e, 0x79 };
decancer_keyword_utf16_t keywords[] = {
{very, sizeof(very) / sizeof(uint16_t)},
{funny, sizeof(funny) / sizeof(uint16_t)}
};
size_t raw_matches_length;
const decancer_match_t* raw_matches;
decancer_cured_raw_utf16_t raw_contents_handle;
uint16_t* raw_contents;
size_t raw_contents_length;
cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
matches = decancer_find_multiple_utf16(cured, keywords, sizeof(keywords) / sizeof(decancer_keyword_utf16_t));
decancer_assert(matches != NULL, "decancer_find_multiple_utf16", CURED_END);
raw_matches = decancer_matches_raw(matches, &raw_matches_length);
decancer_assert(raw_matches_length == 2, "raw_matches_length", MATCHES_END);
decancer_assert(raw_matches[0].start == 0, "start of very", MATCHES_END);
decancer_assert(raw_matches[0].end == 4, "end of very", MATCHES_END);
raw_contents_handle = decancer_cured_raw_utf16(cured, &raw_matches[0], &raw_contents, &raw_contents_length);
decancer_assert(raw_contents_length == (sizeof(very) / sizeof(uint16_t)), "length of very", RAW_CONTENTS_END);
decancer_assert(!memcmp(raw_contents, very, sizeof(very)), "contents of very", RAW_CONTENTS_END);
decancer_assert(raw_matches[1].start == 5, "start of funny", RAW_CONTENTS_END);
decancer_assert(raw_matches[1].end == 10, "end of funny", RAW_CONTENTS_END);
decancer_cured_raw_utf16_free(raw_contents_handle);
raw_contents_handle = decancer_cured_raw_utf16(cured, &raw_matches[1], &raw_contents, &raw_contents_length);
decancer_assert(raw_contents_length == (sizeof(funny) / sizeof(uint16_t)), "length of funny", RAW_CONTENTS_END);
decancer_assert(!memcmp(raw_contents, funny, sizeof(funny)), "contents of funny", RAW_CONTENTS_END);
RAW_CONTENTS_END:
decancer_cured_raw_utf16_free(raw_contents_handle);
MATCHES_END:
CURED_END:
return ret;
}
DECANCER_EXPORT decancer_matches_t decancer_find_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length)
Finds every similar-looking match from a list of UTF-16 keywords in the cured string....
Parameters
curedThe cured string object.
otherA list of UTF-16 keywords to match with.
other_lengthLength of the keywords array in units of decancer_keyword_utf16_t – or sizeof(array) / sizeof(decancer_keyword_utf16_t).
See also
decancer_find
decancer_find_utf16
decancer_matcher_consume
decancer_matcher_utf16_consume
decancer_matches_clone
decancer_matches_free
Returns
decancer_matches_t A matches object or NULL if the keywords are not properly UTF-16 encoded.
Note
For its UTF-8 counterpart, see decancer_find_multiple.
You are responsible for freeing the returned object later by calling decancer_matches_free.
Since
3.2.2

◆ decancer_find_utf16()

DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16 ( decancer_cured_t cured,
const uint16_t * other_str,
const size_t other_length )

Finds every similar-looking match of a UTF-16 encoded string in the cured string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "funny"
uint16_t funny[] = { 0x66, 0x75, 0x6e, 0x6e, 0x79 };
decancer_cured_raw_utf16_t raw_contents_handle;
size_t raw_contents_length;
uint16_t* raw_contents;
cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
matcher = decancer_find_utf16(cured, funny, sizeof(funny) / sizeof(uint16_t));
decancer_assert(matcher != NULL, "decancer_find_utf16", CURED_END);
decancer_assert(decancer_matcher_utf16_next(matcher, &match), "first iteration of decancer_matcher_next", MATCHER_END);
decancer_assert(match.start == 5, "start of funny", RAW_CONTENTS_END);
decancer_assert(match.end == 10, "end of funny", RAW_CONTENTS_END);
raw_contents_handle = decancer_cured_raw_utf16(cured, &match, &raw_contents, &raw_contents_length);
decancer_assert(raw_contents_length == (sizeof(funny) / sizeof(uint16_t)), "length of funny", RAW_CONTENTS_END);
decancer_assert(!memcmp(raw_contents, funny, sizeof(funny)), "contents of funny", RAW_CONTENTS_END);
decancer_assert(!decancer_matcher_utf16_next(matcher, &match), "end of iteration", RAW_CONTENTS_END);
RAW_CONTENTS_END:
decancer_cured_raw_utf16_free(raw_contents_handle);
MATCHER_END:
CURED_END:
return ret;
}
DECANCER_EXPORT decancer_matcher_utf16_t decancer_find_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Finds every similar-looking match of a UTF-16 encoded string in the cured string.
DECANCER_EXPORT void decancer_matcher_utf16_free(decancer_matcher_utf16_t matcher)
Frees the UTF-16 matcher iterator object created by decancer_find_utf16.
DECANCER_EXPORT bool decancer_matcher_utf16_next(decancer_matcher_utf16_t matcher, decancer_match_t *match)
Iterates to the next element of a UTF-16 matcher iterator.
void * decancer_matcher_utf16_t
Represents a UTF-16 matcher iterator object returned from decancer_find_utf16.
Definition decancer.h:398
Parameters
curedThe cured string object.
other_strThe UTF-16 encoded string to match with.
other_lengthLength of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
See also
decancer_find
decancer_find_multiple
decancer_find_multiple_utf16
decancer_matcher_utf16_consume
decancer_matcher_utf16_next
decancer_matcher_utf16_free
Returns
decancer_matcher_utf16_t A UTF-16 matcher iterator object or NULL if the other string is not properly UTF-16 encoded.
Note
For its UTF-8 counterpart, see decancer_find.
You are responsible for freeing the returned object later by calling decancer_matcher_utf16_free.
Since
3.2.2

◆ decancer_matcher_consume()

DECANCER_EXPORT decancer_matches_t decancer_matcher_consume ( decancer_matcher_t matcher)

Consumes the UTF-8 matcher iterator object created by decancer_find and returns a matches object.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
const decancer_match_t* raw_matches;
size_t raw_matches_length;
cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
matcher = decancer_find(cured, "funny", 5);
decancer_assert(matcher != NULL, "decancer_find", CURED_END);
matches = decancer_matcher_consume(matcher);
raw_matches = decancer_matches_raw(matches, &raw_matches_length);
decancer_assert(raw_matches_length == 1, "raw_matches_length", MATCHES_END);
decancer_assert(raw_matches[0].start == 5, "start of funny", MATCHES_END);
decancer_assert(raw_matches[0].end == 10, "end of funny", MATCHES_END);
MATCHES_END:
CURED_END:
return ret;
}
DECANCER_EXPORT decancer_matches_t decancer_matcher_consume(decancer_matcher_t matcher)
Consumes the UTF-8 matcher iterator object created by decancer_find and returns a matches object.
Parameters
matcherThe UTF-8 matcher iterator object to consume.
Returns
decancer_matches_t A new matches object created from the UTF-8 matcher iterator object.
See also
decancer_find
decancer_matcher_next
decancer_matcher_free
decancer_matcher_utf16_consume
Note
For its UTF-16 counterpart, see decancer_matcher_utf16_consume.
You don't need to pass the UTF-8 matcher iterator object to decancer_matcher_free after calling this.
You are responsible for freeing the returned object later by calling decancer_matches_free.
Since
3.2.2

◆ decancer_matcher_free()

DECANCER_EXPORT void decancer_matcher_free ( decancer_matcher_t matcher)

Frees the UTF-8 matcher iterator object created by decancer_find.

Parameters
matcherThe UTF-8 matcher iterator object to free.
See also
decancer_find
decancer_matcher_consume
decancer_matcher_next
decancer_matcher_utf16_free
Since
3.1.0

◆ decancer_matcher_next()

DECANCER_EXPORT bool decancer_matcher_next ( decancer_matcher_t matcher,
decancer_match_t * match )

Iterates to the next element of a UTF-8 matcher iterator.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
size_t raw_contents_size;
const uint8_t* raw_contents;
cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
matcher = decancer_find(cured, "funny", 5);
decancer_assert(matcher != NULL, "decancer_find", CURED_END);
decancer_assert(decancer_matcher_next(matcher, &match), "first iteration of decancer_matcher_next", MATCHER_END);
decancer_assert(match.start == 5, "start of funny", MATCHER_END);
decancer_assert(match.end == 10, "end of funny", MATCHER_END);
raw_contents = decancer_cured_raw(cured, &match, &raw_contents_size);
decancer_assert(raw_contents_size == 5, "size of funny", MATCHER_END);
decancer_assert(!memcmp(raw_contents, "funny", raw_contents_size), "contents of funny", MATCHER_END);
decancer_assert(!decancer_matcher_next(matcher, &match), "no more matches", MATCHER_END);
MATCHER_END:
CURED_END:
return ret;
}
Parameters
matcherThe UTF-8 matcher iterator object.
matchA pointer to a decancer_match_t struct.
See also
decancer_find
decancer_matcher_utf16_next
decancer_matcher_free
Returns
bool true if a new value is present, or false if the iteration is complete.
Note
For its UTF-16 counterpart, see decancer_matcher_utf16_next.
Since
3.1.0

◆ decancer_matcher_utf16_consume()

DECANCER_EXPORT decancer_matches_t decancer_matcher_utf16_consume ( decancer_matcher_utf16_t matcher)

Consumes the UTF-16 matcher iterator object created by decancer_find_utf16 and returns a matches object.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "funny"
uint16_t funny[] = { 0x66, 0x75, 0x6e, 0x6e, 0x79 };
const decancer_match_t* raw_matches;
size_t raw_matches_length;
cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
matcher = decancer_find_utf16(cured, funny, sizeof(funny) / sizeof(uint16_t));
decancer_assert(matcher != NULL, "decancer_find_utf16", CURED_END);
matches = decancer_matcher_utf16_consume(matcher);
raw_matches = decancer_matches_raw(matches, &raw_matches_length);
decancer_assert(raw_matches_length == 1, "raw_matches_length", MATCHES_END);
decancer_assert(raw_matches[0].start == 5, "start of funny", MATCHES_END);
decancer_assert(raw_matches[0].end == 10, "end of funny", MATCHES_END);
MATCHES_END:
CURED_END:
return ret;
}
DECANCER_EXPORT decancer_matches_t decancer_matcher_utf16_consume(decancer_matcher_utf16_t matcher)
Consumes the UTF-16 matcher iterator object created by decancer_find_utf16 and returns a matches obje...
Parameters
matcherThe UTF-16 matcher iterator object to consume.
Returns
decancer_matches_t A new matches object created from the UTF-16 matcher iterator object.
See also
decancer_find_utf16
decancer_matcher_consume
decancer_matcher_utf16_next
decancer_matcher_utf16_free
Note
For its UTF-8 counterpart, see decancer_matcher_consume.
You don't need to pass the UTF-16 matcher iterator object to decancer_matcher_utf16_free after calling this.
You are responsible for freeing the returned object later by calling decancer_matches_free.
Since
3.2.2

◆ decancer_matcher_utf16_free()

DECANCER_EXPORT void decancer_matcher_utf16_free ( decancer_matcher_utf16_t matcher)

Frees the UTF-16 matcher iterator object created by decancer_find_utf16.

Parameters
matcherThe UTF-16 matcher iterator object to free.
See also
decancer_find_utf16
decancer_matcher_free
decancer_matcher_utf16_next
Since
3.2.2

◆ decancer_matcher_utf16_next()

DECANCER_EXPORT bool decancer_matcher_utf16_next ( decancer_matcher_utf16_t matcher,
decancer_match_t * match )

Iterates to the next element of a UTF-16 matcher iterator.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "funny"
uint16_t funny[] = { 0x66, 0x75, 0x6e, 0x6e, 0x79 };
decancer_cured_raw_utf16_t raw_contents_handle;
size_t raw_contents_length;
uint16_t* raw_contents;
cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
matcher = decancer_find_utf16(cured, funny, sizeof(funny) / sizeof(uint16_t));
decancer_assert(matcher != NULL, "decancer_find_utf16", CURED_END);
decancer_assert(decancer_matcher_utf16_next(matcher, &match), "first iteration of decancer_matcher_next", MATCHER_END);
decancer_assert(match.start == 5, "start of funny", RAW_CONTENTS_END);
decancer_assert(match.end == 10, "end of funny", RAW_CONTENTS_END);
raw_contents_handle = decancer_cured_raw_utf16(cured, &match, &raw_contents, &raw_contents_length);
decancer_assert(raw_contents_length == (sizeof(funny) / sizeof(uint16_t)), "length of funny", RAW_CONTENTS_END);
decancer_assert(!memcmp(raw_contents, funny, sizeof(funny)), "contents of funny", RAW_CONTENTS_END);
decancer_assert(!decancer_matcher_utf16_next(matcher, &match), "end of iteration", RAW_CONTENTS_END);
RAW_CONTENTS_END:
decancer_cured_raw_utf16_free(raw_contents_handle);
MATCHER_END:
CURED_END:
return ret;
}
Parameters
matcherThe UTF-16 matcher iterator object.
matchA pointer to a decancer_match_t struct.
Returns
bool true if a new value is present, or false if the iteration is complete.
See also
decancer_find_utf16
decancer_matcher_next
decancer_matcher_utf16_free
Note
For its UTF-8 counterpart, see decancer_matcher_next.
Since
3.2.2

◆ decancer_matches_clone()

DECANCER_EXPORT decancer_matches_t decancer_matches_clone ( decancer_matches_t matches)

Clones the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.

Parameters
matchesThe matches object to clone.
Returns
decancer_matches_t The newly cloned matches object.
See also
decancer_find_multiple
decancer_find_multiple_utf16
decancer_matcher_consume
decancer_matcher_utf16_consume
decancer_matches_raw
decancer_matches_free
Note
You are responsible for freeing the returned object later by calling decancer_matches_free.
Since
3.2.2

◆ decancer_matches_free()

DECANCER_EXPORT void decancer_matches_free ( decancer_matches_t matches)

Frees the matches object created by decancer_find_multiple and decancer_find_multiple_utf16.

Parameters
matchesThe matches object to free.
See also
decancer_find_multiple
decancer_find_multiple_utf16
decancer_matcher_consume
decancer_matcher_utf16_consume
decancer_matches_clone
decancer_matches_raw
Since
3.1.1

◆ decancer_matches_raw()

DECANCER_EXPORT const decancer_match_t * decancer_matches_raw ( decancer_matches_t matches,
size_t * output_size )

Returns the raw list of every similar-looking match from a decancer_matches_t object.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
decancer_keyword_t keywords[] = {
{"very", 4},
{"funny", 5}
};
size_t raw_matches_length;
const decancer_match_t* raw_matches;
const uint8_t* raw_contents;
size_t raw_contents_size;
cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
matches = decancer_find_multiple(cured, keywords, sizeof(keywords) / sizeof(decancer_keyword_t));
decancer_assert(matches != NULL, "decancer_find_multiple", CURED_END);
raw_matches = decancer_matches_raw(matches, &raw_matches_length);
decancer_assert(raw_matches_length == 2, "raw_matches_length", MATCHES_END);
decancer_assert(raw_matches[0].start == 0, "start of very", MATCHES_END);
decancer_assert(raw_matches[0].end == 4, "end of very", MATCHES_END);
raw_contents = decancer_cured_raw(cured, &raw_matches[0], &raw_contents_size);
decancer_assert(raw_contents_size == 4, "size of very", MATCHES_END);
decancer_assert(!memcmp(raw_contents, "very", raw_contents_size), "contents of very", MATCHES_END);
decancer_assert(raw_matches[1].start == 5, "start of funny", MATCHES_END);
decancer_assert(raw_matches[1].end == 10, "end of funny", MATCHES_END);
raw_contents = decancer_cured_raw(cured, &raw_matches[1], &raw_contents_size);
decancer_assert(raw_contents_size == 5, "size of funny", MATCHES_END);
decancer_assert(!memcmp(raw_contents, "funny", raw_contents_size), "contents of funny", MATCHES_END);
MATCHES_END:
CURED_END:
return ret;
}
Parameters
matchesThe matches object.
output_sizeA pointer to the output's array size.
Returns
const decancer_match_t* The raw pointer containing every similar-looking match.
Note
The returned pointer remains valid until the matches object is passed to decancer_matches_free.
Since
3.1.1

◆ decancer_replace()

DECANCER_EXPORT bool decancer_replace ( decancer_cured_t cured,
const uint8_t * other_str,
const size_t other_size,
const uint8_t * replacement_str,
const size_t replacement_size )

Replaces every similar-looking match of the specified UTF-8 encoded string with another UTF-8 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
size_t raw_contents_size;
const uint8_t* raw_contents;
cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_replace(cured, "very", 4, "not", 3), "decancer_replace");
raw_contents = decancer_cured_raw(cured, NULL, &raw_contents_size);
decancer_assert(!memcmp(raw_contents, "not funny text", raw_contents_size), "replace result");
END:
return ret;
}
DECANCER_EXPORT bool decancer_replace(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size, const uint8_t *replacement_str, const size_t replacement_size)
Replaces every similar-looking match of the specified UTF-8 encoded string with another UTF-8 encoded...
Parameters
curedThe cured string object.
other_strThe UTF-8 encoded string to match with.
other_sizeUTF-8 size of the other string, in bytes.
replacement_strThe UTF-8 encoded string to replace with.
replacement_sizeUTF-8 size of the replacement string, in bytes.
See also
decancer_replace_utf16
decancer_replace_multiple
decancer_replace_multiple_utf16
Returns
bool true on success, or false on failure due to invalid encoding.
Note
For its UTF-16 counterpart, see decancer_replace_utf16.
Since
3.1.1

◆ decancer_replace_multiple()

DECANCER_EXPORT bool decancer_replace_multiple ( decancer_cured_t cured,
const decancer_keyword_t * other,
const size_t other_length,
const uint8_t * replacement_str,
const size_t replacement_size )

Replaces every similar-looking match of the specified list of UTF-8 keywords with another UTF-8 encoded string. Unlike decancer_replace, this function also takes note of overlapping matches.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto END; \
}
int main(void) {
int ret = 0;
// UTF-8 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
decancer_keyword_t keywords[] = {
{"very", 4},
{"funny", 5}
};
size_t raw_contents_size;
const uint8_t* raw_contents;
cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_replace_multiple(cured, keywords, sizeof(keywords) / sizeof(decancer_keyword_t), "sussy", 5), "decancer_replace_multiple");
raw_contents = decancer_cured_raw(cured, NULL, &raw_contents_size);
decancer_assert(!memcmp(raw_contents, "sussy sussy text", raw_contents_size), "replace multiple result");
END:
return ret;
}
DECANCER_EXPORT bool decancer_replace_multiple(decancer_cured_t cured, const decancer_keyword_t *other, const size_t other_length, const uint8_t *replacement_str, const size_t replacement_size)
Replaces every similar-looking match of the specified list of UTF-8 keywords with another UTF-8 encod...
Parameters
curedThe cured string object.
otherA list of UTF-8 keywords to match with.
other_lengthLength of the keywords array in units of decancer_keyword_t – or sizeof(array) / sizeof(decancer_keyword_t).
replacement_strThe UTF-8 encoded string to replace with.
replacement_sizeUTF-8 size of the replacement string, in bytes.
See also
decancer_replace
decancer_replace_utf16
decancer_replace_multiple_utf16
Returns
bool true on success, or false on failure due to invalid encoding.
Note
For its UTF-16 counterpart, see decancer_replace_multiple_utf16.
Since
3.1.1

◆ decancer_replace_multiple_utf16()

DECANCER_EXPORT bool decancer_replace_multiple_utf16 ( decancer_cured_t cured,
const decancer_keyword_utf16_t * other,
const size_t other_length,
const uint16_t * replacement_str,
const size_t replacement_length )

Replaces every similar-looking match of the specified list of UTF-16 keywords with another UTF-16 encoded string. Unlike decancer_replace_utf16, this function also takes note of overlapping matches.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "very"
uint16_t very[] = { 0x76, 0x65, 0x72, 0x79 };
// UTF-16 bytes for "funny"
uint16_t funny[] = { 0x66, 0x75, 0x6e, 0x6e, 0x79 };
// UTF-16 bytes for "sussy"
uint16_t sussy[] = { 0x73, 0x75, 0x73, 0x73, 0x79 };
// UTF-16 bytes for "sussy sussy text"
uint16_t expected_contents[] = { 0x73, 0x75, 0x73, 0x73, 0x79, 0x20, 0x73, 0x75, 0x73, 0x73, 0x79, 0x20, 0x74, 0x65, 0x78, 0x74 };
decancer_keyword_utf16_t keywords[] = {
{very, sizeof(very) / sizeof(uint16_t)},
{funny, sizeof(funny) / sizeof(uint16_t)}
};
size_t raw_contents_length;
uint16_t* raw_contents;
decancer_cured_raw_utf16_t raw_contents_handle;
cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_replace_multiple_utf16(cured, keywords, sizeof(keywords) / sizeof(decancer_keyword_utf16_t), sussy, sizeof(sussy) / sizeof(uint16_t)), "decancer_replace_multiple_utf16", CURED_END);
raw_contents_handle = decancer_cured_raw_utf16(cured, NULL, &raw_contents, &raw_contents_length);
decancer_assert(raw_contents_length == (sizeof(expected_contents) / sizeof(uint16_t)), "length of replace multiple result", RAW_CONTENTS_END);
decancer_assert(!memcmp(raw_contents, expected_contents, sizeof(expected_contents)), "replace multiple result", RAW_CONTENTS_END);
RAW_CONTENTS_END:
decancer_cured_raw_utf16_free(raw_contents_handle);
CURED_END:
return ret;
}
DECANCER_EXPORT bool decancer_replace_multiple_utf16(decancer_cured_t cured, const decancer_keyword_utf16_t *other, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
Replaces every similar-looking match of the specified list of UTF-16 keywords with another UTF-16 enc...
Parameters
curedThe cured string object.
otherA list of UTF-16 keywords to match with.
other_lengthLength of the keywords array in units of decancer_keyword_utf16_t – or sizeof(array) / sizeof(decancer_keyword_utf16_t).
replacement_strThe UTF-16 encoded string to replace with.
replacement_lengthLength of the replacement string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
See also
decancer_replace
decancer_replace_utf16
decancer_replace_multiple
Returns
bool true on success, or false on failure due to invalid encoding.
Note
For its UTF-8 counterpart, see decancer_replace_multiple.
Since
3.2.2

◆ decancer_replace_utf16()

DECANCER_EXPORT bool decancer_replace_utf16 ( decancer_cured_t cured,
const uint16_t * other_str,
const size_t other_length,
const uint16_t * replacement_str,
const size_t replacement_length )

Replaces every similar-looking match of the specified UTF-16 encoded string with another UTF-16 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define decancer_assert(expr, notes, label) \
if (!(expr)) { \
fprintf(stderr, "assertion failure at " notes "\n"); \
ret = 1; \
goto label; \
}
int main(void) {
int ret = 0;
// UTF-16 bytes for "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "not"
uint16_t not[] = { 0x6e, 0x6f, 0x74 };
// UTF-16 bytes for "very"
uint16_t very[] = { 0x76, 0x65, 0x72, 0x79 };
// UTF-16 bytes for "not funny text"
uint16_t expected_contents[] = { 0x6e, 0x6f, 0x74, 0x20, 0x66, 0x75, 0x6e, 0x6e, 0x79, 0x20, 0x74, 0x65, 0x78, 0x74 };
size_t raw_contents_length;
uint16_t* raw_contents;
decancer_cured_raw_utf16_t raw_contents_handle;
cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_replace_utf16(cured, very, sizeof(very) / sizeof(uint16_t), not, sizeof(not) / sizeof(uint16_t)), "decancer_replace_utf16", CURED_END);
raw_contents_handle = decancer_cured_raw_utf16(cured, NULL, &raw_contents, &raw_contents_length);
decancer_assert(raw_contents_length == (sizeof(expected_contents) / sizeof(uint16_t)), "length of replace result", RAW_CONTENTS_END);
decancer_assert(!memcmp(raw_contents, expected_contents, sizeof(expected_contents)), "replace result", RAW_CONTENTS_END);
RAW_CONTENTS_END:
decancer_cured_raw_utf16_free(raw_contents_handle);
CURED_END:
return ret;
}
DECANCER_EXPORT bool decancer_replace_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length, const uint16_t *replacement_str, const size_t replacement_length)
Replaces every similar-looking match of the specified UTF-16 encoded string with another UTF-16 encod...
Parameters
curedThe cured string object.
other_strThe UTF-16 encoded string to match with.
other_lengthLength of the search string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
replacement_strThe UTF-16 encoded string to replace with.
replacement_lengthLength of the replacement string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
See also
decancer_replace
decancer_replace_multiple
decancer_replace_multiple_utf16
Returns
bool true on success, or false on failure due to invalid encoding.
Note
For its UTF-8 counterpart, see decancer_replace.
Since
3.2.2

◆ decancer_starts_with()

DECANCER_EXPORT bool decancer_starts_with ( decancer_cured_t cured,
const uint8_t * other_str,
const size_t other_size )

Checks if the cured string similarly starts with the specified UTF-8 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/*
 * Checks `expr`; when it is false, prints "assertion failure at <notes>" to
 * stderr, sets the enclosing function's `ret` to 1 and jumps to its `END`
 * label. `notes` must be a string literal because it is concatenated into the
 * format string at compile time.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to exactly
 * one statement and stays correct as the unbraced body of an if/else.
 */
#define decancer_assert(expr, notes)                       \
  do {                                                     \
    if (!(expr)) {                                         \
      fprintf(stderr, "assertion failure at " notes "\n"); \
      ret = 1;                                             \
      goto END;                                            \
    }                                                      \
  } while (0)
int main(void) {
int ret = 0;
// UTF-8 bytes for "vοΌ₯ⓑ𝔂 π”½π•ŒΕ‡β„•ο½™ ţ乇𝕏𝓣"
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
decancer_cured_t cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_starts_with(cured, "very", 4), "decancer_starts_with");
END:
return ret;
}
DECANCER_EXPORT bool decancer_starts_with(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly starts with the specified UTF-8 encoded string.
Parameters
cured: The cured string object.
other_str: The UTF-8 encoded string to match with.
other_size: UTF-8 size of the other string, in bytes.
See also
decancer_starts_with_utf16
Returns
bool true if the cured string similarly starts with the specified string, false otherwise.
Note
For its UTF-16 counterpart, see decancer_starts_with_utf16.
Since
1.5.3

◆ decancer_starts_with_utf16()

DECANCER_EXPORT bool decancer_starts_with_utf16 ( decancer_cured_t cured,
const uint16_t * other_str,
const size_t other_length )

Checks if the cured string similarly starts with the specified UTF-16 encoded string.

Example:

#include <decancer.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/*
 * Checks `expr`; when it is false, prints "assertion failure at <notes>" to
 * stderr, sets the enclosing function's `ret` to 1 and jumps to its `END`
 * label. `notes` must be a string literal because it is concatenated into the
 * format string at compile time.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to exactly
 * one statement and stays correct as the unbraced body of an if/else.
 */
#define decancer_assert(expr, notes)                       \
  do {                                                     \
    if (!(expr)) {                                         \
      fprintf(stderr, "assertion failure at " notes "\n"); \
      ret = 1;                                             \
      goto END;                                            \
    }                                                      \
  } while (0)
int main(void) {
int ret = 0;
// UTF-16 bytes for "vοΌ₯ⓑ𝔂 π”½π•ŒΕ‡β„•ο½™ ţ乇𝕏𝓣"
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
// UTF-16 bytes for "very"
uint16_t very[] = { 0x76, 0x65, 0x72, 0x79 };
decancer_cured_t cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error);
if (cured == NULL) {
fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message);
return 1;
}
decancer_assert(decancer_starts_with_utf16(cured, very, sizeof(very) / sizeof(uint16_t)), "decancer_starts_with_utf16");
END:
return ret;
}
DECANCER_EXPORT bool decancer_starts_with_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly starts with the specified UTF-16 encoded string.
Parameters
cured: The cured string object.
other_str: The UTF-16 encoded string to match with.
other_length: Length of the UTF-16 encoded string in units of uint16_t – or sizeof(string) / sizeof(uint16_t).
See also
decancer_starts_with
Returns
bool true if the cured string similarly starts with the specified string, false otherwise.
Note
For its UTF-8 counterpart, see decancer_starts_with.
Since
3.2.2

◆ decancer_translation_clone()

DECANCER_EXPORT void decancer_translation_clone ( const decancer_translation_t * translation_in,
decancer_translation_t * translation_out )

Clones the translation struct used in decancer_cure_char.

Parameters
translation_in: A pointer to a translation struct to clone.
translation_out: A pointer to the newly cloned translation struct. This struct doesn't have to be initialized with decancer_translation_init first.
See also
decancer_cure_char
decancer_translation_init
decancer_translation_free
Note
You are responsible for freeing the translation struct later by passing it as a pointer to decancer_translation_free.
Since
3.2.2

◆ decancer_translation_free()

DECANCER_EXPORT void decancer_translation_free ( decancer_translation_t * translation)

Frees the translation struct used in decancer_cure_char.

Parameters
translation: A pointer to a translation struct.
See also
decancer_cure_char
decancer_translation_init
decancer_translation_clone
Since
3.1.0

◆ decancer_translation_init()

DECANCER_EXPORT void decancer_translation_init ( decancer_translation_t * translation)

Initializes a newly created translation struct for use.

Parameters
translation: A pointer to a translation struct bound for decancer_cure_char.
See also
decancer_cure_char
decancer_translation_clone
decancer_translation_free
Note
This function MUST be called before any calls to decancer_cure_char.
Since
3.2.2