A library that removes common unicode confusables/homoglyphs from strings.
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/*
 * decancer_assert(expr, notes) - minimal assertion helper for the example.
 *
 * When `expr` evaluates to false, prints "assertion failure at <notes>" to
 * stderr, sets `ret` to 1 and jumps to the `END` label.
 *
 * Call-site requirements:
 *   - an assignable `ret` and an `END:` label must exist in the enclosing
 *     function;
 *   - `notes` must be a string literal, because it is pasted between two
 *     adjacent string literals.
 *
 * The body is wrapped in do { ... } while (0) so the expansion is exactly one
 * statement: `decancer_assert(...);` can then be used safely as the body of
 * an unbraced if/else without stealing the `else` or breaking the syntax.
 */
#define decancer_assert(expr, notes) \
  do { \
    if (!(expr)) { \
      fprintf(stderr, "assertion failure at " notes "\n"); \
      ret = 1; \
      goto END; \
    } \
  } while (0)
/*
 * Example entry point for the UTF-8 variant (excerpt).
 *
 * As shown here it defines a UTF-8 input buffer, returns 1 immediately when
 * `cured` is NULL, and otherwise returns `ret` (initialised to 0).
 *
 * NOTE(review): `cured` is tested below but never declared in this excerpt,
 * `input` is never used, and nothing jumps to END. The statements that cure
 * the input, run the assertions and free the result appear to be missing
 * from this listing - presumably `cured` is the result of decancer_cure() on
 * `input`; confirm against the original example before compiling.
 */
int main(void) {
/* Exit status: 0 on success; decancer_assert sets it to 1 on failure. */
int ret = 0;
/* Raw UTF-8 bytes; they decode to "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣". */
uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d,
0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99,
0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3};
/* A NULL cured object is treated as failure; this return bypasses END. */
if (cured == NULL) {
return 1;
}
/* Common exit point targeted by decancer_assert's `goto END`. */
END:
return ret;
}
A library that removes common unicode confusables/homoglyphs from strings.
DECANCER_EXPORT void decancer_cured_free(decancer_cured_t cured)
Frees the cured string object created by decancer_cure and decancer_cure_utf16.
#define DECANCER_OPTION_DEFAULT
Uses decancer's default options — AKA to be AS AGGRESSIVE AS POSSIBLE.
Definition decancer.h:90
void * decancer_cured_t
Represents a cured string returned from decancer_cure and decancer_cure_utf16.
Definition decancer.h:498
DECANCER_EXPORT bool decancer_contains(decancer_cured_t cured, const uint8_t *other_str, const size_t other_size)
Checks if the cured string similarly contains the specified UTF-8 encoded string.
DECANCER_EXPORT decancer_cured_t decancer_cure(const uint8_t *input_str, const size_t input_size, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-8 encoded string.
Represents an error caused by decancer not being able to cure a string.
Definition decancer.h:289
uint8_t message_length
The length of the error message.
Definition decancer.h:300
const char * message
Null-terminated ASCII encoded error message.
Definition decancer.h:294
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/*
 * decancer_assert(expr, notes) - minimal assertion helper for the example.
 *
 * When `expr` evaluates to false, prints "assertion failure at <notes>" to
 * stderr, sets `ret` to 1 and jumps to the `END` label.
 *
 * Call-site requirements:
 *   - an assignable `ret` and an `END:` label must exist in the enclosing
 *     function;
 *   - `notes` must be a string literal, because it is pasted between two
 *     adjacent string literals.
 *
 * The body is wrapped in do { ... } while (0) so the expansion is exactly one
 * statement: `decancer_assert(...);` can then be used safely as the body of
 * an unbraced if/else without stealing the `else` or breaking the syntax.
 */
#define decancer_assert(expr, notes) \
  do { \
    if (!(expr)) { \
      fprintf(stderr, "assertion failure at " notes "\n"); \
      ret = 1; \
      goto END; \
    } \
  } while (0)
/*
 * Example entry point for the UTF-16 variant (excerpt).
 *
 * As shown here it defines a UTF-16 input buffer and a UTF-16 needle,
 * returns 1 immediately when `cured` is NULL, then asserts that the cured
 * string contains the needle. Returns `ret`: 0 on success, 1 if the
 * assertion fails.
 *
 * NOTE(review): `cured` is used below but never declared in this excerpt and
 * `input` is never used. The statements that cure the input and free the
 * result appear to be missing from this listing - presumably `cured` is the
 * result of decancer_cure_utf16() on `input`; confirm against the original
 * example before compiling.
 */
int main(void) {
/* Exit status: 0 on success; decancer_assert sets it to 1 on failure. */
int ret = 0;
/*
 * UTF-16 code units (astral characters are written as surrogate pairs,
 * e.g. 0xd835 0xdd02); they decode to "vＥⓡ𝔂 𝔽𝕌Ňℕｙ ţ乇𝕏𝓣".
 */
uint16_t input[] = {
0x0076, 0xff25, 0x24e1,
0xd835, 0xdd02, 0x0020,
0xd835, 0xdd3d, 0xd835,
0xdd4c, 0x0147, 0x2115,
0xff59, 0x0020, 0x0163,
0x4e47, 0xd835, 0xdd4f,
0xd835, 0xdce3
};
/* The ASCII word "funny" as UTF-16 code units (no terminator). */
uint16_t funny[] = { 0x66, 0x75, 0x6e, 0x6e, 0x79 };
/* A NULL cured object is treated as failure; this return bypasses END. */
if (cured == NULL) {
return 1;
}
/*
 * The length argument is the number of UTF-16 code units in `funny`
 * (array byte size divided by the size of one unit), not a byte count.
 */
decancer_assert(
decancer_contains_utf16(cured, funny,
sizeof(funny) /
sizeof(uint16_t)),
"decancer_contains_utf16");
/* Common exit point targeted by decancer_assert's `goto END`. */
END:
return ret;
}
DECANCER_EXPORT bool decancer_contains_utf16(decancer_cured_t cured, const uint16_t *other_str, const size_t other_length)
Checks if the cured string similarly contains the specified UTF-16 encoded string.
DECANCER_EXPORT decancer_cured_t decancer_cure_utf16(const uint16_t *input_str, const size_t input_length, const decancer_options_t options, decancer_error_t *error)
Cures a UTF-16 encoded string.
If you want to support my eyes for manually looking at thousands of unicode characters, consider donating! ❤