[PATCH] Add Unicode Consortium names and codes for categories.
David Bremner
david at tethera.net
Sat Apr 27 21:28:13 BST 2019
These codes are documented in Chapter 4 of Version 11 of the Unicode Standard.
---
xapian-core/include/xapian/unicode.h | 66 +++++++++++++++-------------
1 file changed, 35 insertions(+), 31 deletions(-)
diff --git a/xapian-core/include/xapian/unicode.h b/xapian-core/include/xapian/unicode.h
index 8e4bb90ddbec..53783da1b3c1 100644
--- a/xapian-core/include/xapian/unicode.h
+++ b/xapian-core/include/xapian/unicode.h
@@ -210,38 +210,42 @@ class XAPIAN_VISIBILITY_DEFAULT Utf8Iterator {
/// Functions associated with handling Unicode characters.
namespace Unicode {
-/** Each Unicode character is in exactly one of these categories. */
+/** Each Unicode character is in exactly one of these categories.
+ *
+ * The Unicode Standard calls this the "General Category", and uses a
+ * "Major, minor" convention to derive a two-letter code.
+ */
typedef enum {
- UNASSIGNED,
- UPPERCASE_LETTER,
- LOWERCASE_LETTER,
- TITLECASE_LETTER,
- MODIFIER_LETTER,
- OTHER_LETTER,
- NON_SPACING_MARK,
- ENCLOSING_MARK,
- COMBINING_SPACING_MARK,
- DECIMAL_DIGIT_NUMBER,
- LETTER_NUMBER,
- OTHER_NUMBER,
- SPACE_SEPARATOR,
- LINE_SEPARATOR,
- PARAGRAPH_SEPARATOR,
- CONTROL,
- FORMAT,
- PRIVATE_USE,
- SURROGATE,
- CONNECTOR_PUNCTUATION,
- DASH_PUNCTUATION,
- OPEN_PUNCTUATION,
- CLOSE_PUNCTUATION,
- INITIAL_QUOTE_PUNCTUATION,
- FINAL_QUOTE_PUNCTUATION,
- OTHER_PUNCTUATION,
- MATH_SYMBOL,
- CURRENCY_SYMBOL,
- MODIFIER_SYMBOL,
- OTHER_SYMBOL
+ UNASSIGNED, /**< Other, not assigned (Cn) */
+ UPPERCASE_LETTER, /**< Letter, uppercase (Lu) */
+ LOWERCASE_LETTER, /**< Letter, lowercase (Ll) */
+ TITLECASE_LETTER, /**< Letter, titlecase (Lt) */
+ MODIFIER_LETTER, /**< Letter, modifier (Lm) */
+ OTHER_LETTER, /**< Letter, other (Lo) */
+ NON_SPACING_MARK, /**< Mark, nonspacing (Mn) */
+ ENCLOSING_MARK, /**< Mark, enclosing (Me) */
+ COMBINING_SPACING_MARK, /**< Mark, spacing combining (Mc) */
+ DECIMAL_DIGIT_NUMBER, /**< Number, decimal digit (Nd) */
+ LETTER_NUMBER, /**< Number, letter (Nl) */
+ OTHER_NUMBER, /**< Number, other (No) */
+ SPACE_SEPARATOR, /**< Separator, space (Zs) */
+ LINE_SEPARATOR, /**< Separator, line (Zl) */
+ PARAGRAPH_SEPARATOR, /**< Separator, paragraph (Zp) */
+ CONTROL, /**< Other, control (Cc) */
+ FORMAT, /**< Other, format (Cf) */
+ PRIVATE_USE, /**< Other, private use (Co) */
+ SURROGATE, /**< Other, surrogate (Cs) */
+ CONNECTOR_PUNCTUATION, /**< Punctuation, connector (Pc) */
+ DASH_PUNCTUATION, /**< Punctuation, dash (Pd) */
+ OPEN_PUNCTUATION, /**< Punctuation, open (Ps) */
+ CLOSE_PUNCTUATION, /**< Punctuation, close (Pe) */
+ INITIAL_QUOTE_PUNCTUATION, /**< Punctuation, initial quote (Pi) */
+ FINAL_QUOTE_PUNCTUATION, /**< Punctuation, final quote (Pf) */
+ OTHER_PUNCTUATION, /**< Punctuation, other (Po) */
+ MATH_SYMBOL, /**< Symbol, math (Sm) */
+ CURRENCY_SYMBOL, /**< Symbol, currency (Sc) */
+ MODIFIER_SYMBOL, /**< Symbol, modifier (Sk) */
+ OTHER_SYMBOL /**< Symbol, other (So) */
} category;
namespace Internal {
--
2.20.1
More information about the Xapian-devel
mailing list