"Fossies" - the Fresh Open Source Software Archive

Member "icu/source/test/intltest/transtst.cpp" (22 Apr 2020, 178800 Bytes) of package /linux/misc/icu4c-67_1-src.tgz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code change reports for "transtst.cpp": 67rc_vs_67_1 or 66_1_vs_67_1.

    1 // © 2016 and later: Unicode, Inc. and others.
    2 // License & terms of use: http://www.unicode.org/copyright.html
    3 /*
    4 **********************************************************************
    5 *   Copyright (C) 1999-2016, International Business Machines
    6 *   Corporation and others.  All Rights Reserved.
    7 **********************************************************************
    8 *   Date        Name        Description
    9 *   11/10/99    aliu        Creation.
   10 **********************************************************************
   11 */
   12 
   13 #include "unicode/utypes.h"
   14 
   15 #if !UCONFIG_NO_TRANSLITERATION
   16 
   17 #include "transtst.h"
   18 #include "unicode/locid.h"
   19 #include "unicode/dtfmtsym.h"
   20 #include "unicode/normlzr.h"
   21 #include "unicode/translit.h"
   22 #include "unicode/uchar.h"
   23 #include "unicode/unifilt.h"
   24 #include "unicode/uniset.h"
   25 #include "unicode/ustring.h"
   26 #include "unicode/usetiter.h"
   27 #include "unicode/uscript.h"
   28 #include "unicode/utf16.h"
   29 #include "cpdtrans.h"
   30 #include "nultrans.h"
   31 #include "rbt.h"
   32 #include "rbt_pars.h"
   33 #include "anytrans.h"
   34 #include "esctrn.h"
   35 #include "name2uni.h"
   36 #include "nortrans.h"
   37 #include "remtrans.h"
   38 #include "titletrn.h"
   39 #include "tolowtrn.h"
   40 #include "toupptrn.h"
   41 #include "unesctrn.h"
   42 #include "uni2name.h"
   43 #include "cstring.h"
   44 #include "cmemory.h"
   45 #include <stdio.h>
   46 
   47 /***********************************************************************
   48 
   49                      HOW TO USE THIS TEST FILE
   50                                -or-
   51                   How I developed on two platforms
   52                 without losing (too much of) my mind
   53 
   54 
   55 1. Add new tests by copying/pasting/changing existing tests.  On Java,
   56    any public void method named Test...() taking no parameters becomes
   57    a test.  On C++, you need to modify the header and add a line to
   58    the runIndexedTest() dispatch method.
   59 
   60 2. Make liberal use of the expect() method; it is your friend.
   61 
   62 3. The tests in this file exactly match those in a sister file on the
   63    other side.  The two files are:
   64 
   65    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
   66    icu4c:  source/test/intltest/transtst.cpp
   67 
   68                   ==> THIS IS THE IMPORTANT PART <==
   69 
   70    When you add a test in this file, add it in TransliteratorTest.java
   71    too.  Give it the same name and put it in the same relative place.
   72    This makes maintenance a lot simpler for any poor soul who ends up
   73    trying to synchronize the tests between icu4j and icu4c.
   74 
   75 4. If you MUST enter a test that is NOT paralleled in the sister file,
   76    then add it in the special non-mirrored section.  These are
   77    labeled
   78 
   79      "icu4j ONLY"
   80 
   81    or
   82 
   83      "icu4c ONLY"
   84 
   85    Make sure you document the reason the test is here and not there.
   86 
   87 
   88 Thank you.
   89 The Management
   90 ***********************************************************************/
   91 
   92 // Define character constants thusly to be EBCDIC-friendly
   93 enum {
   94     LEFT_BRACE=((UChar)0x007B), /*{*/
   95     PIPE      =((UChar)0x007C), /*|*/
   96     ZERO      =((UChar)0x0030), /*0*/
   97     UPPER_A   =((UChar)0x0041)  /*A*/
   98 };
   99 
  100 TransliteratorTest::TransliteratorTest()
  101 :   DESERET_DEE((UChar32)0x10414),
  102     DESERET_dee((UChar32)0x1043C)
  103 {
  104 }
  105 
  106 TransliteratorTest::~TransliteratorTest() {}
  107 
  108 void
  109 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
  110                                    const char* &name, char* /*par*/) {
  111     switch (index) {
  112         TESTCASE(0,TestInstantiation);
  113         TESTCASE(1,TestSimpleRules);
  114         TESTCASE(2,TestRuleBasedInverse);
  115         TESTCASE(3,TestKeyboard);
  116         TESTCASE(4,TestKeyboard2);
  117         TESTCASE(5,TestKeyboard3);
  118         TESTCASE(6,TestArabic);
  119         TESTCASE(7,TestCompoundKana);
  120         TESTCASE(8,TestCompoundHex);
  121         TESTCASE(9,TestFiltering);
  122         TESTCASE(10,TestInlineSet);
  123         TESTCASE(11,TestPatternQuoting);
  124         TESTCASE(12,TestJ277);
  125         TESTCASE(13,TestJ243);
  126         TESTCASE(14,TestJ329);
  127         TESTCASE(15,TestSegments);
  128         TESTCASE(16,TestCursorOffset);
  129         TESTCASE(17,TestArbitraryVariableValues);
  130         TESTCASE(18,TestPositionHandling);
  131         TESTCASE(19,TestHiraganaKatakana);
  132         TESTCASE(20,TestCopyJ476);
  133         TESTCASE(21,TestAnchors);
  134         TESTCASE(22,TestInterIndic);
  135         TESTCASE(23,TestFilterIDs);
  136         TESTCASE(24,TestCaseMap);
  137         TESTCASE(25,TestNameMap);
  138         TESTCASE(26,TestLiberalizedID);
  139         TESTCASE(27,TestCreateInstance);
  140         TESTCASE(28,TestNormalizationTransliterator);
  141         TESTCASE(29,TestCompoundRBT);
  142         TESTCASE(30,TestCompoundFilter);
  143         TESTCASE(31,TestRemove);
  144         TESTCASE(32,TestToRules);
  145         TESTCASE(33,TestContext);
  146         TESTCASE(34,TestSupplemental);
  147         TESTCASE(35,TestQuantifier);
  148         TESTCASE(36,TestSTV);
  149         TESTCASE(37,TestCompoundInverse);
  150         TESTCASE(38,TestNFDChainRBT);
  151         TESTCASE(39,TestNullInverse);
  152         TESTCASE(40,TestAliasInverseID);
  153         TESTCASE(41,TestCompoundInverseID);
  154         TESTCASE(42,TestUndefinedVariable);
  155         TESTCASE(43,TestEmptyContext);
  156         TESTCASE(44,TestCompoundFilterID);
  157         TESTCASE(45,TestPropertySet);
  158         TESTCASE(46,TestNewEngine);
  159         TESTCASE(47,TestQuantifiedSegment);
  160         TESTCASE(48,TestDevanagariLatinRT);
  161         TESTCASE(49,TestTeluguLatinRT);
  162         TESTCASE(50,TestCompoundLatinRT);
  163         TESTCASE(51,TestSanskritLatinRT);
  164         TESTCASE(52,TestLocaleInstantiation);
  165         TESTCASE(53,TestTitleAccents);
  166         TESTCASE(54,TestLocaleResource);
  167         TESTCASE(55,TestParseError);
  168         TESTCASE(56,TestOutputSet);
  169         TESTCASE(57,TestVariableRange);
  170         TESTCASE(58,TestInvalidPostContext);
  171         TESTCASE(59,TestIDForms);
  172         TESTCASE(60,TestToRulesMark);
  173         TESTCASE(61,TestEscape);
  174         TESTCASE(62,TestAnchorMasking);
  175         TESTCASE(63,TestDisplayName);
  176         TESTCASE(64,TestSpecialCases);
  177 #if !UCONFIG_NO_FILE_IO
  178         TESTCASE(65,TestIncrementalProgress);
  179 #endif
  180         TESTCASE(66,TestSurrogateCasing);
  181         TESTCASE(67,TestFunction);
  182         TESTCASE(68,TestInvalidBackRef);
  183         TESTCASE(69,TestMulticharStringSet);
  184         TESTCASE(70,TestUserFunction);
  185         TESTCASE(71,TestAnyX);
  186         TESTCASE(72,TestSourceTargetSet);
  187         TESTCASE(73,TestGurmukhiDevanagari);
  188         TESTCASE(74,TestPatternWhiteSpace);
  189         TESTCASE(75,TestAllCodepoints);
  190         TESTCASE(76,TestBoilerplate);
  191         TESTCASE(77,TestAlternateSyntax);
  192         TESTCASE(78,TestBeginEnd);
  193         TESTCASE(79,TestBeginEndToRules);
  194         TESTCASE(80,TestRegisterAlias);
  195         TESTCASE(81,TestRuleStripping);
  196         TESTCASE(82,TestHalfwidthFullwidth);
  197         TESTCASE(83,TestThai);
  198         TESTCASE(84,TestAny);
  199         TESTCASE(85,TestBasicTransliteratorEvenWithoutData);
  200         default: name = ""; break;
  201     }
  202 }
  203 
  204 /**
  205  * Make sure every system transliterator can be instantiated.
  206  * 
  207  * ALSO test that the result of toRules() for each rule is a valid
  208  * rule.  Do this here so we don't have to have another test that
  209  * instantiates everything as well.
  210  */
  211 void TransliteratorTest::TestInstantiation() {
  212     UErrorCode ec = U_ZERO_ERROR;
  213     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
  214     assertSuccess("getAvailableIDs()", ec);
  215     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
  216     int32_t n = Transliterator::countAvailableIDs();
  217     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
  218                avail->count(ec) == n);
  219     assertSuccess("count()", ec);
  220     UnicodeString name;
  221     for (int32_t i=0; i<n; ++i) {
  222         const UnicodeString& id = *avail->snext(ec);
  223         if (!assertSuccess("snext()", ec) ||
  224             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
  225             break;
  226         }
  227         UnicodeString id2 = Transliterator::getAvailableID(i);
  228         if (id.length() < 1) {
  229             errln(UnicodeString("FAIL: getAvailableID(") +
  230                   i + ") returned empty string");
  231             continue;
  232         }
  233         if (id != id2) {
  234             errln(UnicodeString("FAIL: getAvailableID(") +
  235                   i + ") != getAvailableIDs().snext()");
  236             continue;
  237         }
  238         UParseError parseError;
  239         UErrorCode status = U_ZERO_ERROR;
  240         Transliterator* t = Transliterator::createInstance(id,
  241                               UTRANS_FORWARD, parseError,status);
  242         name.truncate(0);
  243         Transliterator::getDisplayName(id, name);
  244         if (t == 0) {
  245 #if UCONFIG_NO_BREAK_ITERATION
  246             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
  247             if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
  248                 id.compare((UnicodeString)"Thai-Latin") != 0)
  249 #endif
  250                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
  251                       /*", parse error " + parseError.code +*/
  252                       ", line " + parseError.line +
  253                       ", offset " + parseError.offset +
  254                       ", pre-context " + prettify(parseError.preContext, TRUE) +
  255                       ", post-context " +prettify(parseError.postContext,TRUE) +
  256                       ", Error: " + u_errorName(status));
  257                 // When createInstance fails, it deletes the failing
  258                 // entry from the available ID list.  We detect this
  259                 // here by looking for a change in countAvailableIDs.
  260             int32_t nn = Transliterator::countAvailableIDs();
  261             if (nn == (n - 1)) {
  262                 n = nn;
  263                 --i; // Compensate for deleted entry
  264             }
  265         } else {
  266             logln(UnicodeString("OK: ") + name + " (" + id + ")");
  267 
  268             // Now test toRules
  269             UnicodeString rules;
  270             t->toRules(rules, TRUE);
  271             Transliterator *u = Transliterator::createFromRules("x",
  272                                     rules, UTRANS_FORWARD, parseError,status);
  273             if (u == 0) {
  274                 errln(UnicodeString("FAIL: ") + id +
  275                       ".createFromRules() => bad rules" +
  276                       /*", parse error " + parseError.code +*/
  277                       ", line " + parseError.line +
  278                       ", offset " + parseError.offset +
  279                       ", context " + prettify(parseError.preContext, TRUE) +
  280                       ", rules: " + prettify(rules, TRUE));
  281             } else {
  282                 delete u;
  283             }
  284             delete t;
  285         }
  286     }
  287     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
  288     assertSuccess("snext()", ec);
  289     delete avail;
  290 
  291     // Now test the failure path
  292     UParseError parseError;
  293     UErrorCode status = U_ZERO_ERROR;
  294     UnicodeString id("<Not a valid Transliterator ID>");
  295     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
  296     if (t != 0) {
  297         errln("FAIL: " + id + " returned a transliterator");
  298         delete t;
  299     } else {
  300         logln("OK: Bogus ID handled properly");
  301     }
  302 }
  303 
  304 void TransliteratorTest::TestSimpleRules(void) {
  305     /* Example: rules 1. ab>x|y
  306      *                2. yc>z
  307      *
  308      * []|eabcd  start - no match, copy e to tranlated buffer
  309      * [e]|abcd  match rule 1 - copy output & adjust cursor
  310      * [ex|y]cd  match rule 2 - copy output & adjust cursor
  311      * [exz]|d   no match, copy d to transliterated buffer
  312      * [exzd]|   done
  313      */
  314     expect(UnicodeString("ab>x|y;", "") +
  315            "yc>z",
  316            "eabcd", "exzd");
  317 
  318     /* Another set of rules:
  319      *    1. ab>x|yzacw
  320      *    2. za>q
  321      *    3. qc>r
  322      *    4. cw>n
  323      *
  324      * []|ab       Rule 1
  325      * [x|yzacw]   No match
  326      * [xy|zacw]   Rule 2
  327      * [xyq|cw]    Rule 4
  328      * [xyqn]|     Done
  329      */
  330     expect(UnicodeString("ab>x|yzacw;") +
  331            "za>q;" +
  332            "qc>r;" +
  333            "cw>n",
  334            "ab", "xyqn");
  335 
  336     /* Test categories
  337      */
  338     UErrorCode status = U_ZERO_ERROR;
  339     UParseError parseError;
  340     Transliterator *t = Transliterator::createFromRules(
  341         "<ID>",
  342         UnicodeString("$dummy=").append((UChar)0xE100) +
  343         UnicodeString(";"
  344                       "$vowel=[aeiouAEIOU];"
  345                       "$lu=[:Lu:];"
  346                       "$vowel } $lu > '!';"
  347                       "$vowel > '&';"
  348                       "'!' { $lu > '^';"
  349                       "$lu > '*';"
  350                       "a > ERROR", ""),
  351         UTRANS_FORWARD, parseError,
  352         status);
  353     if (U_FAILURE(status)) {
  354         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
  355         return;
  356     }
  357     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
  358     delete t;
  359 }
  360 
  361 /**
  362  * Test inline set syntax and set variable syntax.
  363  */
  364 void TransliteratorTest::TestInlineSet(void) {
  365     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
  366     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
  367     
  368     expect(UnicodeString(
  369            "$digit = [0-9];"
  370            "$alpha = [a-zA-Z];"
  371            "$alphanumeric = [$digit $alpha];" // ***
  372            "$special = [^$alphanumeric];"     // ***
  373            "$alphanumeric > '-';"
  374            "$special > '*';", ""),
  375            
  376            "thx-1138", "---*----");
  377 }
  378 
  379 /**
  380  * Create some inverses and confirm that they work.  We have to be
  381  * careful how we do this, since the inverses will not be true
  382  * inverses -- we can't throw any random string at the composition
  383  * of the transliterators and expect the identity function.  F x
  384  * F' != I.  However, if we are careful about the input, we will
  385  * get the expected results.
  386  */
  387 void TransliteratorTest::TestRuleBasedInverse(void) {
  388     UnicodeString RULES =
  389         UnicodeString("abc>zyx;") +
  390         "ab>yz;" +
  391         "bc>zx;" +
  392         "ca>xy;" +
  393         "a>x;" +
  394         "b>y;" +
  395         "c>z;" +
  396 
  397         "abc<zyx;" +
  398         "ab<yz;" +
  399         "bc<zx;" +
  400         "ca<xy;" +
  401         "a<x;" +
  402         "b<y;" +
  403         "c<z;" +
  404 
  405         "";
  406 
  407     const char* DATA[] = {
  408         // Careful here -- random strings will not work.  If we keep
  409         // the left side to the domain and the right side to the range
  410         // we will be okay though (left, abc; right xyz).
  411         "a", "x",
  412         "abcacab", "zyxxxyy",
  413         "caccb", "xyzzy",
  414     };
  415 
  416     int32_t DATA_length = UPRV_LENGTHOF(DATA);
  417 
  418     UErrorCode status = U_ZERO_ERROR;
  419     UParseError parseError;
  420     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
  421                                 UTRANS_FORWARD, parseError, status);
  422     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
  423                                 UTRANS_REVERSE, parseError, status);
  424     if (U_FAILURE(status)) {
  425         errln("FAIL: RBT constructor failed");
  426         return;
  427     }
  428     for (int32_t i=0; i<DATA_length; i+=2) {
  429         expect(*fwd, DATA[i], DATA[i+1]);
  430         expect(*rev, DATA[i+1], DATA[i]);
  431     }
  432     delete fwd;
  433     delete rev;
  434 }
  435 
  436 /**
  437  * Basic test of keyboard.
  438  */
  439 void TransliteratorTest::TestKeyboard(void) {
  440     UParseError parseError;
  441     UErrorCode status = U_ZERO_ERROR;
  442     Transliterator *t = Transliterator::createFromRules("<ID>",
  443                               UnicodeString("psch>Y;")
  444                               +"ps>y;"
  445                               +"ch>x;"
  446                               +"a>A;",
  447                               UTRANS_FORWARD, parseError,
  448                               status);
  449     if (U_FAILURE(status)) {
  450         errln("FAIL: RBT constructor failed");
  451         return;
  452     }
  453     const char* DATA[] = {
  454         // insertion, buffer
  455         "a", "A",
  456         "p", "Ap",
  457         "s", "Aps",
  458         "c", "Apsc",
  459         "a", "AycA",
  460         "psch", "AycAY",
  461         0, "AycAY", // null means finishKeyboardTransliteration
  462     };
  463 
  464     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
  465     delete t;
  466 }
  467 
  468 /**
  469  * Basic test of keyboard with cursor.
  470  */
  471 void TransliteratorTest::TestKeyboard2(void) {
  472     UParseError parseError;
  473     UErrorCode status = U_ZERO_ERROR;
  474     Transliterator *t = Transliterator::createFromRules("<ID>",
  475                               UnicodeString("ych>Y;")
  476                               +"ps>|y;"
  477                               +"ch>x;"
  478                               +"a>A;",
  479                               UTRANS_FORWARD, parseError,
  480                               status);
  481     if (U_FAILURE(status)) {
  482         errln("FAIL: RBT constructor failed");
  483         return;
  484     }
  485     const char* DATA[] = {
  486         // insertion, buffer
  487         "a", "A",
  488         "p", "Ap",
  489         "s", "Aps", // modified for rollback - "Ay",
  490         "c", "Apsc", // modified for rollback - "Ayc",
  491         "a", "AycA",
  492         "p", "AycAp",
  493         "s", "AycAps", // modified for rollback - "AycAy",
  494         "c", "AycApsc", // modified for rollback - "AycAyc",
  495         "h", "AycAY",
  496         0, "AycAY", // null means finishKeyboardTransliteration
  497     };
  498 
  499     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
  500     delete t;
  501 }
  502 
  503 /**
  504  * Test keyboard transliteration with back-replacement.
  505  */
  506 void TransliteratorTest::TestKeyboard3(void) {
  507     // We want th>z but t>y.  Furthermore, during keyboard
  508     // transliteration we want t>y then yh>z if t, then h are
  509     // typed.
  510     UnicodeString RULES("t>|y;"
  511                         "yh>z;");
  512 
  513     const char* DATA[] = {
  514         // Column 1: characters to add to buffer (as if typed)
  515         // Column 2: expected appearance of buffer after
  516         //           keyboard xliteration.
  517         "a", "a",
  518         "b", "ab",
  519         "t", "abt", // modified for rollback - "aby",
  520         "c", "abyc",
  521         "t", "abyct", // modified for rollback - "abycy",
  522         "h", "abycz",
  523         0, "abycz", // null means finishKeyboardTransliteration
  524     };
  525 
  526     UParseError parseError;
  527     UErrorCode status = U_ZERO_ERROR;
  528     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
  529     if (U_FAILURE(status)) {
  530         errln("FAIL: RBT constructor failed");
  531         return;
  532     }
  533     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
  534     delete t;
  535 }
  536 
  537 void TransliteratorTest::keyboardAux(const Transliterator& t,
  538                                      const char* DATA[], int32_t DATA_length) {
  539     UErrorCode status = U_ZERO_ERROR;
  540     UTransPosition index={0, 0, 0, 0};
  541     UnicodeString s;
  542     for (int32_t i=0; i<DATA_length; i+=2) {
  543         UnicodeString log;
  544         if (DATA[i] != 0) {
  545             log = s + " + "
  546                 + DATA[i]
  547                 + " -> ";
  548             t.transliterate(s, index, DATA[i], status);
  549         } else {
  550             log = s + " => ";
  551             t.finishTransliteration(s, index);
  552         }
  553         // Show the start index '{' and the cursor '|'
  554         UnicodeString a, b, c;
  555         s.extractBetween(0, index.contextStart, a);
  556         s.extractBetween(index.contextStart, index.start, b);
  557         s.extractBetween(index.start, s.length(), c);
  558         log.append(a).
  559             append((UChar)LEFT_BRACE).
  560             append(b).
  561             append((UChar)PIPE).
  562             append(c);
  563         if (s == DATA[i+1] && U_SUCCESS(status)) {
  564             logln(log);
  565         } else {
  566             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
  567         }
  568     }
  569 }
  570 
  571 void TransliteratorTest::TestArabic(void) {
  572 // Test disabled for 2.0 until new Arabic transliterator can be written.
  573 //    /*
  574 //    const char* DATA[] = {
  575 //        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
  576 //                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
  577 //                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
  578 //                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
  579 //                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
  580 //                  "\u062c\u0645\u064a\u0644\u0629",
  581 //    };
  582 //    */
  583 //
  584 //    UChar ar_raw[] = {
  585 //        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
  586 //        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
  587 //        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
  588 //        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
  589 //        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
  590 //        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
  591 //    };
  592 //    UnicodeString ar(ar_raw);
  593 //    UErrorCode status=U_ZERO_ERROR;
  594 //    UParseError parseError;
  595 //    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
  596 //    if (t == 0) {
  597 //        errln("FAIL: createInstance failed");
  598 //        return;
  599 //    }
  600 //    expect(*t, "Arabic", ar);
  601 //    delete t;
  602 }
  603 
  604 /**
  605  * Compose the Kana transliterator forward and reverse and try
  606  * some strings that should come out unchanged.
  607  */
  608 void TransliteratorTest::TestCompoundKana(void) {
  609     UParseError parseError;
  610     UErrorCode status = U_ZERO_ERROR;
  611     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
  612     if (t == 0) {
  613         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
  614     } else {
  615         expect(*t, "aaaaa", "aaaaa");
  616         delete t;
  617     }
  618 }
  619 
  620 /**
  621  * Compose the hex transliterators forward and reverse.
  622  */
  623 void TransliteratorTest::TestCompoundHex(void) {
  624     UParseError parseError;
  625     UErrorCode status = U_ZERO_ERROR;
  626     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
  627     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
  628     Transliterator* transab[] = { a, b };
  629     Transliterator* transba[] = { b, a };
  630     if (a == 0 || b == 0) {
  631         errln("FAIL: construction failed");
  632         delete a;
  633         delete b;
  634         return;
  635     }
  636     // Do some basic tests of a
  637     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
  638     // Do some basic tests of b
  639     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
  640 
  641     Transliterator* ab = new CompoundTransliterator(transab, 2);
  642     UnicodeString s("abcde", "");
  643     expect(*ab, s, s);
  644 
  645     UnicodeString str(s);
  646     a->transliterate(str);
  647     Transliterator* ba = new CompoundTransliterator(transba, 2);
  648     expect(*ba, str, str);
  649 
  650     delete ab;
  651     delete ba;
  652     delete a;
  653     delete b;
  654 }
  655 
  656 int gTestFilterClassID = 0;
  657 /**
  658  * Used by TestFiltering().
  659  */
  660 class TestFilter : public UnicodeFilter {
  661     virtual TestFilter* clone() const {
  662         return new TestFilter(*this);
  663     }
  664     virtual UBool contains(UChar32 c) const {
  665         return c != (UChar)0x0063 /*c*/;
  666     }
  667     // Stubs
  668     virtual UnicodeString& toPattern(UnicodeString& result,
  669                                      UBool /*escapeUnprintable*/) const {
  670         return result;
  671     }
  672     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
  673         return FALSE;
  674     }
  675     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
  676 public:
  677     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
  678 };
  679 
  680 /**
  681  * Do some basic tests of filtering.
  682  */
  683 void TransliteratorTest::TestFiltering(void) {
  684     UParseError parseError;
  685     UErrorCode status = U_ZERO_ERROR;
  686     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
  687     if (hex == 0) {
  688         errln("FAIL: createInstance(Any-Hex) failed");
  689         return;
  690     }
  691     hex->adoptFilter(new TestFilter());
  692     UnicodeString s("abcde");
  693     hex->transliterate(s);
  694     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
  695     if (s == exp) {
  696         logln(UnicodeString("Ok:   \"") + exp + "\"");
  697     } else {
  698         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
  699     }
  700     
  701     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
  702     UnicodeFilter *f = hex->orphanFilter();
  703     if (f == NULL){
  704         errln("FAIL: orphanFilter() should get a UnicodeFilter");
  705     } else {
  706         delete f;
  707     }
  708     delete hex;
  709 }
  710 
  711 /**
  712  * Test anchors
  713  */
  714 void TransliteratorTest::TestAnchors(void) {
  715     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
  716            "aaa",
  717            "012");
  718     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
  719            "aaa",
  720            "012");
  721     expect(UnicodeString("^ab  > 01 ;"
  722            " ab  > |8 ;"
  723            "  b  > k ;"
  724            " 8x$ > 45 ;"
  725            " 8x  > 77 ;", ""),
  726 
  727            "ababbabxabx",
  728            "018k7745");
  729     expect(UnicodeString("$s = [z$] ;"
  730            "$s{ab    > 01 ;"
  731            "   ab    > |8 ;"
  732            "    b    > k ;"
  733            "   8x}$s > 45 ;"
  734            "   8x    > 77 ;", ""),
  735 
  736            "abzababbabxzabxabx",
  737            "01z018k45z01x45");
  738 }
  739 
  740 /**
  741  * Test pattern quoting and escape mechanisms.
  742  */
  743 void TransliteratorTest::TestPatternQuoting(void) {
  744     // Array of 3n items
  745     // Each item is <rules>, <input>, <expected output>
  746     const UnicodeString DATA[] = {
  747         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
  748         UnicodeString(UChar(0x4E01)),
  749         "[male adult]"
  750     };
  751 
  752     for (int32_t i=0; i<3; i+=3) {
  753         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
  754         UParseError parseError;
  755         UErrorCode status = U_ZERO_ERROR;
  756         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
  757         if (U_FAILURE(status)) {
  758             errln("RBT constructor failed");
  759         } else {
  760             expect(*t, DATA[i+1], DATA[i+2]);
  761         }
  762         delete t;
  763     }
  764 }
  765 
  766 /**
  767  * Regression test for bugs found in Greek transliteration.
  768  */
  769 void TransliteratorTest::TestJ277(void) {
  770     UErrorCode status = U_ZERO_ERROR;
  771     UParseError parseError;
  772     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
  773     if (gl == NULL) {
  774         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
  775         return;
  776     }
  777 
  778     UChar sigma = 0x3C3;
  779     UChar upsilon = 0x3C5;
  780     UChar nu = 0x3BD;
  781 //    UChar PHI = 0x3A6;
  782     UChar alpha = 0x3B1;
  783 //    UChar omega = 0x3C9;
  784 //    UChar omicron = 0x3BF;
  785 //    UChar epsilon = 0x3B5;
  786 
  787     // sigma upsilon nu -> syn
  788     UnicodeString syn;
  789     syn.append(sigma).append(upsilon).append(nu);
  790     expect(*gl, syn, "syn");
  791 
  792     // sigma alpha upsilon nu -> saun
  793     UnicodeString sayn;
  794     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
  795     expect(*gl, sayn, "saun");
  796 
  797     // Again, using a smaller rule set
  798     UnicodeString rules(
  799                 "$alpha   = \\u03B1;"
  800                 "$nu      = \\u03BD;"
  801                 "$sigma   = \\u03C3;"
  802                 "$ypsilon = \\u03C5;"
  803                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
  804                 "s <>           $sigma;"
  805                 "a <>           $alpha;"
  806                 "u <>  $vowel { $ypsilon;"
  807                 "y <>           $ypsilon;"
  808                 "n <>           $nu;",
  809                 "");
  810     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
  811     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
  812     expect(*mini, syn, "syn");
  813     expect(*mini, sayn, "saun");
  814     delete mini;
  815     mini = NULL;
  816 
  817 #if !UCONFIG_NO_FORMATTING
  818     // Transliterate the Greek locale data
  819     Locale el("el");
  820     DateFormatSymbols syms(el, status);
  821     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
  822     int32_t i, count;
  823     const UnicodeString* data = syms.getMonths(count);
  824     for (i=0; i<count; ++i) {
  825         if (data[i].length() == 0) {
  826             continue;
  827         }
  828         UnicodeString out(data[i]);
  829         gl->transliterate(out);
  830         UBool ok = TRUE;
  831         if (data[i].length() >= 2 && out.length() >= 2 &&
  832             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
  833             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
  834                 ok = FALSE;
  835             }
  836         }
  837         if (ok) {
  838             logln(prettify(data[i] + " -> " + out));
  839         } else {
  840             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
  841         }
  842     }
  843 #endif
  844 
  845     delete gl;
  846 }
  847 
  848 /**
  849  * Prefix, suffix support in hex transliterators
  850  */
  851 void TransliteratorTest::TestJ243(void) {
  852     UErrorCode ec = U_ZERO_ERROR;
  853 
  854     // Test default Hex-Any, which should handle
  855     // \u, \U, u+, and U+
  856     Transliterator *hex =
  857         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
  858     if (assertSuccess("getInstance", ec)) {
  859         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
  860     }
  861     delete hex;
  862 
  863 //    // Try a custom Hex-Unicode
  864 //    // \uXXXX and &#xXXXX;
  865 //    ec = U_ZERO_ERROR;
  866 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
  867 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
  868 //           "abcd5fx012&#x00033;");
  869 //    // Try custom Any-Hex (default is tested elsewhere)
  870 //    ec = U_ZERO_ERROR;
  871 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
  872 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
  873 }
  874 
  875 /**
  876  * Parsers need better syntax error messages.
  877  */
  878 void TransliteratorTest::TestJ329(void) {
  879     
  880     struct { UBool containsErrors; const char* rule; } DATA[] = {
  881         { FALSE, "a > b; c > d" },
  882         { TRUE,  "a > b; no operator; c > d" },
  883     };
  884     int32_t DATA_length = UPRV_LENGTHOF(DATA);
  885 
  886     for (int32_t i=0; i<DATA_length; ++i) {
  887         UErrorCode status = U_ZERO_ERROR;
  888         UParseError parseError;
  889         Transliterator *rbt = Transliterator::createFromRules("<ID>",
  890                                     DATA[i].rule,
  891                                     UTRANS_FORWARD,
  892                                     parseError,
  893                                     status);
  894         UBool gotError = U_FAILURE(status);
  895         UnicodeString desc(DATA[i].rule);
  896         desc.append(gotError ? " -> error" : " -> no error");
  897         if (gotError) {
  898             desc = desc + ", ParseError code=" + u_errorName(status) +
  899                 " line=" + parseError.line +
  900                 " offset=" + parseError.offset +
  901                 " context=" + parseError.preContext;
  902         }
  903         if (gotError == DATA[i].containsErrors) {
  904             logln(UnicodeString("Ok:   ") + desc);
  905         } else {
  906             errln(UnicodeString("FAIL: ") + desc);
  907         }
  908         delete rbt;
  909     }
  910 }
  911 
  912 /**
  913  * Test segments and segment references.
  914  */
  915 void TransliteratorTest::TestSegments(void) {
  916     // Array of 3n items
  917     // Each item is <rules>, <input>, <expected output>
  918     UnicodeString DATA[] = {
  919         "([a-z]) '.' ([0-9]) > $2 '-' $1",
  920         "abc.123.xyz.456",
  921         "ab1-c23.xy4-z56",
  922 
  923         // nested
  924         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
  925         "a1 b2",
  926         "a1.a.1 b2.b.2",
  927     };
  928     int32_t DATA_length = UPRV_LENGTHOF(DATA);
  929 
  930     for (int32_t i=0; i<DATA_length; i+=3) {
  931         logln("Pattern: " + prettify(DATA[i]));
  932         UParseError parseError;
  933         UErrorCode status = U_ZERO_ERROR;
  934         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
  935         if (U_FAILURE(status)) {
  936             errln("FAIL: RBT constructor");
  937         } else {
  938             expect(*t, DATA[i+1], DATA[i+2]);
  939         }
  940         delete t;
  941     }
  942 }
  943 
  944 /**
  945  * Test cursor positioning outside of the key
  946  */
  947 void TransliteratorTest::TestCursorOffset(void) {
  948     // Array of 3n items
  949     // Each item is <rules>, <input>, <expected output>
  950     UnicodeString DATA[] = {
  951         "pre {alpha} post > | @ ALPHA ;"
  952         "eALPHA > beta ;"
  953         "pre {beta} post > BETA @@ | ;"
  954         "post > xyz",
  955 
  956         "prealphapost prebetapost",
  957 
  958         "prbetaxyz preBETApost",
  959     };
  960     int32_t DATA_length = UPRV_LENGTHOF(DATA);
  961 
  962     for (int32_t i=0; i<DATA_length; i+=3) {
  963         logln("Pattern: " + prettify(DATA[i]));
  964         UParseError parseError;
  965         UErrorCode status = U_ZERO_ERROR;
  966         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
  967         if (U_FAILURE(status)) {
  968             errln("FAIL: RBT constructor");
  969         } else {
  970             expect(*t, DATA[i+1], DATA[i+2]);
  971         }
  972         delete t;
  973     }
  974 }
  975 
  976 /**
  977  * Test zero length and > 1 char length variable values.  Test
  978  * use of variable refs in UnicodeSets.
  979  */
  980 void TransliteratorTest::TestArbitraryVariableValues(void) {
  981     // Array of 3n items
  982     // Each item is <rules>, <input>, <expected output>
  983     UnicodeString DATA[] = {
  984         "$abe = ab;"
  985         "$pat = x[yY]z;"
  986         "$ll  = 'a-z';"
  987         "$llZ = [$ll];"
  988         "$llY = [$ll$pat];"
  989         "$emp = ;"
  990 
  991         "$abe > ABE;"
  992         "$pat > END;"
  993         "$llZ > 1;"
  994         "$llY > 2;"
  995         "7$emp 8 > 9;"
  996         "",
  997 
  998         "ab xYzxyz stY78",
  999         "ABE ENDEND 1129",
 1000     };
 1001     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 1002 
 1003     for (int32_t i=0; i<DATA_length; i+=3) {
 1004         logln("Pattern: " + prettify(DATA[i]));
 1005         UParseError parseError;
 1006         UErrorCode status = U_ZERO_ERROR;
 1007         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 1008         if (U_FAILURE(status)) {
 1009             errln("FAIL: RBT constructor");
 1010         } else {
 1011             expect(*t, DATA[i+1], DATA[i+2]);
 1012         }
 1013         delete t;
 1014     }
 1015 }
 1016 
 1017 /**
 1018  * Confirm that the contextStart, contextLimit, start, and limit
 1019  * behave correctly. J474.
 1020  */
 1021 void TransliteratorTest::TestPositionHandling(void) {
 1022     // Array of 3n items
 1023     // Each item is <rules>, <input>, <expected output>
 1024     const char* DATA[] = {
 1025         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
 1026         "xtat txtb", // pos 0,9,0,9
 1027         "xTTaSS TTxUUb",
 1028 
 1029         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
 1030         "xtat txtb", // pos 2,9,3,8
 1031         "xtaSS TTxUUb",
 1032 
 1033         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
 1034         "xtat txtb", // pos 3,8,3,8
 1035         "xtaTT TTxTTb",
 1036     };
 1037 
 1038     // Array of 4n positions -- these go with the DATA array
 1039     // They are: contextStart, contextLimit, start, limit
 1040     int32_t POS[] = {
 1041         0, 9, 0, 9,
 1042         2, 9, 3, 8,
 1043         3, 8, 3, 8,
 1044     };
 1045 
 1046     int32_t n = UPRV_LENGTHOF(DATA) / 3;
 1047     for (int32_t i=0; i<n; i++) {
 1048         UErrorCode status = U_ZERO_ERROR;
 1049         UParseError parseError;
 1050         Transliterator *t = Transliterator::createFromRules("<ID>",
 1051                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
 1052         if (U_FAILURE(status)) {
 1053             delete t;
 1054             errln("FAIL: RBT constructor");
 1055             return;
 1056         }
 1057         UTransPosition pos;
 1058         pos.contextStart= POS[4*i];
 1059         pos.contextLimit = POS[4*i+1];
 1060         pos.start = POS[4*i+2];
 1061         pos.limit = POS[4*i+3];
 1062         UnicodeString rsource(DATA[3*i+1]);
 1063         t->transliterate(rsource, pos, status);
 1064         if (U_FAILURE(status)) {
 1065             delete t;
 1066             errln("FAIL: transliterate");
 1067             return;
 1068         }
 1069         t->finishTransliteration(rsource, pos);
 1070         expectAux(DATA[3*i],
 1071                   DATA[3*i+1],
 1072                   rsource,
 1073                   DATA[3*i+2]);
 1074         delete t;
 1075     }
 1076 }
 1077 
 1078 /**
 1079  * Test the Hiragana-Katakana transliterator.
 1080  */
 1081 void TransliteratorTest::TestHiraganaKatakana(void) {
 1082     UParseError parseError;
 1083     UErrorCode status = U_ZERO_ERROR;
 1084     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
 1085     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
 1086     if (hk == 0 || kh == 0) {
 1087         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
 1088         delete hk;
 1089         delete kh;
 1090         return;
 1091     }
 1092 
 1093     // Array of 3n items
 1094     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
 1095     const char* DATA[] = {
 1096         "both",
 1097         "\\u3042\\u3090\\u3099\\u3092\\u3050",
 1098         "\\u30A2\\u30F8\\u30F2\\u30B0",
 1099 
 1100         "kh",
 1101         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
 1102         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
 1103     };
 1104     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 1105 
 1106     for (int32_t i=0; i<DATA_length; i+=3) {
 1107         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
 1108         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
 1109         switch (*DATA[i]) {
 1110         case 0x68: //'h': // Hiragana-Katakana
 1111             expect(*hk, h, k);
 1112             break;
 1113         case 0x6B: //'k': // Katakana-Hiragana
 1114             expect(*kh, k, h);
 1115             break;
 1116         case 0x62: //'b': // both
 1117             expect(*hk, h, k);
 1118             expect(*kh, k, h);
 1119             break;
 1120         }
 1121     }
 1122     delete hk;
 1123     delete kh;
 1124 }
 1125 
 1126 /**
 1127  * Test cloning / copy constructor of RBT.
 1128  */
 1129 void TransliteratorTest::TestCopyJ476(void) {
 1130     // The real test here is what happens when the destructors are
 1131     // called.  So we let one object get destructed, and check to
 1132     // see that its copy still works.
 1133     Transliterator *t2 = 0;
 1134     {
 1135         UParseError parseError;
 1136         UErrorCode status = U_ZERO_ERROR;
 1137         Transliterator *t1 = Transliterator::createFromRules("t1",
 1138             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
 1139         if (U_FAILURE(status)) {
 1140             errln("FAIL: RBT constructor");
 1141             return;
 1142         }
 1143         t2 = t1->clone(); // Call copy constructor under the covers.
 1144         expect(*t1, "abcfoofoo", "ABcbar");
 1145         delete t1;
 1146     }
 1147     expect(*t2, "abcfoofoo", "ABcbar");
 1148     delete t2;
 1149 }
 1150 
 1151 /**
 1152  * Test inter-Indic transliterators.  These are composed.
 1153  * ICU4C Jitterbug 483.
 1154  */
 1155 void TransliteratorTest::TestInterIndic(void) {
 1156     UnicodeString ID("Devanagari-Gujarati", "");
 1157     UErrorCode status = U_ZERO_ERROR;
 1158     UParseError parseError;
 1159     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
 1160     if (dg == 0) {
 1161         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
 1162         return;
 1163     }
 1164     UnicodeString id = dg->getID();
 1165     if (id != ID) {
 1166         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
 1167     }
 1168     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
 1169     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
 1170     expect(*dg, dev, guj);
 1171     delete dg;
 1172 }
 1173 
 1174 /**
 1175  * Test filter syntax in IDs. (J918)
 1176  */
 1177 void TransliteratorTest::TestFilterIDs(void) {
 1178     // Array of 3n strings:
 1179     // <id>, <inverse id>, <input>, <expected output>
 1180     const char* DATA[] = {
 1181         "[aeiou]Any-Hex", // ID
 1182         "[aeiou]Hex-Any", // expected inverse ID
 1183         "quizzical",      // src
 1184         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
 1185         
 1186         "[aeiou]Any-Hex;[^5]Hex-Any",
 1187         "[^5]Any-Hex;[aeiou]Hex-Any",
 1188         "quizzical",
 1189         "q\\u0075izzical",
 1190         
 1191         "[abc]Null",
 1192         "[abc]Null",
 1193         "xyz",
 1194         "xyz",
 1195     };
 1196     enum { DATA_length = UPRV_LENGTHOF(DATA) };
 1197 
 1198     for (int i=0; i<DATA_length; i+=4) {
 1199         UnicodeString ID(DATA[i], "");
 1200         UnicodeString uID(DATA[i+1], "");
 1201         UnicodeString data2(DATA[i+2], "");
 1202         UnicodeString data3(DATA[i+3], "");
 1203         UParseError parseError;
 1204         UErrorCode status = U_ZERO_ERROR;
 1205         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
 1206         if (t == 0) {
 1207             errln("FAIL: createInstance(" + ID + ") returned NULL");
 1208             return;
 1209         }
 1210         expect(*t, data2, data3);
 1211 
 1212         // Check the ID
 1213         if (ID != t->getID()) {
 1214             errln("FAIL: createInstance(" + ID + ").getID() => " +
 1215                   t->getID());
 1216         }
 1217 
 1218         // Check the inverse
 1219         Transliterator *u = t->createInverse(status);
 1220         if (u == 0) {
 1221             errln("FAIL: " + ID + ".createInverse() returned NULL");
 1222         } else if (u->getID() != uID) {
 1223             errln("FAIL: " + ID + ".createInverse().getID() => " +
 1224                   u->getID() + ", expected " + uID);
 1225         }
 1226 
 1227         delete t;
 1228         delete u;
 1229     }
 1230 }
 1231 
 1232 /**
 1233  * Test the case mapping transliterators.
 1234  */
 1235 void TransliteratorTest::TestCaseMap(void) {
 1236     UParseError parseError;
 1237     UErrorCode status = U_ZERO_ERROR;
 1238     Transliterator* toUpper =
 1239         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
 1240     Transliterator* toLower =
 1241         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
 1242     Transliterator* toTitle =
 1243         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
 1244     if (toUpper==0 || toLower==0 || toTitle==0) {
 1245         errln("FAIL: createInstance returned NULL");
 1246         delete toUpper;
 1247         delete toLower;
 1248         delete toTitle;
 1249         return;
 1250     }
 1251 
 1252     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
 1253            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
 1254     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
 1255            "the quick brown foX jumped over the lazY dogs.");
 1256     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
 1257            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
 1258 
 1259     delete toUpper;
 1260     delete toLower;
 1261     delete toTitle;
 1262 }
 1263 
 1264 /**
 1265  * Test the name mapping transliterators.
 1266  */
 1267 void TransliteratorTest::TestNameMap(void) {
 1268     UParseError parseError;
 1269     UErrorCode status = U_ZERO_ERROR;
 1270     Transliterator* uni2name =
 1271         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
 1272     Transliterator* name2uni =
 1273         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
 1274     if (uni2name==0 || name2uni==0) {
 1275         errln("FAIL: createInstance returned NULL");
 1276         delete uni2name;
 1277         delete name2uni;
 1278         return;
 1279     }
 1280 
 1281     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
 1282     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
 1283            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
 1284     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
 1285            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
 1286 
 1287     delete uni2name;
 1288     delete name2uni;
 1289 
 1290     // round trip
 1291     Transliterator* t =
 1292         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
 1293     if (t==0) {
 1294         errln("FAIL: createInstance returned NULL");
 1295         delete t;
 1296         return;
 1297     }
 1298 
 1299     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
 1300     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
 1301     expect(*t, s, s);
 1302     delete t;
 1303 }
 1304 
 1305 /**
 1306  * Test liberalized ID syntax.  1006c
 1307  */
 1308 void TransliteratorTest::TestLiberalizedID(void) {
 1309     // Some test cases have an expected getID() value of NULL.  This
 1310     // means I have disabled the test case for now.  This stuff is
 1311     // still under development, and I haven't decided whether to make
 1312     // getID() return canonical case yet.  It will all get rewritten
 1313     // with the move to Source-Target/Variant IDs anyway. [aliu]
 1314     const char* DATA[] = {
 1315         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
 1316         "  Null  ", "Null", "whitespace",
 1317         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
 1318         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
 1319     };
 1320     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
 1321     UParseError parseError;
 1322     UErrorCode status= U_ZERO_ERROR;
 1323     for (int32_t i=0; i<DATA_length; i+=3) {
 1324         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
 1325         if (t == 0) {
 1326             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
 1327                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
 1328         } else {
 1329             UnicodeString exp;
 1330             if (DATA[i+1]) {
 1331                 exp = UnicodeString(DATA[i+1], "");
 1332             }
 1333             // Don't worry about getID() if the expected char*
 1334             // is NULL -- see above.
 1335             if (exp.length() == 0 || exp == t->getID()) {
 1336                 logln(UnicodeString("Ok: ") + DATA[i+2] +
 1337                       " create ID \"" + DATA[i] + "\" => \"" +
 1338                       exp + "\"");
 1339             } else {
 1340                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
 1341                       " create ID \"" + DATA[i] + "\" => \"" +
 1342                       t->getID() + "\", exp \"" + exp + "\"");
 1343             }
 1344             delete t;
 1345         }
 1346     }
 1347 }
 1348 
 1349 /* test for Jitterbug 912 */
 1350 void TransliteratorTest::TestCreateInstance(){
 1351     const char* FORWARD = "F";
 1352     const char* REVERSE = "R";
 1353     const char* DATA[] = {
 1354         // Column 1: id
 1355         // Column 2: direction
 1356         // Column 3: expected ID, or "" if expect failure
 1357         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
 1358 
 1359         // JB#2689: bad compound causes crash
 1360         "InvalidSource-InvalidTarget", FORWARD, "",
 1361         "InvalidSource-InvalidTarget", REVERSE, "",
 1362         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
 1363         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
 1364         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
 1365         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
 1366 
 1367         NULL
 1368     };
 1369 
 1370     for (int32_t i=0; DATA[i]; i+=3) {
 1371         UParseError err;
 1372         UErrorCode ec = U_ZERO_ERROR;
 1373         UnicodeString id(DATA[i]);
 1374         UTransDirection dir = (DATA[i+1]==FORWARD)?
 1375             UTRANS_FORWARD:UTRANS_REVERSE;
 1376         UnicodeString expID(DATA[i+2]);
 1377         Transliterator* t =
 1378             Transliterator::createInstance(id,dir,err,ec);
 1379         UnicodeString newID;
 1380         if (t) {
 1381             newID = t->getID();
 1382         }
 1383         UBool ok = (newID == expID);
 1384         if (!t) {
 1385             newID = u_errorName(ec);
 1386         }
 1387         if (ok) {
 1388             logln((UnicodeString)"Ok: createInstance(" +
 1389                   id + "," + DATA[i+1] + ") => " + newID);
 1390         } else {
 1391             dataerrln((UnicodeString)"FAIL: createInstance(" +
 1392                   id + "," + DATA[i+1] + ") => " + newID +
 1393                   ", expected " + expID);
 1394         }
 1395         delete t;
 1396     }
 1397 }
 1398 
 1399 /**
 1400  * Test the normalization transliterator.
 1401  */
 1402 void TransliteratorTest::TestNormalizationTransliterator() {
 1403     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
 1404     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
 1405     const char* CANON[] = {
 1406         // Input               Decomposed            Composed
 1407         "cat",                "cat",                "cat"               ,
 1408         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
 1409 
 1410         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
 1411         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
 1412 
 1413         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
 1414         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
 1415         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
 1416 
 1417         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
 1418         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
 1419 
 1420         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
 1421         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
 1422         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
 1423 
 1424         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
 1425         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
 1426 
 1427         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
 1428         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
 1429 
 1430         "Henry IV",           "Henry IV",           "Henry IV"          ,
 1431         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
 1432 
 1433         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
 1434         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
 1435         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
 1436         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
 1437         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
 1438 
 1439         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
 1440         0 // end
 1441     };
 1442 
 1443     const char* COMPAT[] = {
 1444         // Input               Decomposed            Composed
 1445         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
 1446 
 1447         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
 1448         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
 1449 
 1450         "Henry IV",           "Henry IV",           "Henry IV"          ,
 1451         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
 1452 
 1453         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
 1454         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
 1455 
 1456         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
 1457         0 // end
 1458     };
 1459 
 1460     int32_t i;
 1461     UParseError parseError;
 1462     UErrorCode status = U_ZERO_ERROR;
 1463     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
 1464     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
 1465     if (!NFD || !NFC) {
 1466         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
 1467         delete NFD;
 1468         delete NFC;
 1469         return;
 1470     }
 1471     for (i=0; CANON[i]; i+=3) {
 1472         UnicodeString in = CharsToUnicodeString(CANON[i]);
 1473         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
 1474         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
 1475         expect(*NFD, in, expd);
 1476         expect(*NFC, in, expc);
 1477     }
 1478     delete NFD;
 1479     delete NFC;
 1480 
 1481     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
 1482     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
 1483     if (!NFKD || !NFKC) {
 1484         dataerrln("FAIL: createInstance failed");
 1485         delete NFKD;
 1486         delete NFKC;
 1487         return;
 1488     }
 1489     for (i=0; COMPAT[i]; i+=3) {
 1490         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
 1491         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
 1492         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
 1493         expect(*NFKD, in, expkd);
 1494         expect(*NFKC, in, expkc);
 1495     }
 1496     delete NFKD;
 1497     delete NFKC;
 1498 
 1499     UParseError pe;
 1500     status = U_ZERO_ERROR;
 1501     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
 1502                                                        UTRANS_FORWARD,
 1503                                                        pe, status);
 1504     if (t == 0) {
 1505         errln("FAIL: createInstance failed");
 1506     }
 1507     expect(*t, CharsToUnicodeString("\\u010dx"),
 1508            CharsToUnicodeString("c\\u030C"));
 1509     delete t;
 1510 }
 1511 
 1512 /**
 1513  * Test we can create basic transliterator even without data.
 1514  */
 1515 void TransliteratorTest::TestBasicTransliteratorEvenWithoutData() {
 1516     const char16_t* TEST_DATA = u"\u0124e\u0301 \uFB01nd x";
 1517     const char16_t* EXPECTED_RESULTS[] = {
 1518         u"H\u0302e\u0301 \uFB01nd x",  // NFD
 1519         u"\u0124\u00E9 \uFB01nd x",  // NFC
 1520         u"H\u0302e\u0301 find x",  // NFKD
 1521         u"\u0124\u00E9 find x",  // NFKC
 1522         u"\u0124e\u0301 \uFB01nd x",  // Hex-Any
 1523         u"\u0125e\u0301 \uFB01nd x",  // Lower
 1524         u"\u0124e\uFB01ndx",  // [:^L:]Remove
 1525         u"H\u0302e\u0301 \uFB01nd ",  // NFD; [x]Remove
 1526         u"h\u0302e\u0301 find x",  // Lower; NFKD;
 1527         u"hefindx",  // Lower; NFKD; [:^L:]Remove; NFC;
 1528         u"\u0124e \uFB01nd x",  // [:Nonspacing Mark:] Remove;
 1529         u"He \uFB01nd x",  // NFD; [:Nonspacing Mark:] Remove; NFC;
 1530         // end
 1531         0
 1532     };
 1533 
 1534     const char* BASIC_TRANSLITERATOR_ID[] = {
 1535         "NFD",
 1536         "NFC",
 1537         "NFKD",
 1538         "NFKC",
 1539         "Hex-Any",
 1540         "Lower",
 1541         "[:^L:]Remove",
 1542         "NFD; [x]Remove",
 1543         "Lower; NFKD;",
 1544         "Lower; NFKD; [:^L:]Remove; NFC;",
 1545         "[:Nonspacing Mark:] Remove;",
 1546         "NFD; [:Nonspacing Mark:] Remove; NFC;",
 1547         // end
 1548         0
 1549     };
 1550     const char* BASIC_TRANSLITERATOR_RULES[] = {
 1551         "::Lower; ::NFKD;",
 1552         "::Lower; ::NFKD; ::[:^L:]Remove; ::NFC;",
 1553         "::[:Nonspacing Mark:] Remove;",
 1554         "::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;",
 1555         // end
 1556         0
 1557     };
 1558     for (int32_t i=0; BASIC_TRANSLITERATOR_ID[i]; i++) {
 1559         UErrorCode status = U_ZERO_ERROR;
 1560         UParseError parseError;
 1561         std::unique_ptr<Transliterator> translit(Transliterator::createInstance(
 1562             BASIC_TRANSLITERATOR_ID[i], UTRANS_FORWARD, parseError, status));
 1563         if (translit.get() == nullptr || !U_SUCCESS(status)) {
 1564             dataerrln("FAIL: createInstance %s failed", BASIC_TRANSLITERATOR_ID[i]);
 1565             continue;
 1566         }
 1567         UnicodeString data(TEST_DATA);
 1568         UnicodeString expected(EXPECTED_RESULTS[i]);
 1569         translit->transliterate(data);
 1570         if (data != expected) {
 1571             dataerrln(UnicodeString("FAIL: expected translit(") +
 1572                       BASIC_TRANSLITERATOR_ID[i] + ") = '" +
 1573                       EXPECTED_RESULTS[i] + "' but got '" + data);
 1574             continue;
 1575         }
 1576     }
 1577     for (int32_t i=0; BASIC_TRANSLITERATOR_RULES[i]; i++) {
 1578         UErrorCode status = U_ZERO_ERROR;
 1579         UParseError parseError;
 1580         std::unique_ptr<Transliterator> translit(Transliterator::createFromRules(
 1581             "Test",
 1582             BASIC_TRANSLITERATOR_RULES[i], UTRANS_FORWARD, parseError, status));
 1583         if (translit.get() == nullptr || !U_SUCCESS(status)) {
 1584             dataerrln("FAIL: createFromRules %s failed", BASIC_TRANSLITERATOR_RULES[i]);
 1585             continue;
 1586         }
 1587     }
 1588 }
 1589 
 1590 /**
 1591  * Test compound RBT rules.
 1592  */
 1593 void TransliteratorTest::TestCompoundRBT(void) {
 1594     // Careful with spacing and ';' here:  Phrase this exactly
 1595     // as toRules() is going to return it.  If toRules() changes
 1596     // with regard to spacing or ';', then adjust this string.
 1597     UnicodeString rule("::Hex-Any;\n"
 1598                        "::Any-Lower;\n"
 1599                        "a > '.A.';\n"
 1600                        "b > '.B.';\n"
 1601                        "::[^t]Any-Upper;", "");
 1602     UParseError parseError;
 1603     UErrorCode status = U_ZERO_ERROR;
 1604     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
 1605     if (t == 0) {
 1606         errln("FAIL: createFromRules failed");
 1607         return;
 1608     }
 1609     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
 1610            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
 1611     UnicodeString r;
 1612     t->toRules(r, TRUE);
 1613     if (r == rule) {
 1614         logln((UnicodeString)"OK: toRules() => " + r);
 1615     } else {
 1616         errln((UnicodeString)"FAIL: toRules() => " + r +
 1617               ", expected " + rule);
 1618     }
 1619     delete t;
 1620 
 1621     // Now test toRules
 1622     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
 1623     if (t == 0) {
 1624         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
 1625         return;
 1626     }
 1627     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
 1628     t->toRules(r, TRUE);
 1629     if (r != exp) {
 1630         errln((UnicodeString)"FAIL: toRules() => " + r +
 1631               ", expected " + exp);
 1632     } else {
 1633         logln((UnicodeString)"OK: toRules() => " + r);
 1634     }
 1635     delete t;
 1636 
 1637     // Round trip the result of toRules
 1638     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
 1639     if (t == 0) {
 1640         errln("FAIL: createFromRules #2 failed");
 1641         return;
 1642     } else {
 1643         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
 1644     }
 1645 
 1646     // Test toRules again
 1647     t->toRules(r, TRUE);
 1648     if (r != exp) {
 1649         errln((UnicodeString)"FAIL: toRules() => " + r +
 1650               ", expected " + exp);
 1651     } else {
 1652         logln((UnicodeString)"OK: toRules() => " + r);
 1653     }
 1654 
 1655     delete t;
 1656 
 1657     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
 1658     // to what the regenerated ID will look like.
 1659     UnicodeString id("Upper(Lower);(NFKC)", "");
 1660     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
 1661     if (t == 0) {
 1662         errln("FAIL: createInstance #2 failed");
 1663         return;
 1664     }
 1665     if (t->getID() == id) {
 1666         logln((UnicodeString)"OK: created " + id);
 1667     } else {
 1668         errln((UnicodeString)"FAIL: createInstance(" + id +
 1669               ").getID() => " + t->getID());
 1670     }
 1671 
 1672     Transliterator *u = t->createInverse(status);
 1673     if (u == 0) {
 1674         errln("FAIL: createInverse failed");
 1675         delete t;
 1676         return;
 1677     }
 1678     exp = "NFKC();Lower(Upper)";
 1679     if (u->getID() == exp) {
 1680         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
 1681               u->getID());
 1682     } else {
 1683         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
 1684               u->getID());
 1685     }
 1686     delete t;
 1687     delete u;
 1688 }
 1689 
 1690 /**
 1691  * Compound filter semantics were orginially not implemented
 1692  * correctly.  Originally, each component filter f(i) is replaced by
 1693  * f'(i) = f(i) && g, where g is the filter for the compound
 1694  * transliterator.
 1695  * 
 1696  * From Mark:
 1697  *
 1698  * Suppose and I have a transliterator X. Internally X is
 1699  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
 1700  * 
 1701  * The compound should convert all greek characters (through latin) to
 1702  * cyrillic, then lowercase the result. The filter should say "don't
 1703  * touch 'A' in the original". But because an intermediate result
 1704  * happens to go through "A", the Greek Alpha gets hung up.
 1705  */
 1706 void TransliteratorTest::TestCompoundFilter(void) {
 1707     UParseError parseError;
 1708     UErrorCode status = U_ZERO_ERROR;
 1709     Transliterator *t = Transliterator::createInstance
 1710         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
 1711     if (t == 0) {
 1712         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
 1713         return;
 1714     }
 1715     t->adoptFilter(new UnicodeSet("[^A]", status));
 1716     if (U_FAILURE(status)) {
 1717         errln("FAIL: UnicodeSet ct failed");
 1718         delete t;
 1719         return;
 1720     }
 1721     
 1722     // Only the 'A' at index 1 should remain unchanged
 1723     expect(*t,
 1724            CharsToUnicodeString("BA\\u039A\\u0391"),
 1725            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
 1726     delete t;
 1727 }
 1728 
 1729 void TransliteratorTest::TestRemove(void) {
 1730     UParseError parseError;
 1731     UErrorCode status = U_ZERO_ERROR;
 1732     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
 1733     if (t == 0) {
 1734         errln("FAIL: createInstance failed");
 1735         return;
 1736     }
 1737     
 1738     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
 1739     
 1740     // extra test for RemoveTransliterator::clone(), which at one point wasn't
 1741     // duplicating the filter
 1742     Transliterator* t2 = t->clone();
 1743     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
 1744     
 1745     delete t;
 1746     delete t2;
 1747 }
 1748 
 1749 void TransliteratorTest::TestToRules(void) {
 1750     const char* RBT = "rbt";
 1751     const char* SET = "set";
 1752     static const char* DATA[] = {
 1753         RBT,
 1754         "$a=\\u4E61; [$a] > A;",
 1755         "[\\u4E61] > A;",
 1756 
 1757         RBT,
 1758         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
 1759         "[[:Zs:][:Zl:]]{a} > A;",
 1760 
 1761         SET,
 1762         "[[:Zs:][:Zl:]]",
 1763         "[[:Zs:][:Zl:]]",
 1764 
 1765         SET,
 1766         "[:Ps:]",
 1767         "[:Ps:]",
 1768 
 1769         SET,
 1770         "[:L:]",
 1771         "[:L:]",
 1772 
 1773         SET,
 1774         "[[:L:]-[A]]",
 1775         "[[:L:]-[A]]",
 1776 
 1777         SET,
 1778         "[~[:Lu:][:Ll:]]",
 1779         "[~[:Lu:][:Ll:]]",
 1780 
 1781         SET,
 1782         "[~[a-z]]",
 1783         "[~[a-z]]",
 1784 
 1785         RBT,
 1786         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
 1787         "[^[:Zs:]]{a} > A;",
 1788 
 1789         RBT,
 1790         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
 1791         "[[a-z]-[:Zs:]]{a} > A;",
 1792 
 1793         RBT,
 1794         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
 1795         "[[:Zs:]&[a-z]]{a} > A;",
 1796 
 1797         RBT,
 1798         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
 1799         "[x[:Zs:]]{a} > A;",
 1800 
 1801         RBT,
 1802         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
 1803         "$macron = \\u0304 ;"
 1804         "$evowel = [aeiouyAEIOUY] ;"
 1805         "$iotasub = \\u0345 ;" 
 1806         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
 1807         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
 1808 
 1809         RBT,
 1810         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
 1811         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
 1812     };
 1813     static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
 1814 
 1815     for (int32_t d=0; d < DATA_length; d+=3) {
 1816         if (DATA[d] == RBT) {
 1817             // Transliterator test
 1818             UParseError parseError;
 1819             UErrorCode status = U_ZERO_ERROR;
 1820             Transliterator *t = Transliterator::createFromRules("ID",
 1821                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
 1822             if (t == 0) {
 1823                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
 1824                 return;
 1825             }
 1826             UnicodeString rules, escapedRules;
 1827             t->toRules(rules, FALSE);
 1828             t->toRules(escapedRules, TRUE);
 1829             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
 1830             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
 1831             if (rules == expRules) {
 1832                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
 1833                       " => " + rules);
 1834             } else {
 1835                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
 1836                       " => " + rules + ", exp " + expRules);
 1837             }
 1838             if (escapedRules == expEscapedRules) {
 1839                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
 1840                       " => " + escapedRules);
 1841             } else {
 1842                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
 1843                       " => " + escapedRules + ", exp " + expEscapedRules);
 1844             }
 1845             delete t;
 1846             
 1847         } else {
 1848             // UnicodeSet test
 1849             UErrorCode status = U_ZERO_ERROR;
 1850             UnicodeString pat(DATA[d+1], -1, US_INV);
 1851             UnicodeString expToPat(DATA[d+2], -1, US_INV);
 1852             UnicodeSet set(pat, status);
 1853             if (U_FAILURE(status)) {
 1854                 errln("FAIL: UnicodeSet ct failed");
 1855                 return;
 1856             }
 1857             // Adjust spacing etc. as necessary.
 1858             UnicodeString toPat;
 1859             set.toPattern(toPat);
 1860             if (expToPat == toPat) {
 1861                 logln((UnicodeString)"Ok: " + pat +
 1862                       " => " + toPat);
 1863             } else {
 1864                 errln((UnicodeString)"FAIL: " + pat +
 1865                       " => " + prettify(toPat, TRUE) +
 1866                       ", exp " + prettify(pat, TRUE));
 1867             }
 1868         }
 1869     }
 1870 }
 1871 
 1872 void TransliteratorTest::TestContext() {
 1873     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
 1874     expect("de > x; {d}e > y;",
 1875            "de",
 1876            "ye",
 1877            &pos);
 1878 
 1879     expect("ab{c} > z;",
 1880            "xadabdabcy",
 1881            "xadabdabzy");
 1882 }
 1883 
 1884 void TransliteratorTest::TestSupplemental() { 
 1885 
 1886     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
 1887                                 "a > $a; $s > i;"),
 1888            CharsToUnicodeString("ab\\U0001030Fx"),
 1889            CharsToUnicodeString("\\U00010300bix"));
 1890 
 1891     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
 1892                                 "$b=[A-Z\\U00010400-\\U0001044D];"
 1893                                 "($a)($b) > $2 $1;"),
 1894            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
 1895            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
 1896 
 1897     // k|ax\\U00010300xm
 1898 
 1899     // k|a\\U00010400\\U00010300xm
 1900     // ky|\\U00010400\\U00010300xm
 1901     // ky\\U00010400|\\U00010300xm
 1902 
 1903     // ky\\U00010400|\\U00010300\\U00010400m
 1904     // ky\\U00010400y|\\U00010400m
 1905     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
 1906                                 "$a {x} > | @ \\U00010400;"
 1907                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
 1908            CharsToUnicodeString("kax\\U00010300xm"),
 1909            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
 1910 
 1911     expectT("Any-Name",
 1912            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
 1913            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
 1914 
 1915     expectT("Any-Hex/Unicode",
 1916            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
 1917            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
 1918 
 1919     expectT("Any-Hex/C",
 1920            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
 1921            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
 1922 
 1923     expectT("Any-Hex/Perl",
 1924            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
 1925            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
 1926 
 1927     expectT("Any-Hex/Java",
 1928            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
 1929            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
 1930 
 1931     expectT("Any-Hex/XML",
 1932            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
 1933            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
 1934 
 1935     expectT("Any-Hex/XML10",
 1936            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
 1937            "&#66352;&#1113856;&#917601;&#160;");
 1938 
 1939     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
 1940            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
 1941            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
 1942 }
 1943 
 1944 void TransliteratorTest::TestQuantifier() { 
 1945 
 1946     // Make sure @ in a quantified anteContext works
 1947     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
 1948            "AAAAAb",
 1949            "aaa(aac)");
 1950 
 1951     // Make sure @ in a quantified postContext works
 1952     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
 1953            "baaaaa",
 1954            "caa(aaa)");
 1955 
 1956     // Make sure @ in a quantified postContext with seg ref works
 1957     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
 1958            "baaaaa",
 1959            "baa(aaa)");
 1960 
 1961     // Make sure @ past ante context doesn't enter ante context
 1962     UTransPosition pos = {0, 5, 3, 5};
 1963     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
 1964            "xxxab",
 1965            "xxx(ac)",
 1966            &pos);
 1967 
 1968     // Make sure @ past post context doesn't pass limit
 1969     UTransPosition pos2 = {0, 4, 0, 2};
 1970     expect("{b} a+ > c @@ |; x > y; a > A;",
 1971            "baxx",
 1972            "caxx",
 1973            &pos2);
 1974 
 1975     // Make sure @ past post context doesn't enter post context
 1976     expect("{b} a+ > c @@ |; x > y; a > A;",
 1977            "baxx",
 1978            "cayy");
 1979 
 1980     expect("(ab)? c > d;",
 1981            "c abc ababc",
 1982            "d d abd");
 1983     
 1984     // NOTE: The (ab)+ when referenced just yields a single "ab",
 1985     // not the full sequence of them.  This accords with perl behavior.
 1986     expect("(ab)+ {x} > '(' $1 ')';",
 1987            "x abx ababxy",
 1988            "x ab(ab) abab(ab)y");
 1989 
 1990     expect("b+ > x;",
 1991            "ac abc abbc abbbc",
 1992            "ac axc axc axc");
 1993 
 1994     expect("[abc]+ > x;",
 1995            "qac abrc abbcs abtbbc",
 1996            "qx xrx xs xtx");
 1997 
 1998     expect("q{(ab)+} > x;",
 1999            "qa qab qaba qababc qaba",
 2000            "qa qx qxa qxc qxa");
 2001 
 2002     expect("q(ab)* > x;",
 2003            "qa qab qaba qababc",
 2004            "xa x xa xc");
 2005 
 2006     // NOTE: The (ab)+ when referenced just yields a single "ab",
 2007     // not the full sequence of them.  This accords with perl behavior.
 2008     expect("q(ab)* > '(' $1 ')';",
 2009            "qa qab qaba qababc",
 2010            "()a (ab) (ab)a (ab)c");
 2011 
 2012     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
 2013     // quoted string
 2014     expect("'ab'+ > x;",
 2015            "bb ab ababb",
 2016            "bb x xb");
 2017 
 2018     // $foo+ and $foo* -- the quantifier should apply to the entire
 2019     // variable reference
 2020     expect("$var = ab; $var+ > x;",
 2021            "bb ab ababb",
 2022            "bb x xb");
 2023 }
 2024 
 2025 class TestTrans : public Transliterator {
 2026 public:
 2027     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
 2028     }
 2029     virtual TestTrans* clone(void) const {
 2030         return new TestTrans(getID());
 2031     }
 2032     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
 2033         UBool /*isIncremental*/) const
 2034     {
 2035         offsets.start = offsets.limit;
 2036     }
 2037     virtual UClassID getDynamicClassID() const;
 2038     static UClassID U_EXPORT2 getStaticClassID();
 2039 };
 2040 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
 2041 
 2042 /**
 2043  * Test Source-Target/Variant.
 2044  */
 2045 void TransliteratorTest::TestSTV(void) {
 2046     int32_t ns = Transliterator::countAvailableSources();
 2047     if (ns < 0 || ns > 255) {
 2048         errln((UnicodeString)"FAIL: Bad source count: " + ns);
 2049         return;
 2050     }
 2051     int32_t i, j;
 2052     for (i=0; i<ns; ++i) {
 2053         UnicodeString source;
 2054         Transliterator::getAvailableSource(i, source);
 2055         logln((UnicodeString)"" + i + ": " + source);
 2056         if (source.length() == 0) {
 2057             errln("FAIL: empty source");
 2058             continue;
 2059         }
 2060         int32_t nt = Transliterator::countAvailableTargets(source);
 2061         if (nt < 0 || nt > 255) {
 2062             errln((UnicodeString)"FAIL: Bad target count: " + nt);
 2063             continue;
 2064         }
 2065         for (int32_t j=0; j<nt; ++j) {
 2066             UnicodeString target;
 2067             Transliterator::getAvailableTarget(j, source, target);
 2068             logln((UnicodeString)" " + j + ": " + target);
 2069             if (target.length() == 0) {
 2070                 errln("FAIL: empty target");
 2071                 continue;
 2072             }
 2073             int32_t nv = Transliterator::countAvailableVariants(source, target);
 2074             if (nv < 0 || nv > 255) {
 2075                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
 2076                 continue;
 2077             }
 2078             for (int32_t k=0; k<nv; ++k) {
 2079                 UnicodeString variant;
 2080                 Transliterator::getAvailableVariant(k, source, target, variant);
 2081                 if (variant.length() == 0) { 
 2082                     logln((UnicodeString)"  " + k + ": <empty>");
 2083                 } else {
 2084                     logln((UnicodeString)"  " + k + ": " + variant);
 2085                 }
 2086             }
 2087         }
 2088     }
 2089 
 2090     // Test registration
 2091     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
 2092     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
 2093     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
 2094     for (i=0; i<3; ++i) {
 2095         Transliterator *t = new TestTrans(IDS[i]);
 2096         if (t == 0) {
 2097             errln("FAIL: out of memory");
 2098             return;
 2099         }
 2100         if (t->getID() != IDS[i]) {
 2101             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
 2102             delete t;
 2103             return;
 2104         }
 2105         Transliterator::registerInstance(t);
 2106         UErrorCode status = U_ZERO_ERROR;
 2107         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
 2108         if (t == NULL) {
 2109             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
 2110                   IDS[i]);
 2111         } else {
 2112             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
 2113                   IDS[i]);
 2114             delete t;
 2115         }
 2116         Transliterator::unregister(IDS[i]);
 2117         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
 2118         if (t != NULL) {
 2119             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
 2120                   IDS[i]);
 2121             delete t;
 2122         }
 2123     }
 2124 
 2125     // Make sure getAvailable API reflects removal
 2126     int32_t n = Transliterator::countAvailableIDs();
 2127     for (i=0; i<n; ++i) {
 2128         UnicodeString id = Transliterator::getAvailableID(i);
 2129         for (j=0; j<3; ++j) {
 2130             if (id.caseCompare(FULL_IDS[j],0)==0) {
 2131                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
 2132             }
 2133         }
 2134     }
 2135     n = Transliterator::countAvailableTargets("Any");
 2136     for (i=0; i<n; ++i) {
 2137         UnicodeString t;
 2138         Transliterator::getAvailableTarget(i, "Any", t);
 2139         if (t.caseCompare(IDS[0],0)==0) {
 2140             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
 2141         }
 2142     }
 2143     n = Transliterator::countAvailableSources();
 2144     for (i=0; i<n; ++i) {
 2145         UnicodeString s;
 2146         Transliterator::getAvailableSource(i, s);
 2147         for (j=0; j<3; ++j) {
 2148             if (SOURCES[j] == NULL) continue;
 2149             if (s.caseCompare(SOURCES[j],0)==0) {
 2150                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
 2151             }
 2152         }
 2153     }
 2154 }
 2155 
 2156 /**
 2157  * Test inverse of Greek-Latin; Title()
 2158  */
 2159 void TransliteratorTest::TestCompoundInverse(void) {
 2160     UParseError parseError;
 2161     UErrorCode status = U_ZERO_ERROR;
 2162     Transliterator *t = Transliterator::createInstance
 2163         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
 2164     if (t == 0) {
 2165         dataerrln("FAIL: createInstance - %s", u_errorName(status));
 2166         return;
 2167     }
 2168     UnicodeString exp("(Title);Latin-Greek");
 2169     if (t->getID() == exp) {
 2170         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
 2171               t->getID());
 2172     } else {
 2173         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
 2174               t->getID() + "\", expected \"" + exp + "\"");
 2175     }
 2176     delete t;
 2177 }
 2178 
 2179 /**
 2180  * Test NFD chaining with RBT
 2181  */
 2182 void TransliteratorTest::TestNFDChainRBT() {
 2183     UParseError pe;
 2184     UErrorCode ec = U_ZERO_ERROR;
 2185     Transliterator* t = Transliterator::createFromRules(
 2186                                "TEST", "::NFD; aa > Q; a > q;",
 2187                                UTRANS_FORWARD, pe, ec);
 2188     if (t == NULL || U_FAILURE(ec)) {
 2189         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
 2190         return;
 2191     }
 2192     expect(*t, "aa", "Q");
 2193     delete t;
 2194 
 2195     // TEMPORARY TESTS -- BEING DEBUGGED
 2196 //=-    UnicodeString s, s2;
 2197 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
 2198 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
 2199 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
 2200 //=-    expect(*t, s, s2);
 2201 //=-    delete t;
 2202 //=-
 2203 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
 2204 //=-    expect(*t, s2, s);
 2205 //=-    delete t;
 2206 //=-
 2207 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
 2208 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
 2209 //=-    expect(*t, s, s);
 2210 //=-    delete t;
 2211 
 2212 //    const char* source[] = {
 2213 //        /*
 2214 //        "\\u015Br\\u012Bmad",
 2215 //        "bhagavadg\\u012Bt\\u0101",
 2216 //        "adhy\\u0101ya",
 2217 //        "arjuna",
 2218 //        "vi\\u1E63\\u0101da",
 2219 //        "y\\u014Dga",
 2220 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
 2221 //        "uv\\u0101cr\\u0325",
 2222 //        */
 2223 //        "rmk\\u1E63\\u0113t",
 2224 //      //"dharmak\\u1E63\\u0113tr\\u0113",
 2225 //        /*
 2226 //        "kuruk\\u1E63\\u0113tr\\u0113",
 2227 //        "samav\\u0113t\\u0101",
 2228 //        "yuyutsava-\\u1E25",
 2229 //        "m\\u0101mak\\u0101-\\u1E25",
 2230 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
 2231 //        "kimakurvata",
 2232 //        "san\\u0304java",
 2233 //        */
 2234 //
 2235 //        0
 2236 //    };
 2237 //    const char* expected[] = {
 2238 //        /*
 2239 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
 2240 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
 2241 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
 2242 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
 2243 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
 2244 //        "\\u092f\\u094b\\u0917",
 2245 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
 2246 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
 2247 //        */
 2248 //        "\\u0927",
 2249 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
 2250 //        /*
 2251 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
 2252 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
 2253 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
 2254 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
 2255 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
 2256 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
 2257 //        "\\u0938\\u0902\\u091c\\u0935",
 2258 //        */
 2259 //        0
 2260 //    };
 2261 //    UErrorCode status = U_ZERO_ERROR;
 2262 //    UParseError parseError;
 2263 //    UnicodeString message;
 2264 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
 2265 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
 2266 //    if(U_FAILURE(status)){
 2267 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
 2268 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
 2269 //        delete latinToDevToLatin;
 2270 //        delete devToLatinToDev;
 2271 //        return;
 2272 //    }
 2273 //    UnicodeString gotResult;
 2274 //    for(int i= 0; source[i] != 0; i++){
 2275 //        gotResult = source[i];
 2276 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
 2277 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
 2278 //    }
 2279 //    delete latinToDevToLatin;
 2280 //    delete devToLatinToDev;
 2281 }
 2282 
 2283 /**
 2284  * Inverse of "Null" should be "Null". (J21)
 2285  */
 2286 void TransliteratorTest::TestNullInverse() {
 2287     UParseError pe;
 2288     UErrorCode ec = U_ZERO_ERROR;
 2289     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
 2290     if (t == 0 || U_FAILURE(ec)) {
 2291         errln("FAIL: createInstance");
 2292         return;
 2293     }
 2294     Transliterator *u = t->createInverse(ec);
 2295     if (u == 0 || U_FAILURE(ec)) {
 2296         errln("FAIL: createInverse");
 2297         delete t;
 2298         return;
 2299     }
 2300     if (u->getID() != "Null") {
 2301         errln("FAIL: Inverse of Null should be Null");
 2302     }
 2303     delete t;
 2304     delete u;
 2305 }
 2306 
 2307 /**
 2308  * Check ID of inverse of alias. (J22)
 2309  */
 2310 void TransliteratorTest::TestAliasInverseID() {
 2311     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
 2312     UParseError pe;
 2313     UErrorCode ec = U_ZERO_ERROR;
 2314     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
 2315     if (t == 0 || U_FAILURE(ec)) {
 2316         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
 2317         return;
 2318     }
 2319     Transliterator *u = t->createInverse(ec);
 2320     if (u == 0 || U_FAILURE(ec)) {
 2321         errln("FAIL: createInverse");
 2322         delete t;
 2323         return;
 2324     }
 2325     UnicodeString exp = "Hangul-Latin";
 2326     UnicodeString got = u->getID();
 2327     if (got != exp) {
 2328         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
 2329               ", expected " + exp);
 2330     }
 2331     delete t;
 2332     delete u;
 2333 }
 2334 
 2335 /**
 2336  * Test IDs of inverses of compound transliterators. (J20)
 2337  */
 2338 void TransliteratorTest::TestCompoundInverseID() {
 2339     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
 2340     UParseError pe;
 2341     UErrorCode ec = U_ZERO_ERROR;
 2342     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
 2343     if (t == 0 || U_FAILURE(ec)) {
 2344         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
 2345         return;
 2346     }
 2347     Transliterator *u = t->createInverse(ec);
 2348     if (u == 0 || U_FAILURE(ec)) {
 2349         errln("FAIL: createInverse");
 2350         delete t;
 2351         return;
 2352     }
 2353     UnicodeString exp = "NFD(NFC);Jamo-Latin";
 2354     UnicodeString got = u->getID();
 2355     if (got != exp) {
 2356         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
 2357               ", expected " + exp);
 2358     }
 2359     delete t;
 2360     delete u;
 2361 }
 2362 
 2363 /**
 2364  * Test undefined variable.
 2365 
 2366  */
 2367 void TransliteratorTest::TestUndefinedVariable() {
 2368     UnicodeString rule = "$initial } a <> \\u1161;";
 2369     UParseError pe;
 2370     UErrorCode ec = U_ZERO_ERROR;
 2371     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
 2372     delete t;
 2373     if (U_FAILURE(ec)) {
 2374         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
 2375               u_errorName(ec));
 2376         return;
 2377     }
 2378     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
 2379           u_errorName(ec));
 2380 }
 2381 
 2382 /**
 2383  * Test empty context.
 2384  */
 2385 void TransliteratorTest::TestEmptyContext() {
 2386     expect(" { a } > b;", "xay a ", "xby b ");
 2387 }
 2388 
 2389 /**
 2390 * Test compound filter ID syntax
 2391 */
 2392 void TransliteratorTest::TestCompoundFilterID(void) {
 2393     static const char* DATA[] = {
 2394         // Col. 1 = ID or rule set (latter must start with #)
 2395 
 2396         // = columns > 1 are null if expect col. 1 to be illegal =
 2397 
 2398         // Col. 2 = direction, "F..." or "R..."
 2399         // Col. 3 = source string
 2400         // Col. 4 = exp result
 2401 
 2402         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
 2403         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
 2404         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
 2405         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
 2406         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
 2407         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
 2408         NULL,
 2409     };
 2410 
 2411     for (int32_t i=0; DATA[i]; i+=4) {
 2412         UnicodeString id = CharsToUnicodeString(DATA[i]);
 2413         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
 2414             UTRANS_REVERSE : UTRANS_FORWARD;
 2415         UnicodeString source;
 2416         UnicodeString exp;
 2417         if (DATA[i+2] != NULL) {
 2418             source = CharsToUnicodeString(DATA[i+2]);
 2419             exp = CharsToUnicodeString(DATA[i+3]);
 2420         }
 2421         UBool expOk = (DATA[i+1] != NULL);
 2422         LocalPointer<Transliterator> t;
 2423         UParseError pe;
 2424         UErrorCode ec = U_ZERO_ERROR;
 2425         if (id.charAt(0) == 0x23/*#*/) {
 2426             t.adoptInstead(Transliterator::createFromRules("ID", id, direction, pe, ec));
 2427         } else {
 2428             t.adoptInstead(Transliterator::createInstance(id, direction, pe, ec));
 2429         }
 2430         UBool ok = (t.isValid() && U_SUCCESS(ec));
 2431         UnicodeString transID;
 2432         if (t.isValid()) {
 2433             transID = t->getID();
 2434         }
 2435         else {
 2436             transID = UnicodeString("NULL", "");
 2437         }
 2438         if (ok == expOk) {
 2439             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
 2440                   u_errorName(ec));
 2441             if (source.length() != 0) {
 2442                 expect(*t, source, exp);
 2443             }
 2444         } else {
 2445             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
 2446                   u_errorName(ec));
 2447         }
 2448     }
 2449 }
 2450 
 2451 /**
 2452  * Test new property set syntax
 2453  */
 2454 void TransliteratorTest::TestPropertySet() {
 2455     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
 2456     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
 2457            "[ a stitch ]\n[ in time ]\r[ saves 9]");
 2458 }
 2459 
 2460 /**
 2461  * Test various failure points of the new 2.0 engine.
 2462  */
 2463 void TransliteratorTest::TestNewEngine() {
 2464     UParseError pe;
 2465     UErrorCode ec = U_ZERO_ERROR;
 2466     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
 2467     if (t == 0 || U_FAILURE(ec)) {
 2468         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
 2469         return;
 2470     }
 2471     // Katakana should be untouched
 2472     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
 2473            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
 2474 
 2475     delete t;
 2476 
 2477 #if 1
 2478     // This test will only work if Transliterator.ROLLBACK is
 2479     // true.  Otherwise, this test will fail, revealing a
 2480     // limitation of global filters in incremental mode.
 2481     Transliterator *a =
 2482         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
 2483     Transliterator *A =
 2484         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
 2485     if (U_FAILURE(ec)) {
 2486         delete a;
 2487         delete A;
 2488         return;
 2489     }
 2490 
 2491     Transliterator* array[3];
 2492     array[0] = a;
 2493     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
 2494     array[2] = A;
 2495     if (U_FAILURE(ec)) {
 2496         errln("FAIL: createInstance NFD");
 2497         delete a;
 2498         delete A;
 2499         delete array[1];
 2500         return;
 2501     }
 2502 
 2503     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
 2504     if (U_FAILURE(ec)) {
 2505         errln("FAIL: UnicodeSet constructor");
 2506         delete a;
 2507         delete A;
 2508         delete array[1];
 2509         delete t;
 2510         return;
 2511     }
 2512 
 2513     expect(*t, "aAaA", "bAbA");
 2514 
 2515     assertTrue("countElements", t->countElements() == 3);
 2516     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
 2517     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
 2518     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
 2519     assertSuccess("getElement", ec);
 2520 
 2521     delete a;
 2522     delete A;
 2523     delete array[1];
 2524     delete t;
 2525 #endif
 2526 
 2527     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
 2528            "a",
 2529            "ax");
 2530 
 2531     UnicodeString gr = CharsToUnicodeString(
 2532         "$ddot = \\u0308 ;"
 2533         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
 2534         "$rough = \\u0314 ;"
 2535         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
 2536         "\\u03b1 <> a ;"
 2537         "$rough <> h ;");
 2538 
 2539     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
 2540 }
 2541 
 2542 /**
 2543  * Test quantified segment behavior.  We want:
 2544  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
 2545  */
 2546 void TransliteratorTest::TestQuantifiedSegment(void) {
 2547     // The normal case
 2548     expect("([abc]+) > x $1 x;", "cba", "xcbax");
 2549 
 2550     // The tricky case; the quantifier is around the segment
 2551     expect("([abc])+ > x $1 x;", "cba", "xax");
 2552 
 2553     // Tricky case in reverse direction
 2554     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
 2555 
 2556     // Check post-context segment
 2557     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
 2558 
 2559     // Test toRule/toPattern for non-quantified segment.
 2560     // Careful with spacing here.
 2561     UnicodeString r("([a-c]){q} > x $1 x;");
 2562     UParseError pe;
 2563     UErrorCode ec = U_ZERO_ERROR;
 2564     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
 2565     if (U_FAILURE(ec)) {
 2566         errln("FAIL: createFromRules");
 2567         delete t;
 2568         return;
 2569     }
 2570     UnicodeString rr;
 2571     t->toRules(rr, TRUE);
 2572     if (r != rr) {
 2573         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
 2574     } else {
 2575         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
 2576     }
 2577     delete t;
 2578 
 2579     // Test toRule/toPattern for quantified segment.
 2580     // Careful with spacing here.
 2581     r = "([a-c])+{q} > x $1 x;";
 2582     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
 2583     if (U_FAILURE(ec)) {
 2584         errln("FAIL: createFromRules");
 2585         delete t;
 2586         return;
 2587     }
 2588     t->toRules(rr, TRUE);
 2589     if (r != rr) {
 2590         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
 2591     } else {
 2592         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
 2593     }
 2594     delete t;
 2595 }
 2596 
 2597 //======================================================================
 2598 // Ram's tests
 2599 //======================================================================
 2600 void TransliteratorTest::TestDevanagariLatinRT(){
 2601     const int MAX_LEN= 52;
 2602     const char* const source[MAX_LEN] = {
 2603         "bh\\u0101rata",
 2604         "kra",
 2605         "k\\u1E63a",
 2606         "khra",
 2607         "gra",
 2608         "\\u1E45ra",
 2609         "cra",
 2610         "chra",
 2611         "j\\u00F1a",
 2612         "jhra",
 2613         "\\u00F1ra",
 2614         "\\u1E6Dya",
 2615         "\\u1E6Dhra",
 2616         "\\u1E0Dya",
 2617       //"r\\u0323ya", // \u095c is not valid in Devanagari
 2618         "\\u1E0Dhya",
 2619         "\\u1E5Bhra",
 2620         "\\u1E47ra",
 2621         "tta",
 2622         "thra",
 2623         "dda",
 2624         "dhra",
 2625         "nna",
 2626         "pra",
 2627         "phra",
 2628         "bra",
 2629         "bhra",
 2630         "mra",
 2631         "\\u1E49ra",
 2632       //"l\\u0331ra",
 2633         "yra",
 2634         "\\u1E8Fra",
 2635       //"l-",
 2636         "vra",
 2637         "\\u015Bra",
 2638         "\\u1E63ra",
 2639         "sra",
 2640         "hma",
 2641         "\\u1E6D\\u1E6Da",
 2642         "\\u1E6D\\u1E6Dha",
 2643         "\\u1E6Dh\\u1E6Dha",
 2644         "\\u1E0D\\u1E0Da",
 2645         "\\u1E0D\\u1E0Dha",
 2646         "\\u1E6Dya",
 2647         "\\u1E6Dhya",
 2648         "\\u1E0Dya",
 2649         "\\u1E0Dhya",
 2650         // Not roundtrippable -- 
 2651         // \\u0939\\u094d\\u094d\\u092E  - hma
 2652         // \\u0939\\u094d\\u092E         - hma
 2653         // CharsToUnicodeString("hma"),
 2654         "hya",
 2655         "\\u015Br\\u0325",
 2656         "\\u015Bca",
 2657         "\\u0115",
 2658         "san\\u0304j\\u012Bb s\\u0113nagupta",
 2659         "\\u0101nand vaddir\\u0101ju",    
 2660         "\\u0101",
 2661         "a"
 2662     };
 2663     const char* const expected[MAX_LEN] = {
 2664         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
 2665         "\\u0915\\u094D\\u0930",          /* kra         */
 2666         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
 2667         "\\u0916\\u094D\\u0930",          /* khra        */
 2668         "\\u0917\\u094D\\u0930",          /* gra         */
 2669         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
 2670         "\\u091A\\u094D\\u0930",          /* cra         */
 2671         "\\u091B\\u094D\\u0930",          /* chra        */
 2672         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
 2673         "\\u091D\\u094D\\u0930",          /* jhra        */
 2674         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
 2675         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
 2676         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
 2677         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
 2678       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
 2679         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
 2680         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
 2681         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
 2682         "\\u0924\\u094D\\u0924",          /* tta         */
 2683         "\\u0925\\u094D\\u0930",          /* thra        */
 2684         "\\u0926\\u094D\\u0926",          /* dda         */
 2685         "\\u0927\\u094D\\u0930",          /* dhra        */
 2686         "\\u0928\\u094D\\u0928",          /* nna         */
 2687         "\\u092A\\u094D\\u0930",          /* pra         */
 2688         "\\u092B\\u094D\\u0930",          /* phra        */
 2689         "\\u092C\\u094D\\u0930",          /* bra         */
 2690         "\\u092D\\u094D\\u0930",          /* bhra        */
 2691         "\\u092E\\u094D\\u0930",          /* mra         */
 2692         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
 2693       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
 2694         "\\u092F\\u094D\\u0930",          /* yra         */
 2695         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
 2696       //"l-",
 2697         "\\u0935\\u094D\\u0930",          /* vra         */
 2698         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
 2699         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
 2700         "\\u0938\\u094D\\u0930",          /* sra         */
 2701         "\\u0939\\u094d\\u092E",          /* hma         */
 2702         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
 2703         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
 2704         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
 2705         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
 2706         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
 2707         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
 2708         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
 2709         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
 2710         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
 2711      // "hma",                         /* hma         */
 2712         "\\u0939\\u094D\\u092F",          /* hya         */
 2713         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
 2714         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
 2715         "\\u090d",                        /* e\\u0306    */
 2716         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
 2717         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",    
 2718         "\\u0906",
 2719         "\\u0905",
 2720     };
 2721     UErrorCode status = U_ZERO_ERROR;
 2722     UParseError parseError;
 2723     UnicodeString message;
 2724     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
 2725     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
 2726     if(U_FAILURE(status)){
 2727         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
 2728         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
 2729         return;
 2730     }
 2731     UnicodeString gotResult;
 2732     for(int i= 0; i<MAX_LEN; i++){
 2733         gotResult = source[i];
 2734         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
 2735         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
 2736     }
 2737     delete latinToDev;
 2738     delete devToLatin;
 2739 }
 2740 
 2741 void TransliteratorTest::TestTeluguLatinRT(){
 2742     const int MAX_LEN=10;
 2743     const char* const source[MAX_LEN] = {   
 2744         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
 2745         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
 2746         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
 2747         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
 2748         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
 2749         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
 2750         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
 2751         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
 2752         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
 2753         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
 2754     };
 2755 
 2756     const char* const expected[MAX_LEN] = {
 2757         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",     
 2758         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",     
 2759         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
 2760         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
 2761         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
 2762         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
 2763         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
 2764         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
 2765         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
 2766         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
 2767     };
 2768 
 2769     UErrorCode status = U_ZERO_ERROR;
 2770     UParseError parseError;
 2771     UnicodeString message;
 2772     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
 2773     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
 2774     if(U_FAILURE(status)){
 2775         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
 2776         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
 2777         return;
 2778     }
 2779     UnicodeString gotResult;
 2780     for(int i= 0; i<MAX_LEN; i++){
 2781         gotResult = source[i];
 2782         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
 2783         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
 2784     }
 2785     delete latinToDev;
 2786     delete devToLatin;
 2787 }
 2788 
 2789 void TransliteratorTest::TestSanskritLatinRT(){
 2790     const int MAX_LEN =16;
 2791     const char* const source[MAX_LEN] = {
 2792         "rmk\\u1E63\\u0113t",
 2793         "\\u015Br\\u012Bmad",
 2794         "bhagavadg\\u012Bt\\u0101",
 2795         "adhy\\u0101ya",
 2796         "arjuna",
 2797         "vi\\u1E63\\u0101da",
 2798         "y\\u014Dga",
 2799         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
 2800         "uv\\u0101cr\\u0325",
 2801         "dharmak\\u1E63\\u0113tr\\u0113",
 2802         "kuruk\\u1E63\\u0113tr\\u0113",
 2803         "samav\\u0113t\\u0101",
 2804         "yuyutsava\\u1E25",
 2805         "m\\u0101mak\\u0101\\u1E25",
 2806     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
 2807         "kimakurvata",
 2808         "san\\u0304java",
 2809     };
 2810     const char* const expected[MAX_LEN] = {
 2811         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
 2812         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
 2813         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
 2814         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
 2815         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
 2816         "\\u0935\\u093f\\u0937\\u093e\\u0926",
 2817         "\\u092f\\u094b\\u0917",
 2818         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
 2819         "\\u0909\\u0935\\u093E\\u091A\\u0943",
 2820         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
 2821         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
 2822         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
 2823         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
 2824         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
 2825     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
 2826         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
 2827         "\\u0938\\u0902\\u091c\\u0935",
 2828     };
 2829     UErrorCode status = U_ZERO_ERROR;
 2830     UParseError parseError;
 2831     UnicodeString message;
 2832     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
 2833     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
 2834     if(U_FAILURE(status)){
 2835         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
 2836         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
 2837         return;
 2838     }
 2839     UnicodeString gotResult;
 2840     for(int i= 0; i<MAX_LEN; i++){
 2841         gotResult = source[i];
 2842         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
 2843         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
 2844     }
 2845     delete latinToDev;
 2846     delete devToLatin;
 2847 }
 2848 
 2849 
 2850 void TransliteratorTest::TestCompoundLatinRT(){
 2851     const char* const source[] = {
 2852         "rmk\\u1E63\\u0113t",
 2853         "\\u015Br\\u012Bmad",
 2854         "bhagavadg\\u012Bt\\u0101",
 2855         "adhy\\u0101ya",
 2856         "arjuna",
 2857         "vi\\u1E63\\u0101da",
 2858         "y\\u014Dga",
 2859         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
 2860         "uv\\u0101cr\\u0325",
 2861         "dharmak\\u1E63\\u0113tr\\u0113",
 2862         "kuruk\\u1E63\\u0113tr\\u0113",
 2863         "samav\\u0113t\\u0101",
 2864         "yuyutsava\\u1E25",
 2865         "m\\u0101mak\\u0101\\u1E25",
 2866      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
 2867         "kimakurvata",
 2868         "san\\u0304java"
 2869     };
 2870     const int MAX_LEN = UPRV_LENGTHOF(source);
 2871     const char* const expected[MAX_LEN] = {
 2872         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
 2873         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
 2874         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
 2875         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
 2876         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
 2877         "\\u0935\\u093f\\u0937\\u093e\\u0926",
 2878         "\\u092f\\u094b\\u0917",
 2879         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
 2880         "\\u0909\\u0935\\u093E\\u091A\\u0943",
 2881         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
 2882         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
 2883         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
 2884         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
 2885         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
 2886     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
 2887         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
 2888         "\\u0938\\u0902\\u091c\\u0935"
 2889     };
 2890     if(MAX_LEN != UPRV_LENGTHOF(expected)) {
 2891         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
 2892         return;
 2893     }
 2894 
 2895     UErrorCode status = U_ZERO_ERROR;
 2896     UParseError parseError;
 2897     UnicodeString message;
 2898     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
 2899     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
 2900     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
 2901     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
 2902 
 2903     if(U_FAILURE(status)){
 2904         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
 2905         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
 2906         return;
 2907     }
 2908     UnicodeString gotResult;
 2909     for(int i= 0; i<MAX_LEN; i++){
 2910         gotResult = source[i];
 2911         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
 2912         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
 2913         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
 2914 
 2915     }
 2916     delete(latinToDevToLatin);
 2917     delete(devToLatinToDev);  
 2918     delete(devToTelToDev);    
 2919     delete(latinToTelToLatin);
 2920 }
 2921 
 2922 /**
 2923  * Test Gurmukhi-Devanagari Tippi and Bindi
 2924  */
 2925 void TransliteratorTest::TestGurmukhiDevanagari(){
 2926     // the rule says:
 2927     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
 2928     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
 2929     UErrorCode status = U_ZERO_ERROR;
 2930     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
 2931     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
 2932     UParseError parseError;
 2933 
 2934     UnicodeSetIterator vIter(vowel);
 2935     UnicodeSetIterator nvIter(non_vowel);
 2936     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
 2937     if(U_FAILURE(status)) {
 2938       dataerrln("Error creating transliterator %s", u_errorName(status));
 2939       delete trans;
 2940       return;
 2941     }
 2942     UnicodeString src (" \\u0902", -1, US_INV);
 2943     UnicodeString expected(" \\u0A02", -1, US_INV);
 2944     src = src.unescape();
 2945     expected= expected.unescape();
 2946 
 2947     while(vIter.next()){
 2948         src.setCharAt(0,(UChar) vIter.getCodepoint());
 2949         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
 2950         expect(*trans,src,expected);
 2951     }
 2952     
 2953     expected.setCharAt(1,0x0A70);
 2954     while(nvIter.next()){
 2955         //src.setCharAt(0,(char) nvIter.codepoint);
 2956         src.setCharAt(0,(UChar)nvIter.getCodepoint());
 2957         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
 2958         expect(*trans,src,expected);
 2959     }
 2960     delete trans;
 2961 }
 2962 /**
 2963  * Test instantiation from a locale.
 2964  */
 2965 void TransliteratorTest::TestLocaleInstantiation(void) {
 2966     UParseError pe;
 2967     UErrorCode ec = U_ZERO_ERROR;
 2968     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
 2969     if (U_FAILURE(ec)) {
 2970         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
 2971         delete t;
 2972         return;
 2973     }
 2974     expect(*t, CharsToUnicodeString("\\u0430"), "a");
 2975     delete t;
 2976     
 2977     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
 2978     if (U_FAILURE(ec)) {
 2979         errln("FAIL: createInstance(en-el)");
 2980         delete t;
 2981         return;
 2982     }
 2983     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
 2984     delete t;
 2985 }
 2986         
 2987 /**
 2988  * Test title case handling of accent (should ignore accents)
 2989  */
 2990 void TransliteratorTest::TestTitleAccents(void) {
 2991     UParseError pe;
 2992     UErrorCode ec = U_ZERO_ERROR;
 2993     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
 2994     if (U_FAILURE(ec)) {
 2995         errln("FAIL: createInstance(Title)");
 2996         delete t;
 2997         return;
 2998     }
 2999     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
 3000     delete t;
 3001 }
 3002 
 3003 /**
 3004  * Basic test of a locale resource based rule.
 3005  */
 3006 void TransliteratorTest::TestLocaleResource() {
 3007     const char* DATA[] = {
 3008         // id                    from               to
 3009         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
 3010         "Latin-el",              "b",               "\\u03bc\\u03c0",
 3011         "Latin-Greek",           "b",               "\\u03B2",
 3012         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
 3013         "el-Latin",              "\\u03B2",         "v",
 3014         "Greek-Latin",           "\\u03B2",         "b",
 3015     };
 3016     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
 3017     for (int32_t i=0; i<DATA_length; i+=3) {
 3018         UParseError pe;
 3019         UErrorCode ec = U_ZERO_ERROR;
 3020         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
 3021         if (U_FAILURE(ec)) {
 3022             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
 3023             delete t;
 3024             continue;
 3025         }
 3026         expect(*t, CharsToUnicodeString(DATA[i+1]),
 3027                CharsToUnicodeString(DATA[i+2]));
 3028         delete t;
 3029     }
 3030 }
 3031 
 3032 /**
 3033  * Make sure parse errors reference the right line.
 3034  */
 3035 void TransliteratorTest::TestParseError() {
 3036     static const char* rule =
 3037         "a > b;\n"
 3038         "# more stuff\n"
 3039         "d << b;";
 3040     UErrorCode ec = U_ZERO_ERROR;
 3041     UParseError pe;
 3042     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
 3043     delete t;
 3044     if (U_FAILURE(ec)) {
 3045         UnicodeString err(pe.preContext);
 3046         err.append((UChar)124/*|*/).append(pe.postContext);
 3047         if (err.indexOf("d << b") >= 0) {
 3048             logln("Ok: " + err);
 3049         } else {
 3050             errln("FAIL: " + err);
 3051         }
 3052     }
 3053     else {
 3054         errln("FAIL: no syntax error");
 3055     }
 3056     static const char* maskingRule =
 3057         "a>x;\n"
 3058         "# more stuff\n"
 3059         "ab>y;";
 3060     ec = U_ZERO_ERROR;
 3061     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
 3062     if (ec != U_RULE_MASK_ERROR) {
 3063         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
 3064     }
 3065     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
 3066         errln("FAIL: did not get expected precontext");
 3067     }
 3068     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
 3069         errln("FAIL: did not get expected postcontext");
 3070     }
 3071 }
 3072 
 3073 /**
 3074  * Make sure sets on output are disallowed.
 3075  */
 3076 void TransliteratorTest::TestOutputSet() {
 3077     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
 3078     UErrorCode ec = U_ZERO_ERROR;
 3079     UParseError pe;
 3080     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
 3081     delete t;
 3082     if (U_FAILURE(ec)) {
 3083         UnicodeString err(pe.preContext);
 3084         err.append((UChar)124/*|*/).append(pe.postContext);
 3085         logln("Ok: " + err);
 3086         return;
 3087     }
 3088     errln("FAIL: No syntax error");
 3089 }        
 3090 
 3091 /**
 3092  * Test the use variable range pragma, making sure that use of
 3093  * variable range characters is detected and flagged as an error.
 3094  */
 3095 void TransliteratorTest::TestVariableRange() {
 3096     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
 3097     UErrorCode ec = U_ZERO_ERROR;
 3098     UParseError pe;
 3099     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
 3100     delete t;
 3101     if (U_FAILURE(ec)) {
 3102         UnicodeString err(pe.preContext);
 3103         err.append((UChar)124/*|*/).append(pe.postContext);
 3104         logln("Ok: " + err);
 3105         return;
 3106     }
 3107     errln("FAIL: No syntax error");
 3108 }
 3109 
 3110 /**
 3111  * Test invalid post context error handling
 3112  */
 3113 void TransliteratorTest::TestInvalidPostContext() {
 3114     UnicodeString rule = "a}b{c>d;";
 3115     UErrorCode ec = U_ZERO_ERROR;
 3116     UParseError pe;
 3117     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
 3118     delete t;
 3119     if (U_FAILURE(ec)) {
 3120         UnicodeString err(pe.preContext);
 3121         err.append((UChar)124/*|*/).append(pe.postContext);
 3122         if (err.indexOf("a}b{c") >= 0) {
 3123             logln("Ok: " + err);
 3124         } else {
 3125             errln("FAIL: " + err);
 3126         }
 3127         return;
 3128     }
 3129     errln("FAIL: No syntax error");
 3130 }
 3131 
 3132 /**
 3133  * Test ID form variants
 3134  */
 3135 void TransliteratorTest::TestIDForms() {
 3136     const char* DATA[] = {
 3137         "NFC", NULL, "NFD",
 3138         "nfd", NULL, "NFC", // make sure case is ignored
 3139         "Any-NFKD", NULL, "Any-NFKC",
 3140         "Null", NULL, "Null",
 3141         "-nfkc", "nfkc", "NFKD",
 3142         "-nfkc/", "nfkc", "NFKD",
 3143         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
 3144         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
 3145         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
 3146         "Source-", NULL, NULL,
 3147         "Source/Variant-", NULL, NULL,
 3148         "Source-/Variant", NULL, NULL,
 3149         "/Variant", NULL, NULL,
 3150         "/Variant-", NULL, NULL,
 3151         "-/Variant", NULL, NULL,
 3152         "-/", NULL, NULL,
 3153         "-", NULL, NULL,
 3154         "/", NULL, NULL,
 3155     };
 3156     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
 3157     
 3158     for (int32_t i=0; i<DATA_length; i+=3) {
 3159         const char* ID = DATA[i];
 3160         const char* expID = DATA[i+1];
 3161         const char* expInvID = DATA[i+2];
 3162         UBool expValid = (expInvID != NULL);
 3163         if (expID == NULL) {
 3164             expID = ID;
 3165         }
 3166         UParseError pe;
 3167         UErrorCode ec = U_ZERO_ERROR;
 3168         Transliterator *t =
 3169             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
 3170         if (U_FAILURE(ec)) {
 3171             if (!expValid) {
 3172                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
 3173             } else {
 3174                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
 3175             }
 3176             delete t;
 3177             continue;
 3178         }
 3179         Transliterator *u = t->createInverse(ec);
 3180         if (U_FAILURE(ec)) {
 3181             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
 3182             delete t;
 3183             delete u;
 3184             continue;
 3185         }
 3186         if (t->getID() == expID &&
 3187             u->getID() == expInvID) {
 3188             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
 3189         } else {
 3190             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
 3191                   t->getID() + " x getInverse() => " + u->getID() +
 3192                   ", expected " + expInvID);
 3193         }
 3194         delete t;
 3195         delete u;
 3196     }
 3197 }
 3198 
// NUL-terminated UTF-16 strings used by checkRules() with
// UnicodeString::findAndReplace() to strip whitespace from rule text.
static const UChar SPACE[]   = {32,0};   // U+0020 SPACE
static const UChar NEWLINE[] = {10,0};   // U+000A LINE FEED
static const UChar RETURN[]  = {13,0};   // U+000D CARRIAGE RETURN
static const UChar EMPTY[]   = {0};      // empty string (the replacement text)
 3203 
 3204 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
 3205                                     const UnicodeString& testRulesForward) {
 3206     UnicodeString rules2; t2.toRules(rules2, TRUE);
 3207     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
 3208     rules2.findAndReplace(SPACE, EMPTY);
 3209     rules2.findAndReplace(NEWLINE, EMPTY);
 3210     rules2.findAndReplace(RETURN, EMPTY);
 3211 
 3212     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
 3213     
 3214     if (rules2 != testRules) {
 3215         errln(label);
 3216         logln((UnicodeString)"GENERATED RULES: " + rules2);
 3217         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
 3218     }
 3219 }
 3220 
 3221 /**
 3222  * Mark's toRules test.
 3223  */
 3224 void TransliteratorTest::TestToRulesMark() {
 3225     const char* testRules = 
 3226         "::[[:Latin:][:Mark:]];"
 3227         "::NFKD (NFC);"
 3228         "::Lower (Lower);"
 3229         "a <> \\u03B1;" // alpha
 3230         "::NFKC (NFD);"
 3231         "::Upper (Lower);"
 3232         "::Lower ();"
 3233         "::([[:Greek:][:Mark:]]);"
 3234         ;
 3235     const char* testRulesForward = 
 3236         "::[[:Latin:][:Mark:]];"
 3237         "::NFKD(NFC);"
 3238         "::Lower(Lower);"
 3239         "a > \\u03B1;"
 3240         "::NFKC(NFD);"
 3241         "::Upper (Lower);"
 3242         "::Lower ();"
 3243         ;
 3244     const char* testRulesBackward = 
 3245         "::[[:Greek:][:Mark:]];"
 3246         "::Lower (Upper);"
 3247         "::NFD(NFKC);"
 3248         "\\u03B1 > a;"
 3249         "::Lower(Lower);"
 3250         "::NFC(NFKD);"
 3251         ;
 3252     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
 3253     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
 3254     
 3255     UParseError pe;
 3256     UErrorCode ec = U_ZERO_ERROR;
 3257     LocalPointer<Transliterator> t2(
 3258             Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec));
 3259     LocalPointer<Transliterator> t3(
 3260             Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec));
 3261 
 3262     if (U_FAILURE(ec)) {
 3263         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
 3264         return;
 3265     }
 3266     
 3267     expect(*t2, source, target);
 3268     expect(*t3, target, source);
 3269     
 3270     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
 3271     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
 3272 }
 3273 
 3274 /**
 3275  * Test Escape and Unescape transliterators.
 3276  */
 3277 void TransliteratorTest::TestEscape() {
 3278     UParseError pe;
 3279     UErrorCode ec;
 3280     Transliterator *t;
 3281 
 3282     ec = U_ZERO_ERROR;
 3283     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
 3284     if (U_FAILURE(ec)) {
 3285         errln((UnicodeString)"FAIL: createInstance");
 3286     } else {
 3287         expect(*t,
 3288                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
 3289                "@12Q");
 3290     }
 3291     delete t;
 3292 
 3293     ec = U_ZERO_ERROR;
 3294     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
 3295     if (U_FAILURE(ec)) {
 3296         errln((UnicodeString)"FAIL: createInstance");
 3297     } else {
 3298         expect(*t,
 3299                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
 3300                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
 3301     }
 3302     delete t;
 3303 
 3304     ec = U_ZERO_ERROR;
 3305     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
 3306     if (U_FAILURE(ec)) {
 3307         errln((UnicodeString)"FAIL: createInstance");
 3308     } else {
 3309         expect(*t,
 3310                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
 3311                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
 3312     }
 3313     delete t;
 3314 
 3315     ec = U_ZERO_ERROR;
 3316     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
 3317     if (U_FAILURE(ec)) {
 3318         errln((UnicodeString)"FAIL: createInstance");
 3319     } else {
 3320         expect(*t,
 3321                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
 3322                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
 3323     }
 3324     delete t;
 3325 }
 3326 
 3327 
 3328 void TransliteratorTest::TestAnchorMasking(){
 3329     UnicodeString rule ("^a > Q; a > q;");
 3330     UErrorCode status= U_ZERO_ERROR;
 3331     UParseError parseError;
 3332 
 3333     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
 3334     if(U_FAILURE(status)){
 3335         errln(UnicodeString("FAIL: ") + "ID" +
 3336               ".createFromRules() => bad rules" +
 3337               /*", parse error " + parseError.code +*/
 3338               ", line " + parseError.line +
 3339               ", offset " + parseError.offset +
 3340               ", context " + prettify(parseError.preContext, TRUE) +
 3341               ", rules: " + prettify(rule, TRUE));
 3342     }
 3343     delete t;
 3344 }
 3345 
 3346 /**
 3347  * Make sure display names of variants look reasonable.
 3348  */
 3349 void TransliteratorTest::TestDisplayName() {
 3350 #if UCONFIG_NO_FORMATTING
 3351     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
 3352     return;
 3353 #else
 3354     static const char* DATA[] = {
 3355         // ID, forward name, reverse name
 3356         // Update the text as necessary -- the important thing is
 3357         // not the text itself, but how various cases are handled.
 3358         
 3359         // Basic test
 3360         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
 3361         
 3362         // Variants
 3363         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
 3364         
 3365         // Target-only IDs
 3366         "NFC", "Any to NFC", "Any to NFD",
 3367     };
 3368 
 3369     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 3370     
 3371     Locale US("en", "US");
 3372     
 3373     for (int32_t i=0; i<DATA_length; i+=3) {
 3374         UnicodeString name;
 3375         Transliterator::getDisplayName(DATA[i], US, name);
 3376         if (name != DATA[i+1]) {
 3377             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
 3378                   name + ", expected " + DATA[i+1]);
 3379         } else {
 3380             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
 3381         }
 3382         UErrorCode ec = U_ZERO_ERROR;
 3383         UParseError pe;
 3384         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
 3385         if (U_FAILURE(ec)) {
 3386             delete t;
 3387             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
 3388             continue;
 3389         }
 3390         name = Transliterator::getDisplayName(t->getID(), US, name);
 3391         if (name != DATA[i+2]) {
 3392             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
 3393                   name + ", expected " + DATA[i+2]);
 3394         } else {
 3395             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
 3396         }
 3397         delete t;
 3398     }
 3399 #endif
 3400 }
 3401 
 3402 void TransliteratorTest::TestSpecialCases(void) {
 3403     const UnicodeString registerRules[] = {
 3404         "Any-Dev1", "x > X; y > Y;",
 3405         "Any-Dev2", "XY > Z",
 3406         "Greek-Latin/FAKE", 
 3407             CharsToUnicodeString
 3408             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
 3409         "" // END MARKER
 3410     };
 3411 
 3412     const UnicodeString testCases[] = {
 3413         // NORMALIZATION
 3414         // should add more test cases
 3415         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
 3416         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
 3417         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
 3418         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
 3419 
 3420         // mp -> b BUG
 3421         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
 3422         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
 3423     
 3424         // check for devanagari bug
 3425         "nfd;Dev1;Dev2;nfc", "xy", "Z",
 3426 
 3427         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
 3428         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 
 3429                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee, 
 3430                  
 3431         //TODO: enable this test once Titlecase works right
 3432         /*
 3433         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 
 3434                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee, 
 3435                  */
 3436         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 
 3437                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
 3438         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 
 3439                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
 3440     
 3441         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
 3442         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
 3443 
 3444          // FORMS OF S
 3445         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"), 
 3446                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
 3447         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"), 
 3448                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
 3449         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"), 
 3450                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
 3451         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"), 
 3452                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
 3453         // Tatiana bug
 3454         // Upper: TAT\\u02B9\\u00C2NA
 3455         // Lower: tat\\u02B9\\u00E2na
 3456         // Title: Tat\\u02B9\\u00E2na
 3457         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
 3458                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
 3459         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
 3460                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
 3461         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
 3462                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
 3463 
 3464         "" // END MARKER
 3465     };
 3466 
 3467     UParseError pos;
 3468     int32_t i;
 3469     for (i = 0; registerRules[i].length()!=0; i+=2) {
 3470         UErrorCode status = U_ZERO_ERROR;
 3471 
 3472         Transliterator *t = Transliterator::createFromRules(registerRules[0+i], 
 3473             registerRules[i+1], UTRANS_FORWARD, pos, status);
 3474         if (U_FAILURE(status)) {
 3475             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
 3476         } else {
 3477             Transliterator::registerInstance(t);
 3478         }
 3479     }
 3480     for (i = 0; testCases[i].length()!=0; i+=3) {
 3481         UErrorCode ec = U_ZERO_ERROR;
 3482         UParseError pe;
 3483         const UnicodeString& name = testCases[i];
 3484         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
 3485         if (U_FAILURE(ec)) {
 3486             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
 3487             delete t;
 3488             continue;
 3489         }
 3490         const UnicodeString& id = t->getID();
 3491         const UnicodeString& source = testCases[i+1];
 3492         UnicodeString target;
 3493 
 3494         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
 3495         
 3496         if (testCases[i+2].length() > 0) {
 3497             target = testCases[i+2];
 3498         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
 3499             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
 3500         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
 3501             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
 3502         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
 3503             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
 3504         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
 3505             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
 3506         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
 3507             target = source;
 3508             target.toLower(Locale::getUS());
 3509         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
 3510             target = source;
 3511             target.toUpper(Locale::getUS());
 3512         }
 3513         if (U_FAILURE(ec)) {
 3514             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
 3515             continue;
 3516         }
 3517 
 3518         expect(*t, source, target);
 3519         delete t;
 3520     }
 3521     for (i = 0; registerRules[i].length()!=0; i+=2) {
 3522         Transliterator::unregister(registerRules[i]);
 3523     }
 3524 }
 3525 
 3526 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
 3527     if (ch <= 0xFFFF) {
 3528         sprintf(buffer, "\\u%04x", (int)ch);
 3529     } else {
 3530         sprintf(buffer, "\\U%08x", (int)ch);
 3531     }
 3532     return buffer;
 3533 }
 3534 
 3535 void TransliteratorTest::TestSurrogateCasing (void) {
 3536     // check that casing handles surrogates
 3537     // titlecase is currently defective
 3538     char buffer[20];
 3539     UChar buffer2[20];
 3540     UChar32 dee;
 3541     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
 3542     UnicodeString DEE(u_totitle(dee));
 3543     if (DEE != DESERET_DEE) {
 3544         err("Fails titlecase of surrogates");
 3545         err(Char32ToEscapedChars(dee, buffer)); 
 3546         err(", ");
 3547         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
 3548     }
 3549         
 3550     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
 3551     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
 3552     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
 3553     UErrorCode status= U_ZERO_ERROR;
 3554 
 3555     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
 3556     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
 3557         errln("Fails: Can't uppercase surrogates.");
 3558     }
 3559         
 3560     status= U_ZERO_ERROR;
 3561     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
 3562     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
 3563         errln("Fails: Can't lowercase surrogates.");
 3564     }
 3565 }
 3566 
 3567 static void _trans(Transliterator& t, const UnicodeString& src,
 3568                    UnicodeString& result) {
 3569     result = src;
 3570     t.transliterate(result);
 3571 }
 3572 
 3573 static void _trans(const UnicodeString& id, const UnicodeString& src,
 3574                    UnicodeString& result, UErrorCode ec) {
 3575     UParseError pe;
 3576     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
 3577     if (U_SUCCESS(ec)) {
 3578         _trans(*t, src, result);
 3579     }
 3580     delete t;
 3581 }
 3582 
 3583 static UnicodeString _findMatch(const UnicodeString& source,
 3584                                        const UnicodeString* pairs) {
 3585     UnicodeString empty;
 3586     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
 3587         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
 3588             return pairs[i+1];
 3589         }
 3590     }
 3591     return empty;
 3592 }
 3593 
 3594 // Check to see that incremental gets at least part way through a reasonable string.
 3595 
 3596 void TransliteratorTest::TestIncrementalProgress(void) {
 3597     UErrorCode ec = U_ZERO_ERROR;
 3598     UnicodeString latinTest = "The Quick Brown Fox.";
 3599     UnicodeString devaTest;
 3600     _trans("Latin-Devanagari", latinTest, devaTest, ec);
 3601     UnicodeString kataTest;
 3602     _trans("Latin-Katakana", latinTest, kataTest, ec);
 3603     if (U_FAILURE(ec)) {
 3604         errln("FAIL: Internal error");
 3605         return;
 3606     }
 3607     const UnicodeString tests[] = {
 3608         "Any", latinTest,
 3609         "Latin", latinTest,
 3610         "Halfwidth", latinTest,
 3611         "Devanagari", devaTest,
 3612         "Katakana", kataTest,
 3613         "" // END MARKER
 3614     };
 3615 
 3616     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
 3617     int32_t i = 0, j=0, k=0;
 3618     int32_t sources = Transliterator::countAvailableSources();
 3619     for (i = 0; i < sources; i++) {
 3620         UnicodeString source;
 3621         Transliterator::getAvailableSource(i, source);
 3622         UnicodeString test = _findMatch(source, tests);
 3623         if (test.length() == 0) {
 3624             logln((UnicodeString)"Skipping " + source + "-X");
 3625             continue;
 3626         }
 3627         int32_t targets = Transliterator::countAvailableTargets(source);
 3628         for (j = 0; j < targets; j++) {
 3629             UnicodeString target;
 3630             Transliterator::getAvailableTarget(j, source, target);
 3631             int32_t variants = Transliterator::countAvailableVariants(source, target);
 3632             for (k =0; k< variants; k++) {
 3633                 UnicodeString variant;
 3634                 UParseError err;
 3635                 UErrorCode status = U_ZERO_ERROR;
 3636 
 3637                 Transliterator::getAvailableVariant(k, source, target, variant);
 3638                 UnicodeString id = source + "-" + target + "/" + variant;
 3639                 
 3640                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
 3641                 if (U_FAILURE(status)) {
 3642                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
 3643                     delete t;
 3644                     continue;
 3645                 }
 3646                 status = U_ZERO_ERROR;
 3647                 CheckIncrementalAux(t, test);
 3648 
 3649                 UnicodeString rev;
 3650                 _trans(*t, test, rev);
 3651                 Transliterator *inv = t->createInverse(status);
 3652                 if (U_FAILURE(status)) {
 3653                     // The following are forward-only, it is OK that creating an inverse will not work:
 3654                     // 1. Devanagari-Arabic
 3655                     // 2. Any-*/BGN
 3656                     // 2a. Any-*/BGN_1981
 3657                     // 3. Any-*/UNGEGN
 3658                     // 4. Any-*/MNS
 3659                     // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
 3660                     if (    id.compare((UnicodeString)"Devanagari-Arabic/") != 0
 3661                          && !(id.startsWith((UnicodeString)"Any-") &&
 3662                                 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/BGN_1981") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
 3663                              )
 3664 #if UCONFIG_NO_BREAK_ITERATION
 3665                          && id.compare((UnicodeString)"Latin-Thai/") != 0
 3666 #endif
 3667                        )
 3668                     {
 3669                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
 3670                     }
 3671                     delete t;
 3672                     delete inv;
 3673                     continue;
 3674                 }
 3675                 CheckIncrementalAux(inv, rev);
 3676                 delete t;
 3677                 delete inv;
 3678             }
 3679         }
 3680     }
 3681 }
 3682 
 3683 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t, 
 3684                                                       const UnicodeString& input) {
 3685     UErrorCode ec = U_ZERO_ERROR;
 3686     UTransPosition pos;
 3687     UnicodeString test = input;
 3688 
 3689     pos.contextStart = 0;
 3690     pos.contextLimit = input.length();
 3691     pos.start = 0;
 3692     pos.limit = input.length();
 3693 
 3694     t->transliterate(test, pos, ec);
 3695     if (U_FAILURE(ec)) {
 3696         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
 3697         return;
 3698     }
 3699     UBool gotError = FALSE;
 3700     (void)gotError;    // Suppress set but not used warning.
 3701 
 3702     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
 3703 
 3704     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
 3705         errln((UnicodeString)"No Progress, " +
 3706               t->getID() + ": " + formatInput(test, input, pos));
 3707         gotError = TRUE;
 3708     } else {
 3709         logln((UnicodeString)"PASS Progress, " +
 3710               t->getID() + ": " + formatInput(test, input, pos));
 3711     }
 3712     t->finishTransliteration(test, pos);
 3713     if (pos.start != pos.limit) {
 3714         errln((UnicodeString)"Incomplete, " +
 3715               t->getID() + ": " + formatInput(test, input, pos));
 3716         gotError = TRUE;
 3717     }
 3718 }
 3719 
 3720 void TransliteratorTest::TestFunction() {
 3721     // Careful with spacing and ';' here:  Phrase this exactly
 3722     // as toRules() is going to return it.  If toRules() changes
 3723     // with regard to spacing or ';', then adjust this string.
 3724     UnicodeString rule =
 3725         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
 3726     
 3727     UParseError pe;
 3728     UErrorCode ec = U_ZERO_ERROR;
 3729     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
 3730     if (t == NULL) {
 3731         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
 3732         return;
 3733     }
 3734     
 3735     UnicodeString r;
 3736     t->toRules(r, TRUE);
 3737     if (r == rule) {
 3738         logln((UnicodeString)"OK: toRules() => " + r);
 3739     } else {
 3740         errln((UnicodeString)"FAIL: toRules() => " + r +
 3741               ", expected " + rule);
 3742     }
 3743     
 3744     expect(*t, "The Quick Brown Fox",
 3745            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
 3746 
 3747     delete t;
 3748 }
 3749 
 3750 void TransliteratorTest::TestInvalidBackRef(void) {
 3751     UnicodeString rule =  ". > $1;";
 3752     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
 3753     UParseError pe;
 3754     UErrorCode ec = U_ZERO_ERROR;
 3755     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
 3756     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
 3757 
 3758     if (t != NULL) {
 3759         errln("FAIL: createFromRules should have returned NULL");
 3760         delete t;
 3761     }
 3762 
 3763     if (t2 != NULL) {
 3764         errln("FAIL: createFromRules should have returned NULL");
 3765         delete t2;
 3766     }
 3767 
 3768     if (U_SUCCESS(ec)) {
 3769         errln("FAIL: Ok: . > $1; => no error");
 3770     } else {
 3771         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
 3772     }
 3773 }
 3774 
 3775 void TransliteratorTest::TestMulticharStringSet() {
 3776     // Basic testing
 3777     const char* rule =
 3778         "       [{aa}]       > x;"
 3779         "         a          > y;"
 3780         "       [b{bc}]      > z;"
 3781         "[{gd}] { e          > q;"
 3782         "         e } [{fg}] > r;" ;
 3783         
 3784     UParseError pe;
 3785     UErrorCode ec = U_ZERO_ERROR;
 3786     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
 3787     if (t == NULL || U_FAILURE(ec)) {
 3788         delete t;
 3789         errln("FAIL: createFromRules failed");
 3790         return;
 3791     }
 3792         
 3793     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
 3794            "y x yz z d gd de gdq gdqfg ddrfg");
 3795     delete t;
 3796 
 3797     // Overlapped string test.  Make sure that when multiple
 3798     // strings can match that the longest one is matched.
 3799     rule =
 3800         "    [a {ab} {abc}]    > x;"
 3801         "           b          > y;"
 3802         "           c          > z;"
 3803         " q [t {st} {rst}] { e > p;" ;
 3804         
 3805     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
 3806     if (t == NULL || U_FAILURE(ec)) {
 3807         delete t;
 3808         errln("FAIL: createFromRules failed");
 3809         return;
 3810     }
 3811         
 3812     expect(*t, "a ab abc qte qste qrste",
 3813            "x x x qtp qstp qrstp");
 3814     delete t;
 3815 }
 3816 
 3817 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
 3818 // BEGIN TestUserFunction support factory
 3819 
 3820 Transliterator* _TUFF[4];
 3821 UnicodeString* _TUFID[4];
 3822 
 3823 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
 3824                                    Transliterator::Token context) {
 3825     return _TUFF[context.integer]->clone();
 3826 }
 3827 
 3828 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
 3829     _TUFF[n] = t;
 3830     _TUFID[n] = new UnicodeString(ID);
 3831     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
 3832 }
 3833 
 3834 static void _TUFUnreg(int32_t n) {
 3835     if (_TUFF[n] != NULL) {
 3836         Transliterator::unregister(*_TUFID[n]);
 3837         delete _TUFF[n];
 3838         delete _TUFID[n];
 3839     }
 3840 }
 3841 
 3842 // END TestUserFunction support factory
 3843 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 3844 
 3845 /**
 3846  * Test that user-registered transliterators can be used under function
 3847  * syntax.
 3848  */
 3849 void TransliteratorTest::TestUserFunction() {
 3850  
 3851     Transliterator* t;
 3852     UParseError pe;
 3853     UErrorCode ec = U_ZERO_ERROR;
 3854 
 3855     // Setup our factory
 3856     int32_t i;
 3857     for (i=0; i<4; ++i) {
 3858         _TUFF[i] = NULL;
 3859     }
 3860 
 3861     // There's no need to register inverses if we don't use them
 3862     t = Transliterator::createFromRules("gif",
 3863                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
 3864                                         UTRANS_FORWARD, pe, ec);
 3865     if (t == NULL || U_FAILURE(ec)) {
 3866         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
 3867         return;
 3868     }
 3869     _TUFReg("Any-gif", t, 0);
 3870 
 3871     t = Transliterator::createFromRules("RemoveCurly",
 3872                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
 3873                                         UTRANS_FORWARD, pe, ec);
 3874     if (t == NULL || U_FAILURE(ec)) {
 3875         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
 3876         goto FAIL;
 3877     }
 3878     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
 3879     _TUFReg("Any-RemoveCurly", t, 1);
 3880 
 3881     logln("Trying &hex");
 3882     t = Transliterator::createFromRules("hex2",
 3883                                         "(.) > &hex($1);",
 3884                                         UTRANS_FORWARD, pe, ec);
 3885     if (t == NULL || U_FAILURE(ec)) {
 3886         errln("FAIL: createFromRules");
 3887         goto FAIL;
 3888     }
 3889     logln("Registering");
 3890     _TUFReg("Any-hex2", t, 2);
 3891     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
 3892     if (t == NULL || U_FAILURE(ec)) {
 3893         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
 3894         goto FAIL;
 3895     }
 3896     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
 3897     delete t;
 3898 
 3899     logln("Trying &gif");
 3900     t = Transliterator::createFromRules("gif2",
 3901                                         "(.) > &Gif(&Hex2($1));",
 3902                                         UTRANS_FORWARD, pe, ec);
 3903     if (t == NULL || U_FAILURE(ec)) {
 3904         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
 3905         goto FAIL;
 3906     }
 3907     logln("Registering");
 3908     _TUFReg("Any-gif2", t, 3);
 3909     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
 3910     if (t == NULL || U_FAILURE(ec)) {
 3911         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
 3912         goto FAIL;
 3913     }
 3914     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
 3915            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
 3916     delete t;
 3917 
 3918     // Test that filters are allowed after &
 3919     t = Transliterator::createFromRules("test",
 3920                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
 3921                                         UTRANS_FORWARD, pe, ec);
 3922     if (t == NULL || U_FAILURE(ec)) {
 3923         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
 3924         goto FAIL;
 3925     }
 3926     expect(*t, "abc",
 3927            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
 3928     delete t;
 3929 
 3930  FAIL:
 3931     for (i=0; i<4; ++i) {
 3932         _TUFUnreg(i);
 3933     }
 3934 }
 3935 
 3936 /**
 3937  * Test the Any-X transliterators.
 3938  */
 3939 void TransliteratorTest::TestAnyX(void) {
 3940     UParseError parseError;
 3941     UErrorCode status = U_ZERO_ERROR;
 3942     Transliterator* anyLatin =
 3943         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
 3944     if (anyLatin==0) {
 3945         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
 3946         delete anyLatin;
 3947         return;
 3948     }
 3949 
 3950     expect(*anyLatin,
 3951            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
 3952            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
 3953 
 3954     delete anyLatin;
 3955 }
 3956 
 3957 /**
 3958  * Test Any-X transliterators with sample letters from all scripts.
 3959  */
 3960 void TransliteratorTest::TestAny(void) {
 3961     UErrorCode status = U_ZERO_ERROR;
 3962     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
 3963     //       function call parameters going on in this test.
 3964     UnicodeSet alphabetic("[:alphabetic:]", status);
 3965     if (U_FAILURE(status)) {
 3966         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
 3967         return;
 3968     }
 3969     alphabetic.freeze();
 3970 
 3971     UnicodeString testString;
 3972     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
 3973         const char *scriptName = uscript_getShortName((UScriptCode)i);
 3974         if (scriptName == NULL) {
 3975             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
 3976             return;
 3977         }
 3978 
 3979         UnicodeSet sample;
 3980         sample.applyPropertyAlias("script", scriptName, status);
 3981         if (U_FAILURE(status)) {
 3982             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
 3983             return;
 3984         }
 3985         sample.retainAll(alphabetic);
 3986         for (int32_t count=0; count<5; count++) {
 3987             UChar32 c = sample.charAt(count);
 3988             if (c == -1) {
 3989                 break;
 3990             }
 3991             testString.append(c);
 3992         }
 3993     }
 3994 
 3995     UParseError parseError;
 3996     Transliterator* anyLatin =
 3997         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
 3998     if (U_FAILURE(status)) {
 3999         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
 4000         return;
 4001     }
 4002 
 4003     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
 4004     anyLatin->transliterate(testString);
 4005     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
 4006     delete anyLatin;
 4007 }
 4008 
 4009 
 4010 /**
 4011  * Test the source and target set API.  These are only implemented
 4012  * for RBT and CompoundTransliterator at this time.
 4013  */
 4014 void TransliteratorTest::TestSourceTargetSet() {
 4015     UErrorCode ec = U_ZERO_ERROR;
 4016 
 4017     // Rules
 4018     const char* r =
 4019         "a > b; "
 4020         "r [x{lu}] > q;";
 4021 
 4022     // Expected source
 4023     UnicodeSet expSrc("[arx{lu}]", ec);
 4024 
 4025     // Expected target
 4026     UnicodeSet expTrg("[bq]", ec);
 4027 
 4028     UParseError pe;
 4029     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
 4030 
 4031     if (U_FAILURE(ec)) {
 4032         delete t;
 4033         errln("FAIL: Couldn't set up test");
 4034         return;
 4035     }
 4036 
 4037     UnicodeSet src; t->getSourceSet(src);
 4038     UnicodeSet trg; t->getTargetSet(trg);
 4039 
 4040     if (src == expSrc && trg == expTrg) {
 4041         UnicodeString a, b;
 4042         logln((UnicodeString)"Ok: " +
 4043               r + " => source = " + src.toPattern(a, TRUE) +
 4044               ", target = " + trg.toPattern(b, TRUE));
 4045     } else {
 4046         UnicodeString a, b, c, d;
 4047         errln((UnicodeString)"FAIL: " +
 4048               r + " => source = " + src.toPattern(a, TRUE) +
 4049               ", expected " + expSrc.toPattern(b, TRUE) +
 4050               "; target = " + trg.toPattern(c, TRUE) +
 4051               ", expected " + expTrg.toPattern(d, TRUE));
 4052     }
 4053 
 4054     delete t;
 4055 }
 4056 
 4057 /**
 4058  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
 4059  */
 4060 void TransliteratorTest::TestPatternWhiteSpace() {
 4061     // Rules
 4062     const char* r = "a > \\u200E b;";
 4063     
 4064     UErrorCode ec = U_ZERO_ERROR;
 4065     UParseError pe;
 4066     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
 4067     
 4068     if (U_FAILURE(ec)) {
 4069         errln("FAIL: Couldn't set up test");
 4070     } else {
 4071         expect(*t, "a", "b");
 4072     }
 4073     delete t;
 4074     
 4075     // UnicodeSet
 4076     ec = U_ZERO_ERROR;
 4077     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
 4078     
 4079     if (U_FAILURE(ec)) {
 4080         errln("FAIL: Couldn't set up test");
 4081     } else {
 4082         if (set.contains(0x200E)) {
 4083             errln("FAIL: U+200E not being ignored by UnicodeSet");
 4084         }
 4085     }
 4086 }
 4087 //======================================================================
 4088 // this method is in TestUScript.java
 4089 //======================================================================
 4090 void TransliteratorTest::TestAllCodepoints(){
 4091     UScriptCode code= USCRIPT_INVALID_CODE;
 4092     char id[256]={'\0'};
 4093     char abbr[256]={'\0'};
 4094     char newId[256]={'\0'};
 4095     char newAbbrId[256]={'\0'};
 4096     char oldId[256]={'\0'};
 4097     char oldAbbrId[256]={'\0'};
 4098 
 4099     UErrorCode status =U_ZERO_ERROR;
 4100     UParseError pe;
 4101     
 4102     for(uint32_t i = 0; i<=0x10ffff; i++){
 4103         code =  uscript_getScript(i,&status);
 4104         if(code == USCRIPT_INVALID_CODE){
 4105             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
 4106         }
 4107         const char* myId = uscript_getName(code);
 4108         if(!myId) {
 4109           dataerrln("Valid script code returned NULL name. Check your data!");
 4110           return;
 4111         }
 4112         uprv_strcpy(id,myId);
 4113         uprv_strcpy(abbr,uscript_getShortName(code));
 4114 
 4115         uprv_strcpy(newId,"[:");
 4116         uprv_strcat(newId,id);
 4117         uprv_strcat(newId,":];NFD");
 4118 
 4119         uprv_strcpy(newAbbrId,"[:");
 4120         uprv_strcat(newAbbrId,abbr);
 4121         uprv_strcat(newAbbrId,":];NFD");
 4122 
 4123         if(uprv_strcmp(newId,oldId)!=0){
 4124             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
 4125             if(t==NULL || U_FAILURE(status)){
 4126                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
 4127             }
 4128             delete t;
 4129         }
 4130         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
 4131             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
 4132             if(t==NULL || U_FAILURE(status)){
 4133                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
 4134             }
 4135             delete t;
 4136         }
 4137         uprv_strcpy(oldId,newId);
 4138         uprv_strcpy(oldAbbrId, newAbbrId);
 4139 
 4140     }
 4141 
 4142 } 
 4143 
 4144 #define TEST_TRANSLIT_ID(id, cls) UPRV_BLOCK_MACRO_BEGIN { \
 4145   UErrorCode ec = U_ZERO_ERROR; \
 4146   Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
 4147   if (U_FAILURE(ec)) { \
 4148     dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
 4149   } else { \
 4150     if (t->getDynamicClassID() != cls::getStaticClassID()) { \
 4151       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
 4152     } \
 4153     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
 4154   } \
 4155   delete t; \
 4156 } UPRV_BLOCK_MACRO_END
 4157 
 4158 #define TEST_TRANSLIT_RULE(rule, cls) UPRV_BLOCK_MACRO_BEGIN { \
 4159   UErrorCode ec = U_ZERO_ERROR; \
 4160   UParseError pe; \
 4161   Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
 4162   if (U_FAILURE(ec)) { \
 4163     errln("FAIL: Couldn't create " rule); \
 4164   } else { \
 4165     if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
 4166       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
 4167     } \
 4168     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
 4169   } \
 4170   delete t; \
 4171 } UPRV_BLOCK_MACRO_END
 4172 
 4173 void TransliteratorTest::TestBoilerplate() {
 4174     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
 4175     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
 4176     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
 4177     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
 4178     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
 4179     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
 4180     TEST_TRANSLIT_ID("Null", NullTransliterator);
 4181     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
 4182     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
 4183     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
 4184     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
 4185     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
 4186     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
 4187 }
 4188 
 4189 void TransliteratorTest::TestAlternateSyntax() {
 4190     // U+2206 == &
 4191     // U+2190 == <
 4192     // U+2192 == >
 4193     // U+2194 == <>
 4194     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
 4195            "abc",
 4196            "xbz");
 4197     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
 4198            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
 4199            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
 4200 }
 4201 
 4202 static const char* BEGIN_END_RULES[] = {
 4203     // [0]
 4204     "abc > xy;"
 4205     "aba > z;",
 4206 
 4207     // [1]
 4208 /*
 4209     "::BEGIN;"
 4210     "abc > xy;"
 4211     "::END;"
 4212     "::BEGIN;"
 4213     "aba > z;"
 4214     "::END;",
 4215 */
 4216     "", // test case commented out below, this is here to keep from messing up the indexes
 4217 
 4218     // [2]
 4219 /*
 4220     "abc > xy;"
 4221     "::BEGIN;"
 4222     "aba > z;"
 4223     "::END;",
 4224 */
 4225     "", // test case commented out below, this is here to keep from messing up the indexes
 4226 
 4227     // [3]
 4228 /*
 4229     "::BEGIN;"
 4230     "abc > xy;"
 4231     "::END;"
 4232     "aba > z;",
 4233 */
 4234     "", // test case commented out below, this is here to keep from messing up the indexes
 4235 
 4236     // [4]
 4237     "abc > xy;"
 4238     "::Null;"
 4239     "aba > z;",
 4240 
 4241     // [5]
 4242     "::Upper;"
 4243     "ABC > xy;"
 4244     "AB > x;"
 4245     "C > z;"
 4246     "::Upper;"
 4247     "XYZ > p;"
 4248     "XY > q;"
 4249     "Z > r;"
 4250     "::Upper;",
 4251 
 4252     // [6]
 4253     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
 4254     "$delim = [\\-$ws];"
 4255     "$ws $delim* > ' ';"
 4256     "'-' $delim* > '-';",
 4257 
 4258     // [7]
 4259     "::Null;"
 4260     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
 4261     "$delim = [\\-$ws];"
 4262     "$ws $delim* > ' ';"
 4263     "'-' $delim* > '-';",
 4264 
 4265     // [8]
 4266     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
 4267     "$delim = [\\-$ws];"
 4268     "$ws $delim* > ' ';"
 4269     "'-' $delim* > '-';"
 4270     "::Null;",
 4271 
 4272     // [9]
 4273     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
 4274     "$delim = [\\-$ws];"
 4275     "::Null;"
 4276     "$ws $delim* > ' ';"
 4277     "'-' $delim* > '-';",
 4278 
 4279     // [10]
 4280 /*
 4281     "::BEGIN;"
 4282     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
 4283     "$delim = [\\-$ws];"
 4284     "::END;"
 4285     "$ws $delim* > ' ';"
 4286     "'-' $delim* > '-';",
 4287 */
 4288     "", // test case commented out below, this is here to keep from messing up the indexes
 4289 
 4290     // [11]
 4291 /*
 4292     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
 4293     "$delim = [\\-$ws];"
 4294     "::BEGIN;"
 4295     "$ws $delim* > ' ';"
 4296     "'-' $delim* > '-';"
 4297     "::END;",
 4298 */
 4299     "", // test case commented out below, this is here to keep from messing up the indexes
 4300 
 4301     // [12]
 4302 /*
 4303     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
 4304     "$delim = [\\-$ws];"
 4305     "$ab = [ab];"
 4306     "::BEGIN;"
 4307     "$ws $delim* > ' ';"
 4308     "'-' $delim* > '-';"
 4309     "::END;"
 4310     "::BEGIN;"
 4311     "$ab { ' ' } $ab > '-';"
 4312     "c { ' ' > ;"
 4313     "::END;"
 4314     "::BEGIN;"
 4315     "'a-a' > a\\%|a;"
 4316     "::END;",
 4317 */
 4318     "", // test case commented out below, this is here to keep from messing up the indexes
 4319 
 4320     // [13]
 4321     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
 4322     "$delim = [\\-$ws];"
 4323     "$ab = [ab];"
 4324     "::Null;"
 4325     "$ws $delim* > ' ';"
 4326     "'-' $delim* > '-';"
 4327     "::Null;"
 4328     "$ab { ' ' } $ab > '-';"
 4329     "c { ' ' > ;"
 4330     "::Null;"
 4331     "'a-a' > a\\%|a;",
 4332 
 4333     // [14]
 4334 /*
 4335     "::[abc];"
 4336     "::BEGIN;"
 4337     "abc > xy;"
 4338     "::END;"
 4339     "::BEGIN;"
 4340     "aba > yz;"
 4341     "::END;"
 4342     "::Upper;",
 4343 */
 4344     "", // test case commented out below, this is here to keep from messing up the indexes
 4345 
 4346     // [15]
 4347     "::[abc];"
 4348     "abc > xy;"
 4349     "::Null;"
 4350     "aba > yz;"
 4351     "::Upper;",
 4352 
 4353     // [16]
 4354 /*
 4355     "::[abc];"
 4356     "::BEGIN;"
 4357     "abc <> xy;"
 4358     "::END;"
 4359     "::BEGIN;"
 4360     "aba <> yz;"
 4361     "::END;"
 4362     "::Upper(Lower);"
 4363     "::([XYZ]);"
 4364 */
 4365     "", // test case commented out below, this is here to keep from messing up the indexes
 4366 
 4367     // [17]
 4368     "::[abc];"
 4369     "abc <> xy;"
 4370     "::Null;"
 4371     "aba <> yz;"
 4372     "::Upper(Lower);"
 4373     "::([XYZ]);"
 4374 };
 4375 
 4376 /*
 4377 (This entire test is commented out below and will need some heavy revision when we re-add
 4378 the ::BEGIN/::END stuff)
 4379 static const char* BOGUS_BEGIN_END_RULES[] = {
 4380     // [7]
 4381     "::BEGIN;"
 4382     "abc > xy;"
 4383     "::BEGIN;"
 4384     "aba > z;"
 4385     "::END;"
 4386     "::END;",
 4387 
 4388     // [8]
 4389     "abc > xy;"
 4390     " aba > z;"
 4391     "::END;",
 4392 
 4393     // [9]
 4394     "::BEGIN;"
 4395     "::Upper;"
 4396     "::END;"
 4397 };
 4398 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
 4399 */
 4400 
 4401 static const char* BEGIN_END_TEST_CASES[] = {
 4402     // rules             input                   expected output
 4403     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
 4404 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
 4405 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
 4406 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
 4407     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
 4408     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
 4409 
 4410     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
 4411     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
 4412     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
 4413     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
 4414 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
 4415 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
 4416 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
 4417 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
 4418 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
 4419     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
 4420     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
 4421     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
 4422 
 4423 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
 4424     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
 4425 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
 4426     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
 4427 };
 4428 static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
 4429 
 4430 void TransliteratorTest::TestBeginEnd() {
 4431     // run through the list of test cases above
 4432     int32_t i = 0;
 4433     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
 4434         expect((UnicodeString)"Test case #" + (i / 3),
 4435                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
 4436                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
 4437                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
 4438     }
 4439 
 4440     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
 4441     UParseError parseError;
 4442     UErrorCode status = U_ZERO_ERROR;
 4443     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
 4444             UTRANS_REVERSE, parseError, status);
 4445     if (reversed == 0 || U_FAILURE(status)) {
 4446         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
 4447     } else {
 4448         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
 4449     }
 4450     delete reversed;
 4451 
 4452     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
 4453     // that all of them cause errors
 4454 /*
 4455 (commented out until we have the real ::BEGIN/::END stuff in place
 4456     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
 4457         UParseError parseError;
 4458         UErrorCode status = U_ZERO_ERROR;
 4459         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
 4460                 UTRANS_FORWARD, parseError, status);
 4461         if (!U_FAILURE(status)) {
 4462             delete t;
 4463             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
 4464         }
 4465     }
 4466 */
 4467 }
 4468 
 4469 void TransliteratorTest::TestBeginEndToRules() {
 4470     // run through the same list of test cases we used above, but this time, instead of just
 4471     // instantiating a Transliterator from the rules and running the test against it, we instantiate
 4472     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
 4473     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
 4474     // to (i.e., does the same thing as) the original rule set
 4475     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
 4476         UParseError parseError;
 4477         UErrorCode status = U_ZERO_ERROR;
 4478         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
 4479                 UTRANS_FORWARD, parseError, status);
 4480         if (U_FAILURE(status)) {
 4481             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
 4482         } else {
 4483             UnicodeString rules;
 4484             t->toRules(rules, TRUE);
 4485             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
 4486                     UTRANS_FORWARD, parseError, status);
 4487             if (U_FAILURE(status)) {
 4488                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
 4489                         parseError, status);
 4490                 delete t;
 4491             } else {
 4492                 expect(*t2,
 4493                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
 4494                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
 4495                 delete t;
 4496                 delete t2;
 4497             }
 4498         }
 4499     }
 4500 
 4501     // do the same thing for the reversible test case
 4502     UParseError parseError;
 4503     UErrorCode status = U_ZERO_ERROR;
 4504     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
 4505             UTRANS_REVERSE, parseError, status);
 4506     if (U_FAILURE(status)) {
 4507         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
 4508     } else {
 4509         UnicodeString rules;
 4510         reversed->toRules(rules, FALSE);
 4511         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
 4512                 parseError, status);
 4513         if (U_FAILURE(status)) {
 4514             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
 4515                     parseError, status);
 4516             delete reversed;
 4517         } else {
 4518             expect(*reversed2,
 4519                    UnicodeString("xy XY XYZ yz YZ"),
 4520                    UnicodeString("xy abc xaba yz aba"));
 4521             delete reversed;
 4522             delete reversed2;
 4523         }
 4524     }
 4525 }
 4526 
 4527 void TransliteratorTest::TestRegisterAlias() {
 4528     UnicodeString longID("Lower;[aeiou]Upper");
 4529     UnicodeString shortID("Any-CapVowels");
 4530     UnicodeString reallyShortID("CapVowels");
 4531 
 4532     Transliterator::registerAlias(shortID, longID);
 4533 
 4534     UErrorCode err = U_ZERO_ERROR;
 4535     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
 4536     if (U_FAILURE(err)) {
 4537         errln("Failed to instantiate transliterator with long ID");
 4538         Transliterator::unregister(shortID);
 4539         return;
 4540     }
 4541     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
 4542     if (U_FAILURE(err)) {
 4543         errln("Failed to instantiate transliterator with short ID");
 4544         delete t1;
 4545         Transliterator::unregister(shortID);
 4546         return;
 4547     }
 4548 
 4549     if (t1->getID() != longID)
 4550         errln("Transliterator instantiated with long ID doesn't have long ID");
 4551     if (t2->getID() != reallyShortID)
 4552         errln("Transliterator instantiated with short ID doesn't have short ID");
 4553 
 4554     UnicodeString rules1;
 4555     UnicodeString rules2;
 4556 
 4557     t1->toRules(rules1, TRUE);
 4558     t2->toRules(rules2, TRUE);
 4559     if (rules1 != rules2)
 4560         errln("Alias transliterators aren't the same");
 4561 
 4562     delete t1;
 4563     delete t2;
 4564     Transliterator::unregister(shortID);
 4565 
 4566     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
 4567     if (U_SUCCESS(err)) {
 4568         errln("Instantiation with short ID succeeded after short ID was unregistered");
 4569         delete t1;
 4570     }
 4571 
 4572     // try the same thing again, but this time with something other than
 4573     // an instance of CompoundTransliterator
 4574     UnicodeString realID("Latin-Greek");
 4575     UnicodeString fakeID("Latin-dlgkjdflkjdl");
 4576     Transliterator::registerAlias(fakeID, realID);
 4577 
 4578     err = U_ZERO_ERROR;
 4579     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
 4580     if (U_FAILURE(err)) {
 4581         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
 4582         Transliterator::unregister(realID);
 4583         return;
 4584     }
 4585     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
 4586     if (U_FAILURE(err)) {
 4587         errln("Failed to instantiate transliterator with fake ID");
 4588         delete t1;
 4589         Transliterator::unregister(realID);
 4590         return;
 4591     }
 4592 
 4593     t1->toRules(rules1, TRUE);
 4594     t2->toRules(rules2, TRUE);
 4595     if (rules1 != rules2)
 4596         errln("Alias transliterators aren't the same");
 4597 
 4598     delete t1;
 4599     delete t2;
 4600     Transliterator::unregister(fakeID);
 4601 }
 4602 
 4603 void TransliteratorTest::TestRuleStripping() {
 4604     /*