w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

strsrch.cpp
Go to the documentation of this file.
1 /*************************************************************************
2  * © 2016 and later: Unicode, Inc. and others.
3  * License & terms of use: http://www.unicode.org/copyright.html
4  *
5  *************************************************************************
6  *************************************************************************
7  * COPYRIGHT:
8  * Copyright (C) 2002-2006 IBM, Inc. All Rights Reserved.
9  *
10  *************************************************************************/
11 
12 /**
13  * This program demonstrates ICU collation-based string search (usearch API)
14  */
15 
/**
 * Usage text describing the command line options.
 *
 * The adjacent string literals are concatenated by the compiler, so every
 * option line must carry its own trailing "\n"; otherwise it runs straight
 * into the following line when printed.
 */
const char gHelpString[] =
    "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
    "-help Display this message.\n"
    "-locale name ICU locale to use. Default is en_US\n"
    "-rules rule Collation rules file (overrides locale)\n"
    "-french French accent ordering\n"
    "-norm Normalizing mode on\n"
    "-shifted Shifted mode\n"
    "-lower Lower case first\n"
    "-upper Upper case first\n"
    "-case Enable separate case level\n"
    "-level n Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string Source string\n"
    "-pattern string Pattern string to look for in source\n"
    "-overlap Enable searching to be done on overlapping patterns\n"
    "-canonical Enable searching to be done matching canonical equivalent patterns\n"
    "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n"
    ;
35 
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
39 
40 #include <unicode/utypes.h>
41 #include <unicode/ucol.h>
42 #include <unicode/usearch.h>
43 #include <unicode/ustring.h>
44 
45 /**
46  * Command line option variables
47  * These global variables are set according to the options specified
48  * on the command line by the user.
49  */
50 char * opt_locale = "en_US";
51 char * opt_rules = 0;
52 UBool opt_help = false;
53 UBool opt_norm = false;
54 UBool opt_french = false;
56 UBool opt_lower = false;
57 UBool opt_upper = false;
58 UBool opt_case = false;
61 int opt_level = 0;
62 char * opt_source = "International Components for Unicode";
63 char * opt_pattern = "Unicode";
66 UChar rules[100];
69 
/**
 * One entry of the command line option table.
 *
 * processOptions() compares `name` against each argument and, on a match,
 * stores a value through `pVar` according to `type`.
 */
struct OptSpec {
    // Option text exactly as typed on the command line; 0 marks the table end.
    const char *name;

    // FLAG:   no value follows; the target is set to true.
    // NUM:    the next argument is parsed as an integer.
    // STRING: the next argument is stored as a string pointer.
    enum {
        FLAG,
        NUM,
        STRING
    } type;

    // Address of the variable that receives the value; it is accessed as
    // UBool, int or const char * depending on `type`.
    void *pVar;
};
78 
80  {"-locale", OptSpec::STRING, &opt_locale},
81  {"-rules", OptSpec::STRING, &opt_rules},
82  {"-source", OptSpec::STRING, &opt_source},
83  {"-pattern", OptSpec::STRING, &opt_pattern},
84  {"-norm", OptSpec::FLAG, &opt_norm},
85  {"-french", OptSpec::FLAG, &opt_french},
86  {"-shifted", OptSpec::FLAG, &opt_shifted},
87  {"-lower", OptSpec::FLAG, &opt_lower},
88  {"-upper", OptSpec::FLAG, &opt_upper},
89  {"-case", OptSpec::FLAG, &opt_case},
90  {"-level", OptSpec::NUM, &opt_level},
91  {"-overlap", OptSpec::FLAG, &opt_overlap},
92  {"-canonical", OptSpec::FLAG, &opt_canonical},
93  {"-help", OptSpec::FLAG, &opt_help},
94  {"-?", OptSpec::FLAG, &opt_help},
95  {0, OptSpec::FLAG, 0}
96 };
97 
98 /**
99  * processOptions() Function to read the command line options.
100  */
101 UBool processOptions(int argc, const char **argv, OptSpec opts[])
102 {
103  for (int argNum = 1; argNum < argc; argNum ++) {
104  const char *pArgName = argv[argNum];
105  OptSpec *pOpt;
106  for (pOpt = opts; pOpt->name != 0; pOpt ++) {
107  if (strcmp(pOpt->name, pArgName) == 0) {
108  switch (pOpt->type) {
109  case OptSpec::FLAG:
110  *(UBool *)(pOpt->pVar) = true;
111  break;
112  case OptSpec::STRING:
113  argNum ++;
114  if (argNum >= argc) {
115  fprintf(stderr, "value expected for \"%s\" option.\n",
116  pOpt->name);
117  return false;
118  }
119  *(const char **)(pOpt->pVar) = argv[argNum];
120  break;
121  case OptSpec::NUM:
122  argNum ++;
123  if (argNum >= argc) {
124  fprintf(stderr, "value expected for \"%s\" option.\n",
125  pOpt->name);
126  return false;
127  }
128  char *endp;
129  int i = strtol(argv[argNum], &endp, 0);
130  if (endp == argv[argNum]) {
131  fprintf(stderr,
132  "integer value expected for \"%s\" option.\n",
133  pOpt->name);
134  return false;
135  }
136  *(int *)(pOpt->pVar) = i;
137  }
138  break;
139  }
140  }
141  if (pOpt->name == 0)
142  {
143  fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
144  return false;
145  }
146  }
147  return true;
148 }
149 
150 /**
151  * Creates a collator
152  */
154 {
155  // Set up an ICU collator
157 
158  if (opt_rules != 0) {
159  u_unescape(opt_rules, rules, 100);
161  NULL, &status);
162  }
163  else {
165  }
166  if (U_FAILURE(status)) {
167  fprintf(stderr, "Collator creation failed.: %d\n", status);
168  return false;
169  }
171  fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",
172  opt_locale);
173  }
175  fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",
176  opt_locale);
177  }
178  if (opt_norm) {
180  }
181  if (opt_french) {
183  }
184  if (opt_lower) {
186  &status);
187  }
188  if (opt_upper) {
190  &status);
191  }
192  if (opt_case) {
194  }
195  if (opt_shifted) {
197  &status);
198  }
199  if (opt_level != 0) {
200  switch (opt_level) {
201  case 1:
203  break;
204  case 2:
206  &status);
207  break;
208  case 3:
210  break;
211  case 4:
213  &status);
214  break;
215  case 5:
217  &status);
218  break;
219  default:
220  fprintf(stderr, "-level param must be between 1 and 5\n");
221  return false;
222  }
223  }
224  if (U_FAILURE(status)) {
225  fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
226  return false;
227  }
228  return true;
229 }
230 
231 /**
232  * Creates a string search
233  */
235 {
240  &status);
241  if (U_FAILURE(status)) {
242  return false;
243  }
244  if (static_cast<bool>(opt_overlap)) {
246  }
247  if (static_cast<bool>(opt_canonical)) {
249  &status);
250  }
251  if (U_FAILURE(status)) {
252  fprintf(stderr, "Error setting search attributes\n");
253  return false;
254  }
255  return true;
256 }
257 
259 {
262  if (offset == USEARCH_DONE) {
263  fprintf(stdout, "Pattern not found in source\n");
264  }
265  while (offset != USEARCH_DONE) {
266  fprintf(stdout, "Pattern found at offset %d size %d\n", offset,
269  }
270  if (U_FAILURE(status)) {
271  fprintf(stderr, "Error in searching for pattern %d\n", status);
272  return false;
273  }
274  fprintf(stdout, "End of search\n");
275  return true;
276 }
277 
278 /**
279  * Main -- process command line, read in and pre-process the test file,
280  * call other functions to do the actual tests.
281  */
282 int main(int argc, const char** argv)
283 {
284  if (!static_cast<bool>(processOptions(argc, argv, opts)) || static_cast<bool>(opt_help)) {
286  return -1;
287  }
288 
289  if (!static_cast<bool>(processCollator())) {
290  fprintf(stderr, "Error creating collator\n");
291  return -1;
292  }
293 
294  if (!static_cast<bool>(processStringSearch())) {
295  fprintf(stderr, "Error creating string search\n");
296  return -1;
297  }
298 
299  fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern,
300  opt_source);
301 
302  findPattern();
305  return 0;
306 }
long __cdecl strtol(char const *_String, char **_EndPtr, int _Radix)
int strcmp()
Definition: coll.cpp:143
int printf()
unsigned char UChar
Definition: bzip2.c:163
#define NULL
Definition: ftobjs.h:61
small capitals from c petite p scientific i
Definition: afcover.h:80
signed int int32_t
Definition: stdint.h:77
#define fprintf
Definition: mendex.h:64
static int offset
Definition: ppmtogif.c:642
#define status
C API: Unicode string handling functions.
UBool opt_upper
Definition: strsrch.cpp:57
int main(int argc, const char **argv)
Definition: strsrch.cpp:282
char * opt_locale
Definition: strsrch.cpp:50
UBool opt_overlap
Definition: strsrch.cpp:59
UBool processStringSearch()
Definition: strsrch.cpp:234
UBool opt_lower
Definition: strsrch.cpp:56
UChar rules[100]
Definition: strsrch.cpp:66
UBool findPattern()
Definition: strsrch.cpp:258
char * opt_source
Definition: strsrch.cpp:62
char * opt_pattern
Definition: strsrch.cpp:63
char * opt_rules
Definition: strsrch.cpp:51
UBool opt_canonical
Definition: strsrch.cpp:60
UBool opt_norm
Definition: strsrch.cpp:53
UCollator * collator
Definition: strsrch.cpp:64
UBool opt_help
Definition: strsrch.cpp:52
const char gHelpString[]
Definition: strsrch.cpp:16
UBool processCollator()
Definition: strsrch.cpp:153
UBool opt_shifted
Definition: strsrch.cpp:55
int opt_level
Definition: strsrch.cpp:61
UBool processOptions(int argc, const char **argv, OptSpec opts[])
Definition: strsrch.cpp:101
OptSpec opts[]
Definition: strsrch.cpp:79
UBool opt_french
Definition: strsrch.cpp:54
UBool opt_case
Definition: strsrch.cpp:58
Definition: coll.cpp:65
enum OptSpec::@1250 type
@ FLAG
Definition: coll.cpp:67
@ NUM
Definition: coll.cpp:67
@ STRING
Definition: coll.cpp:67
const char * name
Definition: coll.cpp:66
void * pVar
Definition: coll.cpp:68
A string of characters.
Definition: t1part.c:49
Definition: search.h:25
Definition: sh.h:1345
C API: Collator.
struct UCollator UCollator
Definition: ucol.h:61
@ UCOL_CASE_FIRST
Definition: ucol.h:272
@ UCOL_STRENGTH
Definition: ucol.h:313
@ UCOL_CASE_LEVEL
Definition: ucol.h:282
@ UCOL_FRENCH_COLLATION
Definition: ucol.h:252
@ UCOL_ALTERNATE_HANDLING
Definition: ucol.h:263
@ UCOL_NORMALIZATION_MODE
Definition: ucol.h:292
@ UCOL_UPPER_FIRST
Definition: ucol.h:129
@ UCOL_TERTIARY
Definition: ucol.h:101
@ UCOL_IDENTICAL
Definition: ucol.h:108
@ UCOL_OFF
Definition: ucol.h:114
@ UCOL_QUATERNARY
Definition: ucol.h:106
@ UCOL_SHIFTED
Definition: ucol.h:121
@ UCOL_PRIMARY
Definition: ucol.h:97
@ UCOL_LOWER_FIRST
Definition: ucol.h:127
@ UCOL_SECONDARY
Definition: ucol.h:99
@ UCOL_ON
Definition: ucol.h:118
int8_t UBool
Definition: umachine.h:269
#define usearch_next
Definition: urename.h:1644
#define u_unescape
Definition: urename.h:406
#define usearch_setAttribute
Definition: urename.h:1652
#define ucol_setAttribute
Definition: urename.h:774
#define ucol_close
Definition: urename.h:721
#define usearch_getMatchedLength
Definition: urename.h:1633
#define usearch_close
Definition: urename.h:1627
#define usearch_openFromCollator
Definition: urename.h:1646
#define ucol_openRules
Definition: urename.h:766
#define ucol_open
Definition: urename.h:761
C API: StringSearch.
@ USEARCH_CANONICAL_MATCH
Definition: usearch.h:187
@ USEARCH_OVERLAP
Definition: usearch.h:171
#define USEARCH_DONE
Definition: usearch.h:150
@ USEARCH_ON
Definition: usearch.h:223
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Definition: utypes.h:431
@ U_ZERO_ERROR
Definition: utypes.h:465
@ U_USING_FALLBACK_WARNING
Definition: utypes.h:437
@ U_USING_DEFAULT_WARNING
Definition: utypes.h:441
#define U_FAILURE(x)
Definition: utypes.h:735
#define argv
Definition: xmain.c:270
#define argc
Definition: xmain.c:269