"Fossies" - the Fresh Open Source Software Archive

Member "ucsdet_8h_source.html" (3 Oct 2019, 32779 Bytes) of package /linux/misc/icu4c-65_1-docs.zip:


Caution: In this restricted "Fossies" environment the current HTML page may not be presented correctly and may have some non-functional links. Alternatively, you can try to browse the pure source code or just view or download the uninterpreted raw source code. If the rendering is insufficient you may try to find and view the page on the project site itself.

ICU 65.1  65.1
ucsdet.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  **********************************************************************
5  * Copyright (C) 2005-2013, International Business Machines
6  * Corporation and others. All Rights Reserved.
7  **********************************************************************
8  * file name: ucsdet.h
9  * encoding: UTF-8
10  * indentation:4
11  *
12  * created on: 2005Aug04
13  * created by: Andy Heninger
14  *
15  * ICU Character Set Detection, API for C
16  *
17  * Draft version 18 Oct 2005
18  *
19  */
20 
21 #ifndef __UCSDET_H
22 #define __UCSDET_H
23 
24 #include "unicode/utypes.h"
25 
26 #if !UCONFIG_NO_CONVERSION
27 
28 #include "unicode/localpointer.h"
29 #include "unicode/uenum.h"
30 
55 struct UCharsetDetector;
60 typedef struct UCharsetDetector UCharsetDetector;
61 
62 struct UCharsetMatch;
68 typedef struct UCharsetMatch UCharsetMatch;
69 
78 U_STABLE UCharsetDetector * U_EXPORT2
79 ucsdet_open(UErrorCode *status);
80 
90 U_STABLE void U_EXPORT2
91 ucsdet_close(UCharsetDetector *ucsd);
92 
93 #if U_SHOW_CPLUSPLUS_API
94 
95 U_NAMESPACE_BEGIN
96 
106 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCharsetDetectorPointer, UCharsetDetector, ucsdet_close);
107 
108 U_NAMESPACE_END
109 
110 #endif
111 
127 U_STABLE void U_EXPORT2
128 ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
129 
130 
149 U_STABLE void U_EXPORT2
150 ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
151 
152 
178 U_STABLE const UCharsetMatch * U_EXPORT2
179 ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status);
180 
181 
212 U_STABLE const UCharsetMatch ** U_EXPORT2
213 ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
214 
215 
216 
232 U_STABLE const char * U_EXPORT2
233 ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status);
234 
258 U_STABLE int32_t U_EXPORT2
259 ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status);
260 
290 U_STABLE const char * U_EXPORT2
291 ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status);
292 
293 
316 U_STABLE int32_t U_EXPORT2
317 ucsdet_getUChars(const UCharsetMatch *ucsm,
318  UChar *buf, int32_t cap, UErrorCode *status);
319 
320 
321 
350 U_STABLE UEnumeration * U_EXPORT2
351 ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status);
352 
364 U_STABLE UBool U_EXPORT2
365 ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
366 
367 
379 U_STABLE UBool U_EXPORT2
380 ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
381 
382 #ifndef U_HIDE_INTERNAL_API
383 
396 U_INTERNAL UEnumeration * U_EXPORT2
397 ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status);
398 
412 U_INTERNAL void U_EXPORT2
413 ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status);
414 #endif /* U_HIDE_INTERNAL_API */
415 
416 #endif
417 #endif /* __UCSDET_H */
418 
419 
void ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
Enable or disable individual charset encoding.
UBool ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
Enable filtering of input text.
struct UEnumeration UEnumeration
structure representing an enumeration object instance
Definition: uenum.h:43
const char * ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
Get the name of the charset represented by a UCharsetMatch.
#define U_INTERNAL
This is used to declare a function as an internal ICU C API.
Definition: umachine.h:119
void ucsdet_close(UCharsetDetector *ucsd)
Close a charset detector.
int32_t ucsdet_getUChars(const UCharsetMatch *ucsm, UChar *buf, int32_t cap, UErrorCode *status)
Get the entire input text as a UChar string, placing it into a caller-supplied buffer.
UEnumeration * ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
Get an iterator over the set of detectable charsets - over the charsets that are enabled by the speci...
const UCharsetMatch * ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
Return the charset that best matches the supplied input data.
void ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
Set the input byte data whose charset is to be detected.
void ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
Set the declared encoding for charset detection.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:562
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
struct UCharsetDetector UCharsetDetector
Structure representing a charset detector.
Definition: ucsdet.h:60
"Smart pointer" class, closes a UCharsetDetector via ucsdet_close().
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
const UCharsetMatch ** ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status)
Find all charset matches that appear to be consistent with the input, returning an array of results...
UEnumeration * ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
Get an iterator over the set of all detectable charsets - over the charsets that are known to the cha...
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
UCharsetDetector * ucsdet_open(UErrorCode *status)
Open a charset detector.
C API: String Enumeration.
int32_t ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
Get a confidence number for the quality of the match of the byte data with the charset.
Basic definitions for ICU, for both C and C++ APIs.
UBool ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
Test whether input filtering is enabled for this charset detector.
const char * ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
Get the RFC 3066 code for the language of the input data.
struct UCharsetMatch UCharsetMatch
Opaque structure representing a match that was identified from a charset detection operation...
Definition: ucsdet.h:68
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261